asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge tag 'v4.20-rc6' into rdma.git for-next
author Jason Gunthorpe <jgg@mellanox.com>
Tue, 11 Dec 2018 21:24:57 +0000 (14:24 -0700)
committer Jason Gunthorpe <jgg@mellanox.com>
Tue, 11 Dec 2018 21:24:57 +0000 (14:24 -0700)
For dependencies in following patches.

153 files changed:
drivers/infiniband/core/cma_configfs.c
drivers/infiniband/core/cma_priv.h
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/rdma_core.c
drivers/infiniband/core/rdma_core.h
drivers/infiniband/core/restrack.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_ioctl.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_std_types.c
drivers/infiniband/core/uverbs_std_types_counters.c
drivers/infiniband/core/uverbs_std_types_cq.c
drivers/infiniband/core/uverbs_std_types_dm.c
drivers/infiniband/core/uverbs_std_types_flow_action.c
drivers/infiniband/core/uverbs_std_types_mr.c
drivers/infiniband/core/uverbs_uapi.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb3/cxio_hal.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/hfi1/Makefile
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip_registers.h
drivers/infiniband/hw/hfi1/common.h
drivers/infiniband/hw/hfi1/debugfs.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/pio.h
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/sdma.h
drivers/infiniband/hw/hfi1/tid_rdma.c [new file with mode: 0644]
drivers/infiniband/hw/hfi1/tid_rdma.h [new file with mode: 0644]
drivers/infiniband/hw/hfi1/uc.c
drivers/infiniband/hw/hfi1/ud.c
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/verbs.h
drivers/infiniband/hw/hfi1/vnic_main.c
drivers/infiniband/hw/hfi1/vnic_sdma.c
drivers/infiniband/hw/hns/Makefile
drivers/infiniband/hw/hns/hns_roce_alloc.c
drivers/infiniband/hw/hns/hns_roce_cmd.h
drivers/infiniband/hw/hns/hns_roce_common.h
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hem.c
drivers/infiniband/hw/hns/hns_roce_hem.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_mr.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/hns/hns_roce_srq.c [new file with mode: 0644]
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/sysfs.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/flow.c
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/infiniband/hw/mlx5/mad.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/srq.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/srq_cmd.c [moved from drivers/net/ethernet/mellanox/mlx5/core/srq.c with 71% similarity]
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_pcie.c
drivers/infiniband/hw/qib/qib_sdma.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/hw/qib/qib_user_sdma.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rxe/rxe.h
drivers/infiniband/sw/rxe/rxe_comp.c
drivers/infiniband/sw/rxe/rxe_hw_counters.c
drivers/infiniband/sw/rxe/rxe_hw_counters.h
drivers/infiniband/sw/rxe/rxe_loc.h
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_pool.c
drivers/infiniband/sw/rxe/rxe_pool.h
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/sw/rxe/rxe_verbs.h
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/events.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
include/linux/mlx5/cq.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/eq.h [new file with mode: 0644]
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/port.h
include/linux/mlx5/qp.h
include/linux/mlx5/srq.h [deleted file]
include/linux/mlx5/transobj.h
include/rdma/ib_fmr_pool.h
include/rdma/ib_mad.h
include/rdma/ib_verbs.h
include/rdma/rdma_vt.h
include/rdma/restrack.h
include/rdma/uverbs_ioctl.h
include/rdma/uverbs_named_ioctl.h
include/rdma/uverbs_std_types.h
include/uapi/rdma/hfi/hfi1_user.h
include/uapi/rdma/hns-abi.h
include/uapi/rdma/ib_user_verbs.h
include/uapi/rdma/mlx5-abi.h
include/uapi/rdma/mlx5_user_ioctl_cmds.h

drivers/infiniband/core/cma_configfs.c
index 8c2dfb3e294ec9043b89685adbac24af1c44db8d..3ec2c415bb706f947a0a59ea1c05b8e2b6207384 100644 (file)
 #include <linux/module.h>
 #include <linux/configfs.h>
 #include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
 #include "core_priv.h"
+#include "cma_priv.h"
 
 struct cma_device;
 
drivers/infiniband/core/cma_priv.h
index 194cfe78c4475d7d3a3c18a08cdeec2cf15bb66a..cf47c69436a76b5ecdf88f3c06400f0e1ed85999 100644 (file)
@@ -94,4 +94,32 @@ struct rdma_id_private {
         */
        struct rdma_restrack_entry     res;
 };
+
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+       return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+
+void cma_ref_dev(struct cma_device *dev);
+void cma_deref_dev(struct cma_device *dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void *cookie);
+int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
+int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+                            enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+                            u8 default_roce_tos);
+struct ib_device *cma_get_ib_dev(struct cma_device *dev);
+
 #endif /* _CMA_PRIV_H */
drivers/infiniband/core/core_priv.h
index bb9007a0cca754ee53b21da0fa747862f747a7ad..cc7535c5e19233b8e9c9128638f4f6dea667bc5d 100644 (file)
@@ -54,35 +54,6 @@ struct pkey_index_qp_list {
        struct list_head    qp_list;
 };
 
-#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
-int cma_configfs_init(void);
-void cma_configfs_exit(void);
-#else
-static inline int cma_configfs_init(void)
-{
-       return 0;
-}
-
-static inline void cma_configfs_exit(void)
-{
-}
-#endif
-struct cma_device;
-void cma_ref_dev(struct cma_device *cma_dev);
-void cma_deref_dev(struct cma_device *cma_dev);
-typedef bool (*cma_device_filter)(struct ib_device *, void *);
-struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
-                                            void               *cookie);
-int cma_get_default_gid_type(struct cma_device *cma_dev,
-                            unsigned int port);
-int cma_set_default_gid_type(struct cma_device *cma_dev,
-                            unsigned int port,
-                            enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
-                            u8 default_roce_tos);
-struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
-
 int  ib_device_register_sysfs(struct ib_device *device,
                              int (*port_callback)(struct ib_device *,
                                                   u8, struct kobject *));
@@ -296,6 +267,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 #endif
 
 struct ib_device *ib_device_get_by_index(u32 ifindex);
+void ib_device_put(struct ib_device *device);
 /* RDMA device netlink */
 void nldev_init(void);
 void nldev_exit(void);
drivers/infiniband/core/device.c
index 87eb4f2cdd7d4d94abe01ed4b7a7f9ff0b9ae771..348a7fb1f945f59b5e405af370c31e1d96b611ae 100644 (file)
@@ -145,7 +145,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
 }
 
 /*
- * Caller is responsible to return refrerence count by calling put_device()
+ * Caller must perform ib_device_put() to return the device reference count
+ * when ib_device_get_by_index() returns valid device pointer.
  */
 struct ib_device *ib_device_get_by_index(u32 index)
 {
@@ -153,13 +154,21 @@ struct ib_device *ib_device_get_by_index(u32 index)
 
        down_read(&lists_rwsem);
        device = __ib_device_get_by_index(index);
-       if (device)
-               get_device(&device->dev);
-
+       if (device) {
+               /* Do not return a device if unregistration has started. */
+               if (!refcount_inc_not_zero(&device->refcount))
+                       device = NULL;
+       }
        up_read(&lists_rwsem);
        return device;
 }
 
+void ib_device_put(struct ib_device *device)
+{
+       if (refcount_dec_and_test(&device->refcount))
+               complete(&device->unreg_completion);
+}
+
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
        struct ib_device *device;
@@ -293,6 +302,8 @@ struct ib_device *ib_alloc_device(size_t size)
        rwlock_init(&device->client_data_lock);
        INIT_LIST_HEAD(&device->client_data_list);
        INIT_LIST_HEAD(&device->port_list);
+       refcount_set(&device->refcount, 1);
+       init_completion(&device->unreg_completion);
 
        return device;
 }
@@ -641,6 +652,13 @@ void ib_unregister_device(struct ib_device *device)
        struct ib_client_data *context, *tmp;
        unsigned long flags;
 
+       /*
+        * Wait for all netlink command callers to finish working on the
+        * device.
+        */
+       ib_device_put(device);
+       wait_for_completion(&device->unreg_completion);
+
        mutex_lock(&device_mutex);
 
        down_write(&lists_rwsem);
@@ -1024,6 +1042,9 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
 int ib_query_pkey(struct ib_device *device,
                  u8 port_num, u16 index, u16 *pkey)
 {
+       if (!rdma_is_port_valid(device, port_num))
+               return -EINVAL;
+
        return device->query_pkey(device, port_num, index, pkey);
 }
 EXPORT_SYMBOL(ib_query_pkey);
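
The device.c hunks above pair every successful ib_device_get_by_index() with a new ib_device_put(); a caller that forgets the put keeps ib_unregister_device() blocked on unreg_completion. A minimal sketch of that contract, assuming a hypothetical caller (example_nldev_handler is illustrative, not part of the patch):

static int example_nldev_handler(u32 index)
{
        struct ib_device *device;

        device = ib_device_get_by_index(index);
        if (!device)
                return -EINVAL; /* no such device, or unregistration already started */

        /* ... use 'device'; unregistration cannot complete while we hold the ref ... */

        ib_device_put(device); /* drop the reference taken by ib_device_get_by_index() */
        return 0;
}
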
drivers/infiniband/core/fmr_pool.c
index 83ba0068e8bb97621e76736cb56c3da75065efe1..b00dfd2ad31e4c45206e7cafc4143554a0ff58c4 100644 (file)
@@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_fmr_pool_map_phys);
  * Unmap an FMR.  The FMR mapping may remain valid until the FMR is
  * reused (or until ib_flush_fmr_pool() is called).
  */
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
 {
        struct ib_fmr_pool *pool;
        unsigned long flags;
@@ -503,7 +503,5 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
 #endif
 
        spin_unlock_irqrestore(&pool->pool_lock, flags);
-
-       return 0;
 }
 EXPORT_SYMBOL(ib_fmr_pool_unmap);
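
With ib_fmr_pool_unmap() no longer able to fail, callers only check the map step; a hedged sketch of the pairing (the wrapper function and its arguments are placeholders, not code from this patch):

static int example_map_then_unmap(struct ib_fmr_pool *pool, u64 *page_list,
                                  int npages, u64 iova)
{
        struct ib_pool_fmr *fmr;

        fmr = ib_fmr_pool_map_phys(pool, page_list, npages, iova);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);

        /* ... post work requests that reference the mapping ... */

        ib_fmr_pool_unmap(fmr); /* returns void now; nothing to check */
        return 0;
}
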
drivers/infiniband/core/nldev.c
index 573399e3ccc13f0d4c89d12518e23b070e7e4f4f..9abbadb9e3662581385acb1157c2a52fafb99c25 100644 (file)
@@ -308,6 +308,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
                [RDMA_RESTRACK_QP] = "qp",
                [RDMA_RESTRACK_CM_ID] = "cm_id",
                [RDMA_RESTRACK_MR] = "mr",
+               [RDMA_RESTRACK_CTX] = "ctx",
        };
 
        struct rdma_restrack_root *res = &device->res;
@@ -636,13 +637,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        nlmsg_end(msg, nlh);
 
-       put_device(&device->dev);
+       ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
 err:
-       put_device(&device->dev);
+       ib_device_put(device);
        return err;
 }
 
@@ -672,7 +673,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                err = ib_device_rename(device, name);
        }
 
-       put_device(&device->dev);
+       ib_device_put(device);
        return err;
 }
 
@@ -756,14 +757,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                goto err_free;
 
        nlmsg_end(msg, nlh);
-       put_device(&device->dev);
+       ib_device_put(device);
 
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
 err:
-       put_device(&device->dev);
+       ib_device_put(device);
        return err;
 }
 
@@ -820,7 +821,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
        }
 
 out:
-       put_device(&device->dev);
+       ib_device_put(device);
        cb->args[0] = idx;
        return skb->len;
 }
@@ -859,13 +860,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                goto err_free;
 
        nlmsg_end(msg, nlh);
-       put_device(&device->dev);
+       ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
 
 err_free:
        nlmsg_free(msg);
 err:
-       put_device(&device->dev);
+       ib_device_put(device);
        return ret;
 }
 
@@ -1058,7 +1059,7 @@ next:             idx++;
        if (!filled)
                goto err;
 
-       put_device(&device->dev);
+       ib_device_put(device);
        return skb->len;
 
 res_err:
@@ -1069,7 +1070,7 @@ next:             idx++;
        nlmsg_cancel(skb, nlh);
 
 err_index:
-       put_device(&device->dev);
+       ib_device_put(device);
        return ret;
 }
 
drivers/infiniband/core/rdma_core.c
index 752a55c6bdce5494919bcfcfff0d967925f22345..7d2f1ef75025fd575b90ed55f7a8f9787c5f2093 100644 (file)
@@ -224,12 +224,14 @@ int uobj_destroy(struct ib_uobject *uobj)
  * uverbs_put_destroy.
  */
 struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
-                                     u32 id, struct ib_uverbs_file *ufile)
+                                     u32 id,
+                                     const struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj;
        int ret;
 
-       uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
+       uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+                                      UVERBS_LOOKUP_DESTROY);
        if (IS_ERR(uobj))
                return uobj;
 
@@ -243,21 +245,20 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
 }
 
 /*
- * Does both uobj_get_destroy() and uobj_put_destroy().  Returns success_res
- * on success (negative errno on failure). For use by callers that do not need
- * the uobj.
+ * Does both uobj_get_destroy() and uobj_put_destroy().  Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
  */
 int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
-                          struct ib_uverbs_file *ufile, int success_res)
+                          const struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj;
 
-       uobj = __uobj_get_destroy(obj, id, ufile);
+       uobj = __uobj_get_destroy(obj, id, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
-       return success_res;
+       return 0;
 }
 
 /* alloc_uobj must be undone by uverbs_destroy_uobject() */
@@ -267,7 +268,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
        struct ib_uobject *uobj;
        struct ib_ucontext *ucontext;
 
-       ucontext = ib_uverbs_get_ucontext(ufile);
+       ucontext = ib_uverbs_get_ucontext_file(ufile);
        if (IS_ERR(ucontext))
                return ERR_CAST(ucontext);
 
@@ -397,16 +398,23 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
        struct ib_uobject *uobj;
        int ret;
 
-       if (!obj)
-               return ERR_PTR(-EINVAL);
+       if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) {
+               /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+               uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+               if (IS_ERR(uobj))
+                       return uobj;
+       } else {
+               if (IS_ERR(obj))
+                       return ERR_PTR(-EINVAL);
 
-       uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
-       if (IS_ERR(uobj))
-               return uobj;
+               uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+               if (IS_ERR(uobj))
+                       return uobj;
 
-       if (uobj->uapi_object != obj) {
-               ret = -EINVAL;
-               goto free;
+               if (uobj->uapi_object != obj) {
+                       ret = -EINVAL;
+                       goto free;
+               }
        }
 
        /*
@@ -426,7 +434,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
 
        return uobj;
 free:
-       obj->type_class->lookup_put(uobj, mode);
+       uobj->uapi_object->type_class->lookup_put(uobj, mode);
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
 }
@@ -490,7 +498,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
 {
        struct ib_uobject *ret;
 
-       if (!obj)
+       if (IS_ERR(obj))
                return ERR_PTR(-EINVAL);
 
        /*
@@ -819,6 +827,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
        ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
                           RDMACG_RESOURCE_HCA_HANDLE);
 
+       rdma_restrack_del(&ucontext->res);
+
        /*
         * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
         * the error return.
drivers/infiniband/core/rdma_core.h
index 4886d2bba7c7f44475c412ac45e954560dbe7410..b3ca7457ac42669929bc766f37ad75d0e9b23a5a 100644 (file)
@@ -118,43 +118,67 @@ void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
  * Depending on ID the slot pointer in the radix tree points at one of these
  * structs.
  */
-struct uverbs_api_object {
-       const struct uverbs_obj_type *type_attrs;
-       const struct uverbs_obj_type_class *type_class;
-};
 
 struct uverbs_api_ioctl_method {
-       int (__rcu *handler)(struct ib_uverbs_file *ufile,
-                            struct uverbs_attr_bundle *ctx);
+       int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
        DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
        u16 bundle_size;
        u8 use_stack:1;
        u8 driver_method:1;
+       u8 disabled:1;
+       u8 has_udata:1;
        u8 key_bitmap_len;
        u8 destroy_bkey;
 };
 
+struct uverbs_api_write_method {
+       int (*handler)(struct uverbs_attr_bundle *attrs);
+       u8 disabled:1;
+       u8 is_ex:1;
+       u8 has_udata:1;
+       u8 has_resp:1;
+       u8 req_size;
+       u8 resp_size;
+};
+
 struct uverbs_api_attr {
        struct uverbs_attr_spec spec;
 };
 
-struct uverbs_api_object;
 struct uverbs_api {
        /* radix tree contains struct uverbs_api_* pointers */
        struct radix_tree_root radix;
        enum rdma_driver_id driver_id;
+
+       unsigned int num_write;
+       unsigned int num_write_ex;
+       struct uverbs_api_write_method notsupp_method;
+       const struct uverbs_api_write_method **write_methods;
+       const struct uverbs_api_write_method **write_ex_methods;
 };
 
+/*
+ * Get an uverbs_api_object that corresponds to the given object_id.
+ * Note:
+ * -ENOMSG means that any object is allowed to match during lookup.
+ */
 static inline const struct uverbs_api_object *
 uapi_get_object(struct uverbs_api *uapi, u16 object_id)
 {
-       return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+       const struct uverbs_api_object *res;
+
+       if (object_id == UVERBS_IDR_ANY_OBJECT)
+               return ERR_PTR(-ENOMSG);
+
+       res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+       if (!res)
+               return ERR_PTR(-ENOENT);
+
+       return res;
 }
 
 char *uapi_key_format(char *S, unsigned int key);
-struct uverbs_api *uverbs_alloc_api(
-       const struct uverbs_object_tree_def *const *driver_specs,
-       enum rdma_driver_id driver_id);
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
 void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
 void uverbs_disassociate_api(struct uverbs_api *uapi);
 void uverbs_destroy_api(struct uverbs_api *uapi);
@@ -162,4 +186,32 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
                              unsigned int num_attrs);
 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
 
+extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_cq[];
+extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_flow_action[];
+extern const struct uapi_definition uverbs_def_obj_intf[];
+extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_write_intf[];
+
+static inline const struct uverbs_api_write_method *
+uapi_get_method(const struct uverbs_api *uapi, u32 command)
+{
+       u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+       if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+                            IB_USER_VERBS_CMD_COMMAND_MASK))
+               return ERR_PTR(-EINVAL);
+
+       if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+               if (cmd_idx >= uapi->num_write_ex)
+                       return ERR_PTR(-EOPNOTSUPP);
+               return uapi->write_ex_methods[cmd_idx];
+       }
+
+       if (cmd_idx >= uapi->num_write)
+               return ERR_PTR(-EOPNOTSUPP);
+       return uapi->write_methods[cmd_idx];
+}
+
 #endif /* RDMA_CORE_H */
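
uapi_get_method() above returns ERR_PTR(-EINVAL) for malformed command words and ERR_PTR(-EOPNOTSUPP) for indices past the populated tables, so a write dispatcher reduces to a table lookup plus a handler call. A simplified sketch using a hypothetical example_dispatch_write() (the real write() path additionally parses the command header and sets up udata):

static int example_dispatch_write(struct uverbs_api *uapi, u32 command,
                                  struct uverbs_attr_bundle *attrs)
{
        const struct uverbs_api_write_method *method;

        method = uapi_get_method(uapi, command);
        if (IS_ERR(method))
                return PTR_ERR(method); /* -EINVAL (bad flags) or -EOPNOTSUPP */

        if (method->disabled)
                return -EOPNOTSUPP;

        return method->handler(attrs);
}
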
drivers/infiniband/core/restrack.c
index 06d8657ce5834747d32490e7daa21505bbde53f4..3dd316159f5ff6a58b695eae2b716221a233a139 100644 (file)
@@ -32,6 +32,7 @@ static const char *type2str(enum rdma_restrack_type type)
                [RDMA_RESTRACK_QP] = "QP",
                [RDMA_RESTRACK_CM_ID] = "CM_ID",
                [RDMA_RESTRACK_MR] = "MR",
+               [RDMA_RESTRACK_CTX] = "CTX",
        };
 
        return names[type];
@@ -130,6 +131,8 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
                                    res)->id.device;
        case RDMA_RESTRACK_MR:
                return container_of(res, struct ib_mr, res)->device;
+       case RDMA_RESTRACK_CTX:
+               return container_of(res, struct ib_ucontext, res)->device;
        default:
                WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
                return NULL;
@@ -149,6 +152,8 @@ static bool res_is_user(struct rdma_restrack_entry *res)
                return !res->kern_name;
        case RDMA_RESTRACK_MR:
                return container_of(res, struct ib_mr, res)->pd->uobject;
+       case RDMA_RESTRACK_CTX:
+               return true;
        default:
                WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
                return false;
drivers/infiniband/core/umem_odp.c
index 676c1fd1119d80a17d4542d035a319300332842f..9608681224e668d92ebe1c4b1b2004e61a930160 100644 (file)
@@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
                                flags, local_page_list, NULL, NULL);
                up_read(&owning_mm->mmap_sem);
 
-               if (npages < 0)
+               if (npages < 0) {
+                       if (npages != -EAGAIN)
+                               pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
+                       else
+                               pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
                        break;
+               }
 
                bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
                mutex_lock(&umem_odp->umem_mutex);
@@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
                        ret = ib_umem_odp_map_dma_single_page(
                                        umem_odp, k, local_page_list[j],
                                        access_mask, current_seq);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               if (ret != -EAGAIN)
+                                       pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+                               else
+                                       pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
                                break;
+                       }
 
                        p = page_to_phys(local_page_list[j]);
                        k++;
drivers/infiniband/core/uverbs.h
index c97935a0c7c6ef2f9beb9c011bdd840dcbdcc27f..8b41c95300c62dff4ef6d873c93b5ad97352988a 100644 (file)
@@ -161,9 +161,6 @@ struct ib_uverbs_file {
        struct mutex umap_lock;
        struct list_head umaps;
 
-       u64 uverbs_cmd_mask;
-       u64 uverbs_ex_cmd_mask;
-
        struct idr              idr;
        /* spinlock protects write access to idr */
        spinlock_t              idr_lock;
@@ -297,63 +294,4 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
 extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
 extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
 
-#define IB_UVERBS_DECLARE_CMD(name)                                    \
-       ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,           \
-                                const char __user *buf, int in_len,    \
-                                int out_len)
-
-IB_UVERBS_DECLARE_CMD(get_context);
-IB_UVERBS_DECLARE_CMD(query_device);
-IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(alloc_pd);
-IB_UVERBS_DECLARE_CMD(dealloc_pd);
-IB_UVERBS_DECLARE_CMD(reg_mr);
-IB_UVERBS_DECLARE_CMD(rereg_mr);
-IB_UVERBS_DECLARE_CMD(dereg_mr);
-IB_UVERBS_DECLARE_CMD(alloc_mw);
-IB_UVERBS_DECLARE_CMD(dealloc_mw);
-IB_UVERBS_DECLARE_CMD(create_comp_channel);
-IB_UVERBS_DECLARE_CMD(create_cq);
-IB_UVERBS_DECLARE_CMD(resize_cq);
-IB_UVERBS_DECLARE_CMD(poll_cq);
-IB_UVERBS_DECLARE_CMD(req_notify_cq);
-IB_UVERBS_DECLARE_CMD(destroy_cq);
-IB_UVERBS_DECLARE_CMD(create_qp);
-IB_UVERBS_DECLARE_CMD(open_qp);
-IB_UVERBS_DECLARE_CMD(query_qp);
-IB_UVERBS_DECLARE_CMD(modify_qp);
-IB_UVERBS_DECLARE_CMD(destroy_qp);
-IB_UVERBS_DECLARE_CMD(post_send);
-IB_UVERBS_DECLARE_CMD(post_recv);
-IB_UVERBS_DECLARE_CMD(post_srq_recv);
-IB_UVERBS_DECLARE_CMD(create_ah);
-IB_UVERBS_DECLARE_CMD(destroy_ah);
-IB_UVERBS_DECLARE_CMD(attach_mcast);
-IB_UVERBS_DECLARE_CMD(detach_mcast);
-IB_UVERBS_DECLARE_CMD(create_srq);
-IB_UVERBS_DECLARE_CMD(modify_srq);
-IB_UVERBS_DECLARE_CMD(query_srq);
-IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xsrq);
-IB_UVERBS_DECLARE_CMD(open_xrcd);
-IB_UVERBS_DECLARE_CMD(close_xrcd);
-
-#define IB_UVERBS_DECLARE_EX_CMD(name)                         \
-       int ib_uverbs_ex_##name(struct ib_uverbs_file *file,    \
-                               struct ib_udata *ucore,         \
-                               struct ib_udata *uhw)
-
-IB_UVERBS_DECLARE_EX_CMD(create_flow);
-IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
-IB_UVERBS_DECLARE_EX_CMD(query_device);
-IB_UVERBS_DECLARE_EX_CMD(create_cq);
-IB_UVERBS_DECLARE_EX_CMD(create_qp);
-IB_UVERBS_DECLARE_EX_CMD(create_wq);
-IB_UVERBS_DECLARE_EX_CMD(modify_wq);
-IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
-IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(modify_qp);
-IB_UVERBS_DECLARE_EX_CMD(modify_cq);
-
 #endif /* UVERBS_H */
drivers/infiniband/core/uverbs_cmd.c
index a93853770e3ccf3c7f4bfc8e08df55d466c62e65..b707495424719fd3a885a53c98b7ae8eb22a18a1 100644 (file)
 #include "uverbs.h"
 #include "core_priv.h"
 
+/*
+ * Copy a response to userspace. If the provided 'resp' is larger than the
+ * user buffer it is silently truncated. If the user provided a larger buffer
+ * then the trailing portion is zero filled.
+ *
+ * These semantics are intended to support future extension of the output
+ * structures.
+ */
+static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp,
+                          size_t resp_len)
+{
+       u8 __user *cur = attrs->ucore.outbuf + resp_len;
+       u8 __user *end = attrs->ucore.outbuf + attrs->ucore.outlen;
+       int ret;
+
+       if (copy_to_user(attrs->ucore.outbuf, resp,
+                        min(attrs->ucore.outlen, resp_len)))
+               return -EFAULT;
+
+       /* Zero fill any extra memory that user space might have provided */
+       for (; cur < end; cur++) {
+               ret = put_user(0, cur);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * Copy a request from userspace. If the provided 'req' is larger than the
+ * user buffer then the user buffer is zero extended into the 'req'. If 'req'
+ * is smaller than the user buffer then the uncopied bytes in the user buffer
+ * must be zero.
+ */
+static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req,
+                         size_t req_len)
+{
+       if (copy_from_user(req, attrs->ucore.inbuf,
+                          min(attrs->ucore.inlen, req_len)))
+               return -EFAULT;
+
+       if (attrs->ucore.inlen < req_len) {
+               memset(req + attrs->ucore.inlen, 0,
+                      req_len - attrs->ucore.inlen);
+       } else if (attrs->ucore.inlen > req_len) {
+               if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len,
+                                         attrs->ucore.inlen - req_len))
+                       return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+/*
+ * Generate the value for the 'response_length' protocol used by write_ex.
+ * This is the number of bytes the kernel actually wrote. Userspace can use
+ * this to detect what structure members in the response the kernel
+ * understood.
+ */
+static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs,
+                                 size_t resp_len)
+{
+       return min_t(size_t, attrs->ucore.outlen, resp_len);
+}
+
+/*
+ * The iterator version of the request interface is for handlers that need to
+ * step over a flex array at the end of a command header.
+ */
+struct uverbs_req_iter {
+       const void __user *cur;
+       const void __user *end;
+};
+
+static int uverbs_request_start(struct uverbs_attr_bundle *attrs,
+                               struct uverbs_req_iter *iter,
+                               void *req,
+                               size_t req_len)
+{
+       if (attrs->ucore.inlen < req_len)
+               return -ENOSPC;
+
+       if (copy_from_user(req, attrs->ucore.inbuf, req_len))
+               return -EFAULT;
+
+       iter->cur = attrs->ucore.inbuf + req_len;
+       iter->end = attrs->ucore.inbuf + attrs->ucore.inlen;
+       return 0;
+}
+
+static int uverbs_request_next(struct uverbs_req_iter *iter, void *val,
+                              size_t len)
+{
+       if (iter->cur + len > iter->end)
+               return -ENOSPC;
+
+       if (copy_from_user(val, iter->cur, len))
+               return -EFAULT;
+
+       iter->cur += len;
+       return 0;
+}
+
+static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
+                                                 size_t len)
+{
+       const void __user *res = iter->cur;
+
+       if (iter->cur + len > iter->end)
+               return ERR_PTR(-ENOSPC);
+       iter->cur += len;
+       return res;
+}
+
+static int uverbs_request_finish(struct uverbs_req_iter *iter)
+{
+       if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur))
+               return -EOPNOTSUPP;
+       return 0;
+}
+
 static struct ib_uverbs_completion_event_file *
-_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
+_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
-                                              fd, ufile);
+                                              fd, attrs);
 
        if (IS_ERR(uobj))
                return (void *)uobj;
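
The uverbs_req_iter helpers added earlier in this file exist for commands whose fixed header is followed by a flex array in the write() stream. A hedged sketch of the intended calling pattern; example_cmd, example_entry and example_flex_handler are made-up names for illustration only:

struct example_cmd {            /* hypothetical fixed command header */
        __u32 num_entries;
        __u32 reserved;
};

struct example_entry {          /* hypothetical flex-array element */
        __u64 val;
};

static int example_flex_handler(struct uverbs_attr_bundle *attrs)
{
        struct uverbs_req_iter iter;
        struct example_cmd cmd;
        struct example_entry entry;
        u32 i;
        int ret;

        ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
        if (ret)
                return ret;

        for (i = 0; i < cmd.num_entries; i++) {
                ret = uverbs_request_next(&iter, &entry, sizeof(entry));
                if (ret)
                        return ret;
                /* ... consume 'entry' ... */
        }

        /* trailing request bytes the kernel does not understand must be zero */
        return uverbs_request_finish(&iter);
}
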
@@ -65,24 +186,20 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
 #define ib_uverbs_lookup_comp_file(_fd, _ufile)                                \
        _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
 
-ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
-                             const char __user *buf,
-                             int in_len, int out_len)
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 {
+       struct ib_uverbs_file *file = attrs->ufile;
        struct ib_uverbs_get_context      cmd;
        struct ib_uverbs_get_context_resp resp;
-       struct ib_udata                   udata;
        struct ib_ucontext               *ucontext;
        struct file                      *filp;
        struct ib_rdmacg_object          cg_obj;
        struct ib_device *ib_dev;
        int ret;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        mutex_lock(&file->ucontext_lock);
        ib_dev = srcu_dereference(file->device->ib_dev,
@@ -97,16 +214,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
                goto err;
        }
 
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
-
        ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
        if (ret)
                goto err;
 
-       ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
+       ucontext = ib_dev->alloc_ucontext(ib_dev, &attrs->driver_udata);
        if (IS_ERR(ucontext)) {
                ret = PTR_ERR(ucontext);
                goto err_alloc;
@@ -141,13 +253,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
                goto err_fd;
        }
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_file;
-       }
 
        fd_install(resp.async_fd, filp);
 
+       ucontext->res.type = RDMA_RESTRACK_CTX;
+       rdma_restrack_add(&ucontext->res);
+
        /*
         * Make sure that ib_uverbs_get_ucontext() sees the pointer update
         * only after all writes to setup the ucontext have completed
@@ -156,7 +270,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 
        mutex_unlock(&file->ucontext_lock);
 
-       return in_len;
+       return 0;
 
 err_file:
        ib_uverbs_free_async_event_file(file);
@@ -224,31 +338,25 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
        resp->phys_port_cnt             = ib_dev->phys_port_cnt;
 }
 
-ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
-                              const char __user *buf,
-                              int in_len, int out_len)
+static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_query_device      cmd;
        struct ib_uverbs_query_device_resp resp;
        struct ib_ucontext *ucontext;
+       int ret;
 
-       ucontext = ib_uverbs_get_ucontext(file);
+       ucontext = ib_uverbs_get_ucontext(attrs);
        if (IS_ERR(ucontext))
                return PTR_ERR(ucontext);
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        memset(&resp, 0, sizeof resp);
        copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
 /*
@@ -272,9 +380,7 @@ static u32 make_port_cap_flags(const struct ib_port_attr *attr)
        return res;
 }
 
-ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
-                            const char __user *buf,
-                            int in_len, int out_len)
+static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_query_port      cmd;
        struct ib_uverbs_query_port_resp resp;
@@ -283,16 +389,14 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
        struct ib_ucontext *ucontext;
        struct ib_device *ib_dev;
 
-       ucontext = ib_uverbs_get_ucontext(file);
+       ucontext = ib_uverbs_get_ucontext(attrs);
        if (IS_ERR(ucontext))
                return PTR_ERR(ucontext);
        ib_dev = ucontext->device;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        ret = ib_query_port(ib_dev, cmd.port_num, &attr);
        if (ret)
@@ -331,40 +435,27 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
        resp.link_layer      = rdma_port_get_link_layer(ib_dev,
                                                        cmd.port_num);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
-                          const char __user *buf,
-                          int in_len, int out_len)
+static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_alloc_pd      cmd;
        struct ib_uverbs_alloc_pd_resp resp;
-       struct ib_udata                udata;
        struct ib_uobject             *uobj;
        struct ib_pd                  *pd;
        int                            ret;
        struct ib_device *ib_dev;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                   out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
+       uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
-       pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
+       pd = ib_dev->alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
        if (IS_ERR(pd)) {
                ret = PTR_ERR(pd);
                goto err;
@@ -381,12 +472,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
        pd->res.type = RDMA_RESTRACK_PD;
        rdma_restrack_add(&pd->res);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_copy;
-       }
 
-       return uobj_alloc_commit(uobj, in_len);
+       return uobj_alloc_commit(uobj);
 
 err_copy:
        ib_dealloc_pd(pd);
@@ -396,17 +486,16 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
-                            const char __user *buf,
-                            int in_len, int out_len)
+static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_dealloc_pd cmd;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
-                                   in_len);
+       return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
 }
 
 struct xrcd_table_entry {
@@ -494,13 +583,11 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
        }
 }
 
-ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
 {
+       struct ib_uverbs_device *ibudev = attrs->ufile->device;
        struct ib_uverbs_open_xrcd      cmd;
        struct ib_uverbs_open_xrcd_resp resp;
-       struct ib_udata                 udata;
        struct ib_uxrcd_object         *obj;
        struct ib_xrcd                 *xrcd = NULL;
        struct fd                       f = {NULL, 0};
@@ -509,18 +596,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
        int                             new_xrcd = 0;
        struct ib_device *ib_dev;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                   out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       mutex_lock(&file->device->xrcd_tree_mutex);
+       mutex_lock(&ibudev->xrcd_tree_mutex);
 
        if (cmd.fd != -1) {
                /* search for file descriptor */
@@ -531,7 +611,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
                }
 
                inode = file_inode(f.file);
-               xrcd = find_xrcd(file->device, inode);
+               xrcd = find_xrcd(ibudev, inode);
                if (!xrcd && !(cmd.oflags & O_CREAT)) {
                        /* no file descriptor. Need CREATE flag */
                        ret = -EAGAIN;
@@ -544,7 +624,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
                }
        }
 
-       obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+       obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs,
                                                   &ib_dev);
        if (IS_ERR(obj)) {
                ret = PTR_ERR(obj);
@@ -552,7 +632,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
        }
 
        if (!xrcd) {
-               xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
+               xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context,
+                                         &attrs->driver_udata);
                if (IS_ERR(xrcd)) {
                        ret = PTR_ERR(xrcd);
                        goto err;
@@ -574,29 +655,28 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
        if (inode) {
                if (new_xrcd) {
                        /* create new inode/xrcd table entry */
-                       ret = xrcd_table_insert(file->device, inode, xrcd);
+                       ret = xrcd_table_insert(ibudev, inode, xrcd);
                        if (ret)
                                goto err_dealloc_xrcd;
                }
                atomic_inc(&xrcd->usecnt);
        }
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_copy;
-       }
 
        if (f.file)
                fdput(f);
 
-       mutex_unlock(&file->device->xrcd_tree_mutex);
+       mutex_unlock(&ibudev->xrcd_tree_mutex);
 
-       return uobj_alloc_commit(&obj->uobject, in_len);
+       return uobj_alloc_commit(&obj->uobject);
 
 err_copy:
        if (inode) {
                if (new_xrcd)
-                       xrcd_table_delete(file->device, inode);
+                       xrcd_table_delete(ibudev, inode);
                atomic_dec(&xrcd->usecnt);
        }
 
@@ -610,22 +690,21 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
        if (f.file)
                fdput(f);
 
-       mutex_unlock(&file->device->xrcd_tree_mutex);
+       mutex_unlock(&ibudev->xrcd_tree_mutex);
 
        return ret;
 }
 
-ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_close_xrcd cmd;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
-                                   in_len);
+       return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs);
 }
 
 int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
@@ -653,29 +732,19 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
        return ret;
 }
 
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
-                        const char __user *buf, int in_len,
-                        int out_len)
+static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_reg_mr      cmd;
        struct ib_uverbs_reg_mr_resp resp;
-       struct ib_udata              udata;
        struct ib_uobject           *uobj;
        struct ib_pd                *pd;
        struct ib_mr                *mr;
        int                          ret;
        struct ib_device *ib_dev;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                   out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
                return -EINVAL;
@@ -684,11 +753,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
-       uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
+       uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
-       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
        if (!pd) {
                ret = -EINVAL;
                goto err_free;
@@ -704,7 +773,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        }
 
        mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
-                                    cmd.access_flags, &udata);
+                                    cmd.access_flags, &attrs->driver_udata);
        if (IS_ERR(mr)) {
                ret = PTR_ERR(mr);
                goto err_put;
@@ -725,14 +794,13 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        resp.rkey      = mr->rkey;
        resp.mr_handle = uobj->id;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_copy;
-       }
 
        uobj_put_obj_read(pd);
 
-       return uobj_alloc_commit(uobj, in_len);
+       return uobj_alloc_commit(uobj);
 
 err_copy:
        ib_dereg_mr(mr);
@@ -745,29 +813,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
-                          const char __user *buf, int in_len,
-                          int out_len)
+static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_rereg_mr      cmd;
        struct ib_uverbs_rereg_mr_resp resp;
-       struct ib_udata              udata;
        struct ib_pd                *pd = NULL;
        struct ib_mr                *mr;
        struct ib_pd                *old_pd;
        int                          ret;
        struct ib_uobject           *uobj;
 
-       if (out_len < sizeof(resp))
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                   out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
                return -EINVAL;
@@ -777,7 +835,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
             (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
                        return -EINVAL;
 
-       uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
+       uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -796,7 +854,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 
        if (cmd.flags & IB_MR_REREG_PD) {
                pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
-                                      file);
+                                      attrs);
                if (!pd) {
                        ret = -EINVAL;
                        goto put_uobjs;
@@ -804,9 +862,9 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
        }
 
        old_pd = mr->pd;
-       ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
-                                       cmd.length, cmd.hca_va,
-                                       cmd.access_flags, pd, &udata);
+       ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+                                       cmd.hca_va, cmd.access_flags, pd,
+                                       &attrs->driver_udata);
        if (!ret) {
                if (cmd.flags & IB_MR_REREG_PD) {
                        atomic_inc(&pd->usecnt);
@@ -821,10 +879,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
        resp.lkey      = mr->lkey;
        resp.rkey      = mr->rkey;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
-               ret = -EFAULT;
-       else
-               ret = in_len;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
 
 put_uobj_pd:
        if (cmd.flags & IB_MR_REREG_PD)
@@ -836,54 +891,43 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
-                          const char __user *buf, int in_len,
-                          int out_len)
+static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_dereg_mr cmd;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
-                                   in_len);
+       return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
 }
 
-ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
-                          const char __user *buf, int in_len,
-                          int out_len)
+static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_alloc_mw      cmd;
        struct ib_uverbs_alloc_mw_resp resp;
        struct ib_uobject             *uobj;
        struct ib_pd                  *pd;
        struct ib_mw                  *mw;
-       struct ib_udata                udata;
        int                            ret;
        struct ib_device *ib_dev;
 
-       if (out_len < sizeof(resp))
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
+       uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
-       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
        if (!pd) {
                ret = -EINVAL;
                goto err_free;
        }
 
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
-
-       mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
+       mw = pd->device->alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
        if (IS_ERR(mw)) {
                ret = PTR_ERR(mw);
                goto err_put;
@@ -900,13 +944,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
        resp.rkey      = mw->rkey;
        resp.mw_handle = uobj->id;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_copy;
-       }
 
        uobj_put_obj_read(pd);
-       return uobj_alloc_commit(uobj, in_len);
+       return uobj_alloc_commit(uobj);
 
 err_copy:
        uverbs_dealloc_mw(mw);
@@ -917,36 +960,32 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_dealloc_mw cmd;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
-                                   in_len);
+       return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs);
 }
 
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
-                                     const char __user *buf, int in_len,
-                                     int out_len)
+static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_comp_channel       cmd;
        struct ib_uverbs_create_comp_channel_resp  resp;
        struct ib_uobject                         *uobj;
        struct ib_uverbs_completion_event_file    *ev_file;
        struct ib_device *ib_dev;
+       int ret;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
+       uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -956,25 +995,17 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
                               uobj);
        ib_uverbs_init_event_queue(&ev_file->ev_queue);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret) {
                uobj_alloc_abort(uobj);
-               return -EFAULT;
+               return ret;
        }
 
-       return uobj_alloc_commit(uobj, in_len);
+       return uobj_alloc_commit(uobj);
 }
 
-static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
-                                      struct ib_udata *ucore,
-                                      struct ib_udata *uhw,
-                                      struct ib_uverbs_ex_create_cq *cmd,
-                                      size_t cmd_sz,
-                                      int (*cb)(struct ib_uverbs_file *file,
-                                                struct ib_ucq_object *obj,
-                                                struct ib_uverbs_ex_create_cq_resp *resp,
-                                                struct ib_udata *udata,
-                                                void *context),
-                                      void *context)
+static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
+                                      struct ib_uverbs_ex_create_cq *cmd)
 {
        struct ib_ucq_object           *obj;
        struct ib_uverbs_completion_event_file    *ev_file = NULL;
@@ -984,21 +1015,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        struct ib_cq_init_attr attr = {};
        struct ib_device *ib_dev;
 
-       if (cmd->comp_vector >= file->device->num_comp_vectors)
+       if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
                return ERR_PTR(-EINVAL);
 
-       obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+       obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
                                                 &ib_dev);
        if (IS_ERR(obj))
                return obj;
 
-       if (!ib_dev->create_cq) {
-               ret = -EOPNOTSUPP;
-               goto err;
-       }
-
        if (cmd->comp_channel >= 0) {
-               ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
+               ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
                if (IS_ERR(ev_file)) {
                        ret = PTR_ERR(ev_file);
                        goto err;
@@ -1013,11 +1039,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 
        attr.cqe = cmd->cqe;
        attr.comp_vector = cmd->comp_vector;
+       attr.flags = cmd->flags;
 
-       if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
-               attr.flags = cmd->flags;
-
-       cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
+       cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context,
+                              &attrs->driver_udata);
        if (IS_ERR(cq)) {
                ret = PTR_ERR(cq);
                goto err_file;
@@ -1034,18 +1059,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        memset(&resp, 0, sizeof resp);
        resp.base.cq_handle = obj->uobject.id;
        resp.base.cqe       = cq->cqe;
-
-       resp.response_length = offsetof(typeof(resp), response_length) +
-               sizeof(resp.response_length);
+       resp.response_length = uverbs_response_length(attrs, sizeof(resp));
 
        cq->res.type = RDMA_RESTRACK_CQ;
        rdma_restrack_add(&cq->res);
 
-       ret = cb(file, obj, &resp, ucore, context);
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
        if (ret)
                goto err_cb;
 
-       ret = uobj_alloc_commit(&obj->uobject, 0);
+       ret = uobj_alloc_commit(&obj->uobject);
        if (ret)
                return ERR_PTR(ret);
        return obj;
@@ -1055,7 +1078,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 
 err_file:
        if (ev_file)
-               ib_uverbs_release_ucq(file, ev_file, obj);
+               ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
 
 err:
        uobj_alloc_abort(&obj->uobject);
@@ -1063,41 +1086,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        return ERR_PTR(ret);
 }
 
-static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
-                                 struct ib_ucq_object *obj,
-                                 struct ib_uverbs_ex_create_cq_resp *resp,
-                                 struct ib_udata *ucore, void *context)
-{
-       if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
-               return -EFAULT;
-
-       return 0;
-}
-
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_cq      cmd;
        struct ib_uverbs_ex_create_cq   cmd_ex;
-       struct ib_uverbs_create_cq_resp resp;
-       struct ib_udata                 ucore;
-       struct ib_udata                 uhw;
        struct ib_ucq_object           *obj;
+       int ret;
 
-       if (out_len < sizeof(resp))
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
-                            sizeof(cmd), sizeof(resp));
-
-       ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        memset(&cmd_ex, 0, sizeof(cmd_ex));
        cmd_ex.user_handle = cmd.user_handle;
@@ -1105,43 +1103,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
        cmd_ex.comp_vector = cmd.comp_vector;
        cmd_ex.comp_channel = cmd.comp_channel;
 
-       obj = create_cq(file, &ucore, &uhw, &cmd_ex,
-                       offsetof(typeof(cmd_ex), comp_channel) +
-                       sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
-                       NULL);
-
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
-
-       return in_len;
-}
-
-static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
-                                    struct ib_ucq_object *obj,
-                                    struct ib_uverbs_ex_create_cq_resp *resp,
-                                    struct ib_udata *ucore, void *context)
-{
-       if (ib_copy_to_udata(ucore, resp, resp->response_length))
-               return -EFAULT;
-
-       return 0;
+       obj = create_cq(attrs, &cmd_ex);
+       return PTR_ERR_OR_ZERO(obj);
 }
 
-int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
-                          struct ib_udata *ucore,
-                          struct ib_udata *uhw)
+static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_create_cq_resp resp;
        struct ib_uverbs_ex_create_cq  cmd;
        struct ib_ucq_object           *obj;
-       int err;
-
-       if (ucore->inlen < sizeof(cmd))
-               return -EINVAL;
+       int ret;
 
-       err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
-       if (err)
-               return err;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        if (cmd.comp_mask)
                return -EINVAL;
@@ -1149,52 +1123,36 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
        if (cmd.reserved)
                return -EINVAL;
 
-       if (ucore->outlen < (offsetof(typeof(resp), response_length) +
-                            sizeof(resp.response_length)))
-               return -ENOSPC;
-
-       obj = create_cq(file, ucore, uhw, &cmd,
-                       min(ucore->inlen, sizeof(cmd)),
-                       ib_uverbs_ex_create_cq_cb, NULL);
-
+       obj = create_cq(attrs, &cmd);
        return PTR_ERR_OR_ZERO(obj);
 }
 
-ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_resize_cq      cmd;
        struct ib_uverbs_resize_cq_resp resp = {};
-       struct ib_udata                 udata;
        struct ib_cq                    *cq;
        int                             ret = -EINVAL;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
        if (!cq)
                return -EINVAL;
 
-       ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+       ret = cq->device->resize_cq(cq, cmd.cqe, &attrs->driver_udata);
        if (ret)
                goto out;
 
        resp.cqe = cq->cqe;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe))
-               ret = -EFAULT;
-
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
 out:
        uobj_put_obj_read(cq);
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
 static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
@@ -1227,9 +1185,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
        return 0;
 }
 
-ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
-                         const char __user *buf, int in_len,
-                         int out_len)
+static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_poll_cq       cmd;
        struct ib_uverbs_poll_cq_resp  resp;
@@ -1239,15 +1195,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
        struct ib_wc                   wc;
        int                            ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
        if (!cq)
                return -EINVAL;
 
        /* we copy a struct ib_uverbs_poll_cq_resp to user space */
-       header_ptr = u64_to_user_ptr(cmd.response);
+       header_ptr = attrs->ucore.outbuf;
        data_ptr = header_ptr + sizeof resp;
 
        memset(&resp, 0, sizeof resp);
@@ -1271,24 +1228,24 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
                goto out_put;
        }
 
-       ret = in_len;
+       ret = 0;
 
 out_put:
        uobj_put_obj_read(cq);
        return ret;
 }
 
-ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
-                               const char __user *buf, int in_len,
-                               int out_len)
+static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_req_notify_cq cmd;
        struct ib_cq                  *cq;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
        if (!cq)
                return -EINVAL;
 
@@ -1297,22 +1254,22 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 
        uobj_put_obj_read(cq);
 
-       return in_len;
+       return 0;
 }
 
-ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_destroy_cq      cmd;
        struct ib_uverbs_destroy_cq_resp resp;
        struct ib_uobject               *uobj;
        struct ib_ucq_object            *obj;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+       uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -1323,21 +1280,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 
        uobj_put_destroy(uobj);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-static int create_qp(struct ib_uverbs_file *file,
-                    struct ib_udata *ucore,
-                    struct ib_udata *uhw,
-                    struct ib_uverbs_ex_create_qp *cmd,
-                    size_t cmd_sz,
-                    int (*cb)(struct ib_uverbs_file *file,
-                              struct ib_uverbs_ex_create_qp_resp *resp,
-                              struct ib_udata *udata),
-                    void *context)
+static int create_qp(struct uverbs_attr_bundle *attrs,
+                    struct ib_uverbs_ex_create_qp *cmd)
 {
        struct ib_uqp_object            *obj;
        struct ib_device                *device;
@@ -1347,7 +1294,6 @@ static int create_qp(struct ib_uverbs_file *file,
        struct ib_cq                    *scq = NULL, *rcq = NULL;
        struct ib_srq                   *srq = NULL;
        struct ib_qp                    *qp;
-       char                            *buf;
        struct ib_qp_init_attr          attr = {};
        struct ib_uverbs_ex_create_qp_resp resp;
        int                             ret;
@@ -1358,7 +1304,7 @@ static int create_qp(struct ib_uverbs_file *file,
        if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                return -EPERM;
 
-       obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+       obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
                                                 &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
@@ -1366,12 +1312,10 @@ static int create_qp(struct ib_uverbs_file *file,
        obj->uevent.uobject.user_handle = cmd->user_handle;
        mutex_init(&obj->mcast_lock);
 
-       if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
-                     sizeof(cmd->rwq_ind_tbl_handle) &&
-                     (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
+       if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) {
                ind_tbl = uobj_get_obj_read(rwq_ind_table,
                                            UVERBS_OBJECT_RWQ_IND_TBL,
-                                           cmd->rwq_ind_tbl_handle, file);
+                                           cmd->rwq_ind_tbl_handle, attrs);
                if (!ind_tbl) {
                        ret = -EINVAL;
                        goto err_put;
@@ -1380,13 +1324,6 @@ static int create_qp(struct ib_uverbs_file *file,
                attr.rwq_ind_tbl = ind_tbl;
        }
 
-       if (cmd_sz > sizeof(*cmd) &&
-           !ib_is_udata_cleared(ucore, sizeof(*cmd),
-                                cmd_sz - sizeof(*cmd))) {
-               ret = -EOPNOTSUPP;
-               goto err_put;
-       }
-
        if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
                ret = -EINVAL;
                goto err_put;
@@ -1397,7 +1334,7 @@ static int create_qp(struct ib_uverbs_file *file,
 
        if (cmd->qp_type == IB_QPT_XRC_TGT) {
                xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
-                                         file);
+                                         attrs);
 
                if (IS_ERR(xrcd_uobj)) {
                        ret = -EINVAL;
@@ -1417,7 +1354,7 @@ static int create_qp(struct ib_uverbs_file *file,
                } else {
                        if (cmd->is_srq) {
                                srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
-                                                       cmd->srq_handle, file);
+                                                       cmd->srq_handle, attrs);
                                if (!srq || srq->srq_type == IB_SRQT_XRC) {
                                        ret = -EINVAL;
                                        goto err_put;
@@ -1428,7 +1365,7 @@ static int create_qp(struct ib_uverbs_file *file,
                                if (cmd->recv_cq_handle != cmd->send_cq_handle) {
                                        rcq = uobj_get_obj_read(
                                                cq, UVERBS_OBJECT_CQ,
-                                               cmd->recv_cq_handle, file);
+                                               cmd->recv_cq_handle, attrs);
                                        if (!rcq) {
                                                ret = -EINVAL;
                                                goto err_put;
@@ -1439,11 +1376,11 @@ static int create_qp(struct ib_uverbs_file *file,
 
                if (has_sq)
                        scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
-                                               cmd->send_cq_handle, file);
+                                               cmd->send_cq_handle, attrs);
                if (!ind_tbl)
                        rcq = rcq ?: scq;
                pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
-                                      file);
+                                      attrs);
                if (!pd || (!scq && has_sq)) {
                        ret = -EINVAL;
                        goto err_put;
@@ -1453,7 +1390,7 @@ static int create_qp(struct ib_uverbs_file *file,
        }
 
        attr.event_handler = ib_uverbs_qp_event_handler;
-       attr.qp_context    = file;
+       attr.qp_context    = attrs->ufile;
        attr.send_cq       = scq;
        attr.recv_cq       = rcq;
        attr.srq           = srq;
@@ -1473,10 +1410,7 @@ static int create_qp(struct ib_uverbs_file *file,
        INIT_LIST_HEAD(&obj->uevent.event_list);
        INIT_LIST_HEAD(&obj->mcast_list);
 
-       if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
-                     sizeof(cmd->create_flags))
-               attr.create_flags = cmd->create_flags;
-
+       attr.create_flags = cmd->create_flags;
        if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
                                IB_QP_CREATE_CROSS_CHANNEL |
                                IB_QP_CREATE_MANAGED_SEND |
@@ -1498,18 +1432,10 @@ static int create_qp(struct ib_uverbs_file *file,
                attr.source_qpn = cmd->source_qpn;
        }
 
-       buf = (void *)cmd + sizeof(*cmd);
-       if (cmd_sz > sizeof(*cmd))
-               if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
-                                            cmd_sz - sizeof(*cmd) - 1))) {
-                       ret = -EINVAL;
-                       goto err_put;
-               }
-
        if (cmd->qp_type == IB_QPT_XRC_TGT)
                qp = ib_create_qp(pd, &attr);
        else
-               qp = _ib_create_qp(device, pd, &attr, uhw,
+               qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
                                   &obj->uevent.uobject);
 
        if (IS_ERR(qp)) {
@@ -1557,11 +1483,9 @@ static int create_qp(struct ib_uverbs_file *file,
        resp.base.max_recv_wr     = attr.cap.max_recv_wr;
        resp.base.max_send_wr     = attr.cap.max_send_wr;
        resp.base.max_inline_data = attr.cap.max_inline_data;
+       resp.response_length = uverbs_response_length(attrs, sizeof(resp));
 
-       resp.response_length = offsetof(typeof(resp), response_length) +
-                              sizeof(resp.response_length);
-
-       ret = cb(file, &resp, ucore);
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
        if (ret)
                goto err_cb;
 
@@ -1583,7 +1507,7 @@ static int create_qp(struct ib_uverbs_file *file,
        if (ind_tbl)
                uobj_put_obj_read(ind_tbl);
 
-       return uobj_alloc_commit(&obj->uevent.uobject, 0);
+       return uobj_alloc_commit(&obj->uevent.uobject);
 err_cb:
        ib_destroy_qp(qp);
 
@@ -1605,39 +1529,15 @@ static int create_qp(struct ib_uverbs_file *file,
        return ret;
 }
 
-static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
-                                 struct ib_uverbs_ex_create_qp_resp *resp,
-                                 struct ib_udata *ucore)
-{
-       if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
-               return -EFAULT;
-
-       return 0;
-}
-
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_qp      cmd;
        struct ib_uverbs_ex_create_qp   cmd_ex;
-       struct ib_udata                 ucore;
-       struct ib_udata                 uhw;
-       ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
-       int                             err;
-
-       if (out_len < resp_size)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
+       int ret;
 
-       ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
-                  sizeof(cmd), resp_size);
-       ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + resp_size,
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - resp_size);
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        memset(&cmd_ex, 0, sizeof(cmd_ex));
        cmd_ex.user_handle = cmd.user_handle;
@@ -1654,42 +1554,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
        cmd_ex.qp_type = cmd.qp_type;
        cmd_ex.is_srq = cmd.is_srq;
 
-       err = create_qp(file, &ucore, &uhw, &cmd_ex,
-                       offsetof(typeof(cmd_ex), is_srq) +
-                       sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
-                       NULL);
-
-       if (err)
-               return err;
-
-       return in_len;
+       return create_qp(attrs, &cmd_ex);
 }
 
-static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
-                                    struct ib_uverbs_ex_create_qp_resp *resp,
-                                    struct ib_udata *ucore)
+static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
 {
-       if (ib_copy_to_udata(ucore, resp, resp->response_length))
-               return -EFAULT;
-
-       return 0;
-}
-
-int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
-                          struct ib_udata *ucore,
-                          struct ib_udata *uhw)
-{
-       struct ib_uverbs_ex_create_qp_resp resp;
-       struct ib_uverbs_ex_create_qp cmd = {0};
-       int err;
-
-       if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
-                           sizeof(cmd.comp_mask)))
-               return -EINVAL;
+       struct ib_uverbs_ex_create_qp cmd;
+       int ret;
 
-       err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
-       if (err)
-               return err;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
                return -EINVAL;
@@ -1697,26 +1572,13 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
        if (cmd.reserved)
                return -EINVAL;
 
-       if (ucore->outlen < (offsetof(typeof(resp), response_length) +
-                            sizeof(resp.response_length)))
-               return -ENOSPC;
-
-       err = create_qp(file, ucore, uhw, &cmd,
-                       min(ucore->inlen, sizeof(cmd)),
-                       ib_uverbs_ex_create_qp_cb, NULL);
-
-       if (err)
-               return err;
-
-       return 0;
+       return create_qp(attrs, &cmd);
 }
 
-ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
-                         const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_open_qp        cmd;
        struct ib_uverbs_create_qp_resp resp;
-       struct ib_udata                 udata;
        struct ib_uqp_object           *obj;
        struct ib_xrcd                 *xrcd;
        struct ib_uobject              *uninitialized_var(xrcd_uobj);
@@ -1725,23 +1587,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
        int ret;
        struct ib_device *ib_dev;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+       obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
                                                 &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
 
-       xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
+       xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs);
        if (IS_ERR(xrcd_uobj)) {
                ret = -EINVAL;
                goto err_put;
@@ -1754,7 +1609,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
        }
 
        attr.event_handler = ib_uverbs_qp_event_handler;
-       attr.qp_context    = file;
+       attr.qp_context    = attrs->ufile;
        attr.qp_num        = cmd.qpn;
        attr.qp_type       = cmd.qp_type;
 
@@ -1775,17 +1630,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
        resp.qpn       = qp->qp_num;
        resp.qp_handle = obj->uevent.uobject.id;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_destroy;
-       }
 
        obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
        atomic_inc(&obj->uxrcd->refcnt);
        qp->uobject = &obj->uevent.uobject;
        uobj_put_read(xrcd_uobj);
 
-       return uobj_alloc_commit(&obj->uevent.uobject, in_len);
+       return uobj_alloc_commit(&obj->uevent.uobject);
 
 err_destroy:
        ib_destroy_qp(qp);
@@ -1818,9 +1672,7 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
        uverb_attr->port_num          = rdma_ah_get_port_num(rdma_attr);
 }
 
-ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
-                          const char __user *buf, int in_len,
-                          int out_len)
+static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_query_qp      cmd;
        struct ib_uverbs_query_qp_resp resp;
@@ -1829,8 +1681,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
        struct ib_qp_init_attr         *init_attr;
        int                            ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        attr      = kmalloc(sizeof *attr, GFP_KERNEL);
        init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
@@ -1839,7 +1692,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
                goto out;
        }
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
        if (!qp) {
                ret = -EINVAL;
                goto out;
@@ -1886,14 +1739,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
        resp.max_inline_data        = init_attr->cap.max_inline_data;
        resp.sq_sig_all             = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
 
 out:
        kfree(attr);
        kfree(init_attr);
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
 /* Remove ignored fields set in the attribute mask */
@@ -1933,8 +1785,8 @@ static void copy_ah_attr_from_uverbs(struct ib_device *dev,
        rdma_ah_set_make_grd(rdma_attr, false);
 }
 
-static int modify_qp(struct ib_uverbs_file *file,
-                    struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
+static int modify_qp(struct uverbs_attr_bundle *attrs,
+                    struct ib_uverbs_ex_modify_qp *cmd)
 {
        struct ib_qp_attr *attr;
        struct ib_qp *qp;
@@ -1944,7 +1796,8 @@ static int modify_qp(struct ib_uverbs_file *file,
        if (!attr)
                return -ENOMEM;
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
+                              attrs);
        if (!qp) {
                ret = -EINVAL;
                goto out;
@@ -2081,7 +1934,7 @@ static int modify_qp(struct ib_uverbs_file *file,
        ret = ib_modify_qp_with_udata(qp, attr,
                                      modify_qp_mask(qp->qp_type,
                                                     cmd->base.attr_mask),
-                                     udata);
+                                     &attrs->driver_udata);
 
 release_qp:
        uobj_put_obj_read(qp);
@@ -2091,80 +1944,64 @@ static int modify_qp(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_modify_qp cmd = {};
-       struct ib_udata udata;
+       struct ib_uverbs_ex_modify_qp cmd;
        int ret;
 
-       if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base));
+       if (ret)
+               return ret;
 
        if (cmd.base.attr_mask &
            ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
                return -EOPNOTSUPP;
 
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL,
-                  in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len);
-
-       ret = modify_qp(file, &cmd, &udata);
-       if (ret)
-               return ret;
-
-       return in_len;
+       return modify_qp(attrs, &cmd);
 }
 
-int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
-                          struct ib_udata *ucore,
-                          struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_modify_qp cmd = {};
+       struct ib_uverbs_ex_modify_qp cmd;
+       struct ib_uverbs_ex_modify_qp_resp resp = {
+               .response_length = uverbs_response_length(attrs, sizeof(resp))
+       };
        int ret;
 
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
+
        /*
         * Last bit is reserved for extending the attr_mask by
         * using another field.
         */
        BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
 
-       if (ucore->inlen < sizeof(cmd.base))
-               return -EINVAL;
-
-       ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
-       if (ret)
-               return ret;
-
        if (cmd.base.attr_mask &
            ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
                return -EOPNOTSUPP;
 
-       if (ucore->inlen > sizeof(cmd)) {
-               if (!ib_is_udata_cleared(ucore, sizeof(cmd),
-                                        ucore->inlen - sizeof(cmd)))
-                       return -EOPNOTSUPP;
-       }
-
-       ret = modify_qp(file, &cmd, uhw);
+       ret = modify_qp(attrs, &cmd);
+       if (ret)
+               return ret;
 
-       return ret;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_destroy_qp      cmd;
        struct ib_uverbs_destroy_qp_resp resp;
        struct ib_uobject               *uobj;
        struct ib_uqp_object            *obj;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
+       uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -2174,10 +2011,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 
        uobj_put_destroy(uobj);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
 static void *alloc_wr(size_t wr_size, __u32 num_sge)
@@ -2190,9 +2024,7 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
                         num_sge * sizeof (struct ib_sge), GFP_KERNEL);
 }
 
-ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_post_send      cmd;
        struct ib_uverbs_post_send_resp resp;
@@ -2202,24 +2034,31 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
        struct ib_qp                   *qp;
        int                             i, sg_ind;
        int                             is_ud;
-       ssize_t                         ret = -EINVAL;
+       int ret, ret2;
        size_t                          next_size;
+       const struct ib_sge __user *sgls;
+       const void __user *wqes;
+       struct uverbs_req_iter iter;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
-           cmd.sge_count * sizeof (struct ib_uverbs_sge))
-               return -EINVAL;
-
-       if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
-               return -EINVAL;
+       ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
+       wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count);
+       if (IS_ERR(wqes))
+               return PTR_ERR(wqes);
+       sgls = uverbs_request_next_ptr(
+               &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge));
+       if (IS_ERR(sgls))
+               return PTR_ERR(sgls);
+       ret = uverbs_request_finish(&iter);
+       if (ret)
+               return ret;
 
        user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
        if (!user_wr)
                return -ENOMEM;
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
        if (!qp)
                goto out;
 
@@ -2227,8 +2066,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
        sg_ind = 0;
        last = NULL;
        for (i = 0; i < cmd.wr_count; ++i) {
-               if (copy_from_user(user_wr,
-                                  buf + sizeof cmd + i * cmd.wqe_size,
+               if (copy_from_user(user_wr, wqes + i * cmd.wqe_size,
                                   cmd.wqe_size)) {
                        ret = -EFAULT;
                        goto out_put;
@@ -2256,7 +2094,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                        }
 
                        ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
-                                                  user_wr->wr.ud.ah, file);
+                                                  user_wr->wr.ud.ah, attrs);
                        if (!ud->ah) {
                                kfree(ud);
                                ret = -EINVAL;
@@ -2336,11 +2174,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                if (next->num_sge) {
                        next->sg_list = (void *) next +
                                ALIGN(next_size, sizeof(struct ib_sge));
-                       if (copy_from_user(next->sg_list,
-                                          buf + sizeof cmd +
-                                          cmd.wr_count * cmd.wqe_size +
-                                          sg_ind * sizeof (struct ib_sge),
-                                          next->num_sge * sizeof (struct ib_sge))) {
+                       if (copy_from_user(next->sg_list, sgls + sg_ind,
+                                          next->num_sge *
+                                                  sizeof(struct ib_sge))) {
                                ret = -EFAULT;
                                goto out_put;
                        }
@@ -2358,8 +2194,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                                break;
                }
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               ret = -EFAULT;
+       ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret2)
+               ret = ret2;
 
 out_put:
        uobj_put_obj_read(qp);
@@ -2375,28 +2212,35 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 out:
        kfree(user_wr);
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
-static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
-                                                   int in_len,
-                                                   u32 wr_count,
-                                                   u32 sge_count,
-                                                   u32 wqe_size)
+static struct ib_recv_wr *
+ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
+                         u32 wqe_size, u32 sge_count)
 {
        struct ib_uverbs_recv_wr *user_wr;
        struct ib_recv_wr        *wr = NULL, *last, *next;
        int                       sg_ind;
        int                       i;
        int                       ret;
-
-       if (in_len < wqe_size * wr_count +
-           sge_count * sizeof (struct ib_uverbs_sge))
-               return ERR_PTR(-EINVAL);
+       const struct ib_sge __user *sgls;
+       const void __user *wqes;
 
        if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
                return ERR_PTR(-EINVAL);
 
+       wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
+       if (IS_ERR(wqes))
+               return ERR_CAST(wqes);
+       sgls = uverbs_request_next_ptr(
+               iter, sge_count * sizeof(struct ib_uverbs_sge));
+       if (IS_ERR(sgls))
+               return ERR_CAST(sgls);
+       ret = uverbs_request_finish(iter);
+       if (ret)
+               return ERR_PTR(ret);
+
        user_wr = kmalloc(wqe_size, GFP_KERNEL);
        if (!user_wr)
                return ERR_PTR(-ENOMEM);
@@ -2404,7 +2248,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
        sg_ind = 0;
        last = NULL;
        for (i = 0; i < wr_count; ++i) {
-               if (copy_from_user(user_wr, buf + i * wqe_size,
+               if (copy_from_user(user_wr, wqes + i * wqe_size,
                                   wqe_size)) {
                        ret = -EFAULT;
                        goto err;
@@ -2443,10 +2287,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
                if (next->num_sge) {
                        next->sg_list = (void *) next +
                                ALIGN(sizeof *next, sizeof (struct ib_sge));
-                       if (copy_from_user(next->sg_list,
-                                          buf + wr_count * wqe_size +
-                                          sg_ind * sizeof (struct ib_sge),
-                                          next->num_sge * sizeof (struct ib_sge))) {
+                       if (copy_from_user(next->sg_list, sgls + sg_ind,
+                                          next->num_sge *
+                                                  sizeof(struct ib_sge))) {
                                ret = -EFAULT;
                                goto err;
                        }
@@ -2470,29 +2313,30 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
        return ERR_PTR(ret);
 }
 
-ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_post_recv      cmd;
        struct ib_uverbs_post_recv_resp resp;
        struct ib_recv_wr              *wr, *next;
        const struct ib_recv_wr        *bad_wr;
        struct ib_qp                   *qp;
-       ssize_t                         ret = -EINVAL;
+       int ret, ret2;
+       struct uverbs_req_iter iter;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
-                                      in_len - sizeof cmd, cmd.wr_count,
-                                      cmd.sge_count, cmd.wqe_size);
+       wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+                                      cmd.sge_count);
        if (IS_ERR(wr))
                return PTR_ERR(wr);
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
-       if (!qp)
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+       if (!qp) {
+               ret = -EINVAL;
                goto out;
+       }
 
        resp.bad_wr = 0;
        ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
@@ -2506,9 +2350,9 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
                }
        }
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               ret = -EFAULT;
-
+       ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret2)
+               ret = ret2;
 out:
        while (wr) {
                next = wr->next;
@@ -2516,36 +2360,36 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
                wr = next;
        }
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
-ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
-                               const char __user *buf, int in_len,
-                               int out_len)
+static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_post_srq_recv      cmd;
        struct ib_uverbs_post_srq_recv_resp resp;
        struct ib_recv_wr                  *wr, *next;
        const struct ib_recv_wr            *bad_wr;
        struct ib_srq                      *srq;
-       ssize_t                             ret = -EINVAL;
+       int ret, ret2;
+       struct uverbs_req_iter iter;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
-                                      in_len - sizeof cmd, cmd.wr_count,
-                                      cmd.sge_count, cmd.wqe_size);
+       wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+                                      cmd.sge_count);
        if (IS_ERR(wr))
                return PTR_ERR(wr);
 
-       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
-       if (!srq)
+       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+       if (!srq) {
+               ret = -EINVAL;
                goto out;
+       }
 
        resp.bad_wr = 0;
-       ret = srq->device->post_srq_recv ?
-               srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
+       ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
 
        uobj_put_obj_read(srq);
 
@@ -2556,8 +2400,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
                                break;
                }
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               ret = -EFAULT;
+       ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret2)
+               ret = ret2;
 
 out:
        while (wr) {
@@ -2566,12 +2411,10 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
                wr = next;
        }
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
-ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
-                           const char __user *buf, int in_len,
-                           int out_len)
+static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_ah       cmd;
        struct ib_uverbs_create_ah_resp  resp;
@@ -2580,21 +2423,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
        struct ib_ah                    *ah;
        struct rdma_ah_attr             attr = {};
        int ret;
-       struct ib_udata                   udata;
        struct ib_device *ib_dev;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
+       uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -2603,7 +2438,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
                goto err;
        }
 
-       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
        if (!pd) {
                ret = -EINVAL;
                goto err;
@@ -2627,7 +2462,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
                rdma_ah_set_ah_flags(&attr, 0);
        }
 
-       ah = rdma_create_user_ah(pd, &attr, &udata);
+       ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata);
        if (IS_ERR(ah)) {
                ret = PTR_ERR(ah);
                goto err_put;
@@ -2639,13 +2474,12 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 
        resp.ah_handle = uobj->id;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_copy;
-       }
 
        uobj_put_obj_read(pd);
-       return uobj_alloc_commit(uobj, in_len);
+       return uobj_alloc_commit(uobj);
 
 err_copy:
        rdma_destroy_ah(ah);
@@ -2658,21 +2492,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_destroy_ah cmd;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
-                                   in_len);
+       return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs);
 }
 
-ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
-                              const char __user *buf, int in_len,
-                              int out_len)
+static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_attach_mcast cmd;
        struct ib_qp                 *qp;
@@ -2680,10 +2512,11 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
        struct ib_uverbs_mcast_entry *mcast;
        int                           ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
        if (!qp)
                return -EINVAL;
 
@@ -2716,12 +2549,10 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
        mutex_unlock(&obj->mcast_lock);
        uobj_put_obj_read(qp);
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
-ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
-                              const char __user *buf, int in_len,
-                              int out_len)
+static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_detach_mcast cmd;
        struct ib_uqp_object         *obj;
@@ -2730,10 +2561,11 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
        int                           ret = -EINVAL;
        bool                          found = false;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
        if (!qp)
                return -EINVAL;
 
@@ -2759,7 +2591,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 out_put:
        mutex_unlock(&obj->mcast_lock);
        uobj_put_obj_read(qp);
-       return ret ? ret : in_len;
+       return ret;
 }
 
 struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
@@ -2838,7 +2670,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res,
 }
 EXPORT_SYMBOL(flow_resources_add);
 
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs,
                                       struct ib_uverbs_flow_spec *kern_spec,
                                       union ib_flow_spec *ib_spec,
                                       struct ib_uflow_resources *uflow_res)
@@ -2867,7 +2699,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
                ib_spec->action.act = uobj_get_obj_read(flow_action,
                                                        UVERBS_OBJECT_FLOW_ACTION,
                                                        kern_spec->action.handle,
-                                                       ufile);
+                                                       attrs);
                if (!ib_spec->action.act)
                        return -EINVAL;
                ib_spec->action.size =
@@ -2885,7 +2717,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
                        uobj_get_obj_read(counters,
                                          UVERBS_OBJECT_COUNTERS,
                                          kern_spec->flow_count.handle,
-                                         ufile);
+                                         attrs);
                if (!ib_spec->flow_count.counters)
                        return -EINVAL;
                ib_spec->flow_count.size =
@@ -3066,7 +2898,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
                                                     kern_filter_sz, ib_spec);
 }
 
-static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs,
                                struct ib_uverbs_flow_spec *kern_spec,
                                union ib_flow_spec *ib_spec,
                                struct ib_uflow_resources *uflow_res)
@@ -3075,17 +2907,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
                return -EINVAL;
 
        if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
-               return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
+               return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec,
                                                   uflow_res);
        else
                return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
 }
 
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
-                          struct ib_udata *ucore,
-                          struct ib_udata *uhw)
+static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_create_wq     cmd = {};
+       struct ib_uverbs_ex_create_wq cmd;
        struct ib_uverbs_ex_create_wq_resp resp = {};
        struct ib_uwq_object           *obj;
        int err = 0;
@@ -3093,43 +2923,27 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
        struct ib_pd *pd;
        struct ib_wq *wq;
        struct ib_wq_init_attr wq_init_attr = {};
-       size_t required_cmd_sz;
-       size_t required_resp_len;
        struct ib_device *ib_dev;
 
-       required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
-       required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
-
-       if (ucore->inlen < required_cmd_sz)
-               return -EINVAL;
-
-       if (ucore->outlen < required_resp_len)
-               return -ENOSPC;
-
-       if (ucore->inlen > sizeof(cmd) &&
-           !ib_is_udata_cleared(ucore, sizeof(cmd),
-                                ucore->inlen - sizeof(cmd)))
-               return -EOPNOTSUPP;
-
-       err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+       err = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (err)
                return err;
 
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
 
-       obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+       obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs,
                                                 &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
 
-       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
        if (!pd) {
                err = -EINVAL;
                goto err_uobj;
        }
 
-       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
        if (!cq) {
                err = -EINVAL;
                goto err_put_pd;
@@ -3138,20 +2952,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
        wq_init_attr.cq = cq;
        wq_init_attr.max_sge = cmd.max_sge;
        wq_init_attr.max_wr = cmd.max_wr;
-       wq_init_attr.wq_context = file;
+       wq_init_attr.wq_context = attrs->ufile;
        wq_init_attr.wq_type = cmd.wq_type;
        wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
-       if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
-                            sizeof(cmd.create_flags)))
-               wq_init_attr.create_flags = cmd.create_flags;
+       wq_init_attr.create_flags = cmd.create_flags;
        obj->uevent.events_reported = 0;
        INIT_LIST_HEAD(&obj->uevent.event_list);
 
-       if (!pd->device->create_wq) {
-               err = -EOPNOTSUPP;
-               goto err_put_cq;
-       }
-       wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+       wq = pd->device->create_wq(pd, &wq_init_attr, &attrs->driver_udata);
        if (IS_ERR(wq)) {
                err = PTR_ERR(wq);
                goto err_put_cq;
@@ -3175,15 +2983,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
        resp.max_sge = wq_init_attr.max_sge;
        resp.max_wr = wq_init_attr.max_wr;
        resp.wqn = wq->wq_num;
-       resp.response_length = required_resp_len;
-       err = ib_copy_to_udata(ucore,
-                              &resp, resp.response_length);
+       resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+       err = uverbs_response(attrs, &resp, sizeof(resp));
        if (err)
                goto err_copy;
 
        uobj_put_obj_read(pd);
        uobj_put_obj_read(cq);
-       return uobj_alloc_commit(&obj->uevent.uobject, 0);
+       return uobj_alloc_commit(&obj->uevent.uobject);
 
 err_copy:
        ib_destroy_wq(wq);
@@ -3197,41 +3004,23 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
        return err;
 }
 
-int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
-                           struct ib_udata *ucore,
-                           struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_destroy_wq  cmd = {};
+       struct ib_uverbs_ex_destroy_wq  cmd;
        struct ib_uverbs_ex_destroy_wq_resp     resp = {};
        struct ib_uobject               *uobj;
        struct ib_uwq_object            *obj;
-       size_t required_cmd_sz;
-       size_t required_resp_len;
        int                             ret;
 
-       required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
-       required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
-
-       if (ucore->inlen < required_cmd_sz)
-               return -EINVAL;
-
-       if (ucore->outlen < required_resp_len)
-               return -ENOSPC;
-
-       if (ucore->inlen > sizeof(cmd) &&
-           !ib_is_udata_cleared(ucore, sizeof(cmd),
-                                ucore->inlen - sizeof(cmd)))
-               return -EOPNOTSUPP;
-
-       ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (ret)
                return ret;
 
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
 
-       resp.response_length = required_resp_len;
-       uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+       resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+       uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -3240,29 +3029,17 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 
        uobj_put_destroy(uobj);
 
-       return ib_copy_to_udata(ucore, &resp, resp.response_length);
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
-                          struct ib_udata *ucore,
-                          struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_modify_wq cmd = {};
+       struct ib_uverbs_ex_modify_wq cmd;
        struct ib_wq *wq;
        struct ib_wq_attr wq_attr = {};
-       size_t required_cmd_sz;
        int ret;
 
-       required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
-       if (ucore->inlen < required_cmd_sz)
-               return -EINVAL;
-
-       if (ucore->inlen > sizeof(cmd) &&
-           !ib_is_udata_cleared(ucore, sizeof(cmd),
-                                ucore->inlen - sizeof(cmd)))
-               return -EOPNOTSUPP;
-
-       ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (ret)
                return ret;
 
@@ -3272,7 +3049,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
        if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
                return -EINVAL;
 
-       wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+       wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
        if (!wq)
                return -EINVAL;
 
@@ -3282,24 +3059,18 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
                wq_attr.flags = cmd.flags;
                wq_attr.flags_mask = cmd.flags_mask;
        }
-       if (!wq->device->modify_wq) {
-               ret = -EOPNOTSUPP;
-               goto out;
-       }
-       ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
-out:
+       ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask,
+                                   &attrs->driver_udata);
        uobj_put_obj_read(wq);
        return ret;
 }
 
-int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
-                                     struct ib_udata *ucore,
-                                     struct ib_udata *uhw)
+static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_create_rwq_ind_table          cmd = {};
+       struct ib_uverbs_ex_create_rwq_ind_table cmd;
        struct ib_uverbs_ex_create_rwq_ind_table_resp  resp = {};
        struct ib_uobject                 *uobj;
-       int err = 0;
+       int err;
        struct ib_rwq_ind_table_init_attr init_attr = {};
        struct ib_rwq_ind_table *rwq_ind_tbl;
        struct ib_wq    **wqs = NULL;
@@ -3307,27 +3078,13 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
        struct ib_wq    *wq = NULL;
        int i, j, num_read_wqs;
        u32 num_wq_handles;
-       u32 expected_in_size;
-       size_t required_cmd_sz_header;
-       size_t required_resp_len;
+       struct uverbs_req_iter iter;
        struct ib_device *ib_dev;
 
-       required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
-       required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
-
-       if (ucore->inlen < required_cmd_sz_header)
-               return -EINVAL;
-
-       if (ucore->outlen < required_resp_len)
-               return -ENOSPC;
-
-       err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+       err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
        if (err)
                return err;
 
-       ucore->inbuf += required_cmd_sz_header;
-       ucore->inlen -= required_cmd_sz_header;
-
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
 
@@ -3335,26 +3092,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
                return -EINVAL;
 
        num_wq_handles = 1 << cmd.log_ind_tbl_size;
-       expected_in_size = num_wq_handles * sizeof(__u32);
-       if (num_wq_handles == 1)
-               /* input size for wq handles is u64 aligned */
-               expected_in_size += sizeof(__u32);
-
-       if (ucore->inlen < expected_in_size)
-               return -EINVAL;
-
-       if (ucore->inlen > expected_in_size &&
-           !ib_is_udata_cleared(ucore, expected_in_size,
-                                ucore->inlen - expected_in_size))
-               return -EOPNOTSUPP;
-
        wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
                              GFP_KERNEL);
        if (!wqs_handles)
                return -ENOMEM;
 
-       err = ib_copy_from_udata(wqs_handles, ucore,
-                                num_wq_handles * sizeof(__u32));
+       err = uverbs_request_next(&iter, wqs_handles,
+                                 num_wq_handles * sizeof(__u32));
+       if (err)
+               goto err_free;
+
+       err = uverbs_request_finish(&iter);
        if (err)
                goto err_free;
 
@@ -3367,7 +3115,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
        for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
                        num_read_wqs++) {
                wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
-                                      wqs_handles[num_read_wqs], file);
+                                      wqs_handles[num_read_wqs], attrs);
                if (!wq) {
                        err = -EINVAL;
                        goto put_wqs;
@@ -3376,7 +3124,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
                wqs[num_read_wqs] = wq;
        }
 
-       uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
+       uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
        if (IS_ERR(uobj)) {
                err = PTR_ERR(uobj);
                goto put_wqs;
@@ -3385,11 +3133,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
        init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
        init_attr.ind_tbl = wqs;
 
-       if (!ib_dev->create_rwq_ind_table) {
-               err = -EOPNOTSUPP;
-               goto err_uobj;
-       }
-       rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
+       rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr,
+                                                  &attrs->driver_udata);
 
        if (IS_ERR(rwq_ind_tbl)) {
                err = PTR_ERR(rwq_ind_tbl);
@@ -3408,10 +3153,9 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 
        resp.ind_tbl_handle = uobj->id;
        resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
-       resp.response_length = required_resp_len;
+       resp.response_length = uverbs_response_length(attrs, sizeof(resp));
 
-       err = ib_copy_to_udata(ucore,
-                              &resp, resp.response_length);
+       err = uverbs_response(attrs, &resp, sizeof(resp));
        if (err)
                goto err_copy;
 
@@ -3420,7 +3164,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
        for (j = 0; j < num_read_wqs; j++)
                uobj_put_obj_read(wqs[j]);
 
-       return uobj_alloc_commit(uobj, 0);
+       return uobj_alloc_commit(uobj);
 
 err_copy:
        ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3435,25 +3179,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
        return err;
 }
 
-int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
-                                      struct ib_udata *ucore,
-                                      struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_destroy_rwq_ind_table       cmd = {};
-       int                     ret;
-       size_t required_cmd_sz;
-
-       required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
-
-       if (ucore->inlen < required_cmd_sz)
-               return -EINVAL;
-
-       if (ucore->inlen > sizeof(cmd) &&
-           !ib_is_udata_cleared(ucore, sizeof(cmd),
-                                ucore->inlen - sizeof(cmd)))
-               return -EOPNOTSUPP;
+       struct ib_uverbs_ex_destroy_rwq_ind_table cmd;
+       int ret;
 
-       ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (ret)
                return ret;
 
@@ -3461,12 +3192,10 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
                return -EOPNOTSUPP;
 
        return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
-                                   cmd.ind_tbl_handle, file, 0);
+                                   cmd.ind_tbl_handle, attrs);
 }
 
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
-                            struct ib_udata *ucore,
-                            struct ib_udata *uhw)
+static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_flow      cmd;
        struct ib_uverbs_create_flow_resp resp;
@@ -3477,24 +3206,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        struct ib_qp                      *qp;
        struct ib_uflow_resources         *uflow_res;
        struct ib_uverbs_flow_spec_hdr    *kern_spec;
-       int err = 0;
+       struct uverbs_req_iter iter;
+       int err;
        void *ib_spec;
        int i;
        struct ib_device *ib_dev;
 
-       if (ucore->inlen < sizeof(cmd))
-               return -EINVAL;
-
-       if (ucore->outlen < sizeof(resp))
-               return -ENOSPC;
-
-       err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+       err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
        if (err)
                return err;
 
-       ucore->inbuf += sizeof(cmd);
-       ucore->inlen -= sizeof(cmd);
-
        if (cmd.comp_mask)
                return -EINVAL;
 
@@ -3512,8 +3233,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
                return -EINVAL;
 
-       if (cmd.flow_attr.size > ucore->inlen ||
-           cmd.flow_attr.size >
+       if (cmd.flow_attr.size >
            (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
                return -EINVAL;
 
@@ -3528,21 +3248,25 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                        return -ENOMEM;
 
                *kern_flow_attr = cmd.flow_attr;
-               err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
-                                        cmd.flow_attr.size);
+               err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs,
+                                         cmd.flow_attr.size);
                if (err)
                        goto err_free_attr;
        } else {
                kern_flow_attr = &cmd.flow_attr;
        }
 
-       uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
+       err = uverbs_request_finish(&iter);
+       if (err)
+               goto err_free_attr;
+
+       uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev);
        if (IS_ERR(uobj)) {
                err = PTR_ERR(uobj);
                goto err_free_attr;
        }
 
-       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
        if (!qp) {
                err = -EINVAL;
                goto err_uobj;
@@ -3553,11 +3277,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                goto err_put;
        }
 
-       if (!qp->device->create_flow) {
-               err = -EOPNOTSUPP;
-               goto err_put;
-       }
-
        flow_attr = kzalloc(struct_size(flow_attr, flows,
                                cmd.flow_attr.num_of_specs), GFP_KERNEL);
        if (!flow_attr) {
@@ -3584,7 +3303,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                        cmd.flow_attr.size >= kern_spec->size;
             i++) {
                err = kern_spec_to_ib_spec(
-                               file, (struct ib_uverbs_flow_spec *)kern_spec,
+                               attrs, (struct ib_uverbs_flow_spec *)kern_spec,
                                ib_spec, uflow_res);
                if (err)
                        goto err_free;
@@ -3602,8 +3321,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                goto err_free;
        }
 
-       flow_id = qp->device->create_flow(qp, flow_attr,
-                                         IB_FLOW_DOMAIN_USER, uhw);
+       flow_id = qp->device->create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER,
+                                         &attrs->driver_udata);
 
        if (IS_ERR(flow_id)) {
                err = PTR_ERR(flow_id);
@@ -3615,8 +3334,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        memset(&resp, 0, sizeof(resp));
        resp.flow_handle = uobj->id;
 
-       err = ib_copy_to_udata(ucore,
-                              &resp, sizeof(resp));
+       err = uverbs_response(attrs, &resp, sizeof(resp));
        if (err)
                goto err_copy;
 
@@ -3624,7 +3342,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        kfree(flow_attr);
        if (cmd.flow_attr.num_of_specs)
                kfree(kern_flow_attr);
-       return uobj_alloc_commit(uobj, 0);
+       return uobj_alloc_commit(uobj);
 err_copy:
        if (!qp->device->destroy_flow(flow_id))
                atomic_dec(&qp->usecnt);
@@ -3642,28 +3360,22 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        return err;
 }
 
-int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
-                             struct ib_udata *ucore,
-                             struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_destroy_flow   cmd;
        int                             ret;
 
-       if (ucore->inlen < sizeof(cmd))
-               return -EINVAL;
-
-       ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (ret)
                return ret;
 
        if (cmd.comp_mask)
                return -EINVAL;
 
-       return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
-                                   0);
+       return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs);
 }
 
-static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
                                struct ib_uverbs_create_xsrq *cmd,
                                struct ib_udata *udata)
 {
@@ -3676,7 +3388,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        int ret;
        struct ib_device *ib_dev;
 
-       obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+       obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
                                                  &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
@@ -3686,7 +3398,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 
        if (cmd->srq_type == IB_SRQT_XRC) {
                xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
-                                         file);
+                                         attrs);
                if (IS_ERR(xrcd_uobj)) {
                        ret = -EINVAL;
                        goto err;
@@ -3704,21 +3416,21 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 
        if (ib_srq_has_cq(cmd->srq_type)) {
                attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
-                                               cmd->cq_handle, file);
+                                               cmd->cq_handle, attrs);
                if (!attr.ext.cq) {
                        ret = -EINVAL;
                        goto err_put_xrcd;
                }
        }
 
-       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
        if (!pd) {
                ret = -EINVAL;
                goto err_put_cq;
        }
 
        attr.event_handler  = ib_uverbs_srq_event_handler;
-       attr.srq_context    = file;
+       attr.srq_context    = attrs->ufile;
        attr.srq_type       = cmd->srq_type;
        attr.attr.max_wr    = cmd->max_wr;
        attr.attr.max_sge   = cmd->max_sge;
@@ -3763,11 +3475,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        if (cmd->srq_type == IB_SRQT_XRC)
                resp.srqn = srq->ext.xrc.srq_num;
 
-       if (copy_to_user(u64_to_user_ptr(cmd->response),
-                        &resp, sizeof resp)) {
-               ret = -EFAULT;
+       ret = uverbs_response(attrs, &resp, sizeof(resp));
+       if (ret)
                goto err_copy;
-       }
 
        if (cmd->srq_type == IB_SRQT_XRC)
                uobj_put_read(xrcd_uobj);
@@ -3776,7 +3486,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
                uobj_put_obj_read(attr.ext.cq);
 
        uobj_put_obj_read(pd);
-       return uobj_alloc_commit(&obj->uevent.uobject, 0);
+       return uobj_alloc_commit(&obj->uevent.uobject);
 
 err_copy:
        ib_destroy_srq(srq);
@@ -3799,21 +3509,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        return ret;
 }
 
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_srq      cmd;
        struct ib_uverbs_create_xsrq     xcmd;
-       struct ib_uverbs_create_srq_resp resp;
-       struct ib_udata                  udata;
        int ret;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
        memset(&xcmd, 0, sizeof(xcmd));
        xcmd.response    = cmd.response;
@@ -3824,77 +3528,48 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
        xcmd.max_sge     = cmd.max_sge;
        xcmd.srq_limit   = cmd.srq_limit;
 
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
-
-       ret = __uverbs_create_xsrq(file, &xcmd, &udata);
-       if (ret)
-               return ret;
-
-       return in_len;
+       return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata);
 }
 
-ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
-                             const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_create_xsrq     cmd;
-       struct ib_uverbs_create_srq_resp resp;
-       struct ib_udata                  udata;
        int ret;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
-                  u64_to_user_ptr(cmd.response) + sizeof(resp),
-                  in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
-                  out_len - sizeof(resp));
-
-       ret = __uverbs_create_xsrq(file, &cmd, &udata);
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (ret)
                return ret;
 
-       return in_len;
+       return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata);
 }
 
-ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_modify_srq cmd;
-       struct ib_udata             udata;
        struct ib_srq              *srq;
        struct ib_srq_attr          attr;
        int                         ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
-                  out_len);
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
        if (!srq)
                return -EINVAL;
 
        attr.max_wr    = cmd.max_wr;
        attr.srq_limit = cmd.srq_limit;
 
-       ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+       ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask,
+                                     &attrs->driver_udata);
 
        uobj_put_obj_read(srq);
 
-       return ret ? ret : in_len;
+       return ret;
 }
 
-ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
-                           const char __user *buf,
-                           int in_len, int out_len)
+static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_query_srq      cmd;
        struct ib_uverbs_query_srq_resp resp;
@@ -3902,13 +3577,11 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
        struct ib_srq                   *srq;
        int                             ret;
 
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
        if (!srq)
                return -EINVAL;
 
@@ -3925,25 +3598,22 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
        resp.max_sge   = attr.max_sge;
        resp.srq_limit = attr.srq_limit;
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
-                             const char __user *buf, int in_len,
-                             int out_len)
+static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs)
 {
        struct ib_uverbs_destroy_srq      cmd;
        struct ib_uverbs_destroy_srq_resp resp;
        struct ib_uobject                *uobj;
        struct ib_uevent_object          *obj;
+       int ret;
 
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+       if (ret)
+               return ret;
 
-       uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+       uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -3953,35 +3623,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 
        uobj_put_destroy(uobj);
 
-       if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
-               return -EFAULT;
-
-       return in_len;
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
-                             struct ib_udata *ucore,
-                             struct ib_udata *uhw)
+static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_query_device_resp resp = { {0} };
+       struct ib_uverbs_ex_query_device_resp resp = {};
        struct ib_uverbs_ex_query_device  cmd;
        struct ib_device_attr attr = {0};
        struct ib_ucontext *ucontext;
        struct ib_device *ib_dev;
        int err;
 
-       ucontext = ib_uverbs_get_ucontext(file);
+       ucontext = ib_uverbs_get_ucontext(attrs);
        if (IS_ERR(ucontext))
                return PTR_ERR(ucontext);
        ib_dev = ucontext->device;
 
-       if (!ib_dev->query_device)
-               return -EOPNOTSUPP;
-
-       if (ucore->inlen < sizeof(cmd))
-               return -EINVAL;
-
-       err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+       err = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (err)
                return err;
 
@@ -3991,20 +3650,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
        if (cmd.reserved)
                return -EINVAL;
 
-       resp.response_length = offsetof(typeof(resp), odp_caps);
-
-       if (ucore->outlen < resp.response_length)
-               return -ENOSPC;
-
-       err = ib_dev->query_device(ib_dev, &attr, uhw);
+       err = ib_dev->query_device(ib_dev, &attr, &attrs->driver_udata);
        if (err)
                return err;
 
        copy_query_dev_fields(ucontext, &resp.base, &attr);
 
-       if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
-               goto end;
-
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        resp.odp_caps.general_caps = attr.odp_caps.general_caps;
        resp.odp_caps.per_transport_caps.rc_odp_caps =
@@ -4014,99 +3665,39 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
        resp.odp_caps.per_transport_caps.ud_odp_caps =
                attr.odp_caps.per_transport_caps.ud_odp_caps;
 #endif
-       resp.response_length += sizeof(resp.odp_caps);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
-               goto end;
 
        resp.timestamp_mask = attr.timestamp_mask;
-       resp.response_length += sizeof(resp.timestamp_mask);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
-               goto end;
-
        resp.hca_core_clock = attr.hca_core_clock;
-       resp.response_length += sizeof(resp.hca_core_clock);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
-               goto end;
-
        resp.device_cap_flags_ex = attr.device_cap_flags;
-       resp.response_length += sizeof(resp.device_cap_flags_ex);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
-               goto end;
-
        resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
        resp.rss_caps.max_rwq_indirection_tables =
                attr.rss_caps.max_rwq_indirection_tables;
        resp.rss_caps.max_rwq_indirection_table_size =
                attr.rss_caps.max_rwq_indirection_table_size;
-
-       resp.response_length += sizeof(resp.rss_caps);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
-               goto end;
-
        resp.max_wq_type_rq = attr.max_wq_type_rq;
-       resp.response_length += sizeof(resp.max_wq_type_rq);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
-               goto end;
-
        resp.raw_packet_caps = attr.raw_packet_caps;
-       resp.response_length += sizeof(resp.raw_packet_caps);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
-               goto end;
-
        resp.tm_caps.max_rndv_hdr_size  = attr.tm_caps.max_rndv_hdr_size;
        resp.tm_caps.max_num_tags       = attr.tm_caps.max_num_tags;
        resp.tm_caps.max_ops            = attr.tm_caps.max_ops;
        resp.tm_caps.max_sge            = attr.tm_caps.max_sge;
        resp.tm_caps.flags              = attr.tm_caps.flags;
-       resp.response_length += sizeof(resp.tm_caps);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps))
-               goto end;
-
        resp.cq_moderation_caps.max_cq_moderation_count  =
                attr.cq_caps.max_cq_moderation_count;
        resp.cq_moderation_caps.max_cq_moderation_period =
                attr.cq_caps.max_cq_moderation_period;
-       resp.response_length += sizeof(resp.cq_moderation_caps);
-
-       if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
-               goto end;
-
        resp.max_dm_size = attr.max_dm_size;
-       resp.response_length += sizeof(resp.max_dm_size);
-end:
-       err = ib_copy_to_udata(ucore, &resp, resp.response_length);
-       return err;
+       resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+
+       return uverbs_response(attrs, &resp, sizeof(resp));
 }
 
-int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
-                          struct ib_udata *ucore,
-                          struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
 {
-       struct ib_uverbs_ex_modify_cq cmd = {};
+       struct ib_uverbs_ex_modify_cq cmd;
        struct ib_cq *cq;
-       size_t required_cmd_sz;
        int ret;
 
-       required_cmd_sz = offsetof(typeof(cmd), reserved) +
-                               sizeof(cmd.reserved);
-       if (ucore->inlen < required_cmd_sz)
-               return -EINVAL;
-
-       /* sanity checks */
-       if (ucore->inlen > sizeof(cmd) &&
-           !ib_is_udata_cleared(ucore, sizeof(cmd),
-                                ucore->inlen - sizeof(cmd)))
-               return -EOPNOTSUPP;
-
-       ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+       ret = uverbs_request(attrs, &cmd, sizeof(cmd));
        if (ret)
                return ret;
 
@@ -4116,7 +3707,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
        if (cmd.attr_mask > IB_CQ_MODERATE)
                return -EOPNOTSUPP;
 
-       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
        if (!cq)
                return -EINVAL;
 
@@ -4126,3 +3717,381 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
 
        return ret;
 }
+
+/*
+ * Describe the input structs for write(). Some write methods have an input
+ * only struct, most have an input and output. If the struct has an output then
+ * the 'response' u64 must be the first field in the request structure.
+ *
+ * If udata is present then both the request and response structs have a
+ * trailing driver_data flex array. In this case the size of the base struct
+ * cannot be changed.
+ */
+#define offsetof_after(_struct, _member)                                       \
+       (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member))
+
+#define UAPI_DEF_WRITE_IO(req, resp)                                           \
+       .write.has_resp = 1 +                                                  \
+                         BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) +    \
+                         BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) !=    \
+                                           sizeof(u64)),                      \
+       .write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
+
+#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req)
+
+#define UAPI_DEF_WRITE_UDATA_IO(req, resp)                                     \
+       UAPI_DEF_WRITE_IO(req, resp),                                          \
+               .write.has_udata =                                             \
+                       1 +                                                    \
+                       BUILD_BUG_ON_ZERO(offsetof(req, driver_data) !=        \
+                                         sizeof(req)) +                       \
+                       BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) !=       \
+                                         sizeof(resp))
+
+#define UAPI_DEF_WRITE_UDATA_I(req)                                            \
+       UAPI_DEF_WRITE_I(req),                                                 \
+               .write.has_udata =                                             \
+                       1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) !=    \
+                                             sizeof(req))
+
+/*
+ * The _EX versions are for use with WRITE_EX and allow the last struct member
+ * to be specified. Buffers that do not include that member will be rejected.
+ */
+#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member)     \
+       .write.has_resp = 1,                                                   \
+       .write.req_size = offsetof_after(req, req_last_member),                \
+       .write.resp_size = offsetof_after(resp, resp_last_member)
+
+#define UAPI_DEF_WRITE_I_EX(req, req_last_member)                              \
+       .write.req_size = offsetof_after(req, req_last_member)
+
+const struct uapi_definition uverbs_def_write_intf[] = {
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_AH,
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH,
+                                    ib_uverbs_create_ah,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_create_ah,
+                                            struct ib_uverbs_create_ah_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DESTROY_AH,
+                       ib_uverbs_destroy_ah,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_COMP_CHANNEL,
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+                       ib_uverbs_create_comp_channel,
+                       UAPI_DEF_WRITE_IO(
+                               struct ib_uverbs_create_comp_channel,
+                               struct ib_uverbs_create_comp_channel_resp))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_CQ,
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ,
+                                    ib_uverbs_create_cq,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_create_cq,
+                                            struct ib_uverbs_create_cq_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DESTROY_CQ,
+                       ib_uverbs_destroy_cq,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq,
+                                         struct ib_uverbs_destroy_cq_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_POLL_CQ,
+                       ib_uverbs_poll_cq,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq,
+                                         struct ib_uverbs_poll_cq_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(poll_cq)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+                       ib_uverbs_req_notify_cq,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq),
+                       UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)),
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ,
+                                    ib_uverbs_resize_cq,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_resize_cq,
+                                            struct ib_uverbs_resize_cq_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_CREATE_CQ,
+                       ib_uverbs_ex_create_cq,
+                       UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+                                            reserved,
+                                            struct ib_uverbs_ex_create_cq_resp,
+                                            response_length),
+                       UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_MODIFY_CQ,
+                       ib_uverbs_ex_modify_cq,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
+                       UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_DEVICE,
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT,
+                                    ib_uverbs_get_context,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_get_context,
+                                            struct ib_uverbs_get_context_resp)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_QUERY_DEVICE,
+                       ib_uverbs_query_device,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device,
+                                         struct ib_uverbs_query_device_resp)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_QUERY_PORT,
+                       ib_uverbs_query_port,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port,
+                                         struct ib_uverbs_query_port_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(query_port)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+                       ib_uverbs_ex_query_device,
+                       UAPI_DEF_WRITE_IO_EX(
+                               struct ib_uverbs_ex_query_device,
+                               reserved,
+                               struct ib_uverbs_ex_query_device_resp,
+                               response_length),
+                       UAPI_DEF_METHOD_NEEDS_FN(query_device)),
+               UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext),
+               UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_FLOW,
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_CREATE_FLOW,
+                       ib_uverbs_ex_create_flow,
+                       UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow,
+                                            flow_attr,
+                                            struct ib_uverbs_create_flow_resp,
+                                            flow_handle),
+                       UAPI_DEF_METHOD_NEEDS_FN(create_flow)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+                       ib_uverbs_ex_destroy_flow,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_MR,
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR,
+                                    ib_uverbs_dereg_mr,
+                                    UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr),
+                                    UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_REG_MR,
+                       ib_uverbs_reg_mr,
+                       UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr,
+                                               struct ib_uverbs_reg_mr_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_REREG_MR,
+                       ib_uverbs_rereg_mr,
+                       UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
+                                               struct ib_uverbs_rereg_mr_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_MW,
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_ALLOC_MW,
+                       ib_uverbs_alloc_mw,
+                       UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw,
+                                               struct ib_uverbs_alloc_mw_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DEALLOC_MW,
+                       ib_uverbs_dealloc_mw,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw),
+                       UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_PD,
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_ALLOC_PD,
+                       ib_uverbs_alloc_pd,
+                       UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd,
+                                               struct ib_uverbs_alloc_pd_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DEALLOC_PD,
+                       ib_uverbs_dealloc_pd,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd),
+                       UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_QP,
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_ATTACH_MCAST,
+                       ib_uverbs_attach_mcast,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast),
+                       UAPI_DEF_METHOD_NEEDS_FN(attach_mcast),
+                       UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP,
+                                    ib_uverbs_create_qp,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_create_qp,
+                                            struct ib_uverbs_create_qp_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DESTROY_QP,
+                       ib_uverbs_destroy_qp,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp,
+                                         struct ib_uverbs_destroy_qp_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DETACH_MCAST,
+                       ib_uverbs_detach_mcast,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast),
+                       UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_MODIFY_QP,
+                       ib_uverbs_modify_qp,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp),
+                       UAPI_DEF_METHOD_NEEDS_FN(modify_qp)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_POST_RECV,
+                       ib_uverbs_post_recv,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv,
+                                         struct ib_uverbs_post_recv_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(post_recv)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_POST_SEND,
+                       ib_uverbs_post_send,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send,
+                                         struct ib_uverbs_post_send_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(post_send)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_QUERY_QP,
+                       ib_uverbs_query_qp,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp,
+                                         struct ib_uverbs_query_qp_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(query_qp)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_CREATE_QP,
+                       ib_uverbs_ex_create_qp,
+                       UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp,
+                                            comp_mask,
+                                            struct ib_uverbs_ex_create_qp_resp,
+                                            response_length),
+                       UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_MODIFY_QP,
+                       ib_uverbs_ex_modify_qp,
+                       UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp,
+                                            base,
+                                            struct ib_uverbs_ex_modify_qp_resp,
+                                            response_length),
+                       UAPI_DEF_METHOD_NEEDS_FN(modify_qp))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_RWQ_IND_TBL,
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+                       ib_uverbs_ex_create_rwq_ind_table,
+                       UAPI_DEF_WRITE_IO_EX(
+                               struct ib_uverbs_ex_create_rwq_ind_table,
+                               log_ind_tbl_size,
+                               struct ib_uverbs_ex_create_rwq_ind_table_resp,
+                               ind_tbl_num),
+                       UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+                       ib_uverbs_ex_destroy_rwq_ind_table,
+                       UAPI_DEF_WRITE_I(
+                               struct ib_uverbs_ex_destroy_rwq_ind_table),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_WQ,
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_CREATE_WQ,
+                       ib_uverbs_ex_create_wq,
+                       UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq,
+                                            max_sge,
+                                            struct ib_uverbs_ex_create_wq_resp,
+                                            wqn),
+                       UAPI_DEF_METHOD_NEEDS_FN(create_wq)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+                       ib_uverbs_ex_destroy_wq,
+                       UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq,
+                                            wq_handle,
+                                            struct ib_uverbs_ex_destroy_wq_resp,
+                                            reserved),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)),
+               DECLARE_UVERBS_WRITE_EX(
+                       IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+                       ib_uverbs_ex_modify_wq,
+                       UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq,
+                                           curr_wq_state),
+                       UAPI_DEF_METHOD_NEEDS_FN(modify_wq))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_SRQ,
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ,
+                                    ib_uverbs_create_srq,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_create_srq,
+                                            struct ib_uverbs_create_srq_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ,
+                                    ib_uverbs_create_xsrq,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_create_xsrq,
+                                            struct ib_uverbs_create_srq_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_DESTROY_SRQ,
+                       ib_uverbs_destroy_srq,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq,
+                                         struct ib_uverbs_destroy_srq_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_MODIFY_SRQ,
+                       ib_uverbs_modify_srq,
+                       UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq),
+                       UAPI_DEF_METHOD_NEEDS_FN(modify_srq)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_POST_SRQ_RECV,
+                       ib_uverbs_post_srq_recv,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv,
+                                         struct ib_uverbs_post_srq_recv_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)),
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_QUERY_SRQ,
+                       ib_uverbs_query_srq,
+                       UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq,
+                                         struct ib_uverbs_query_srq_resp),
+                       UAPI_DEF_METHOD_NEEDS_FN(query_srq))),
+
+       DECLARE_UVERBS_OBJECT(
+               UVERBS_OBJECT_XRCD,
+               DECLARE_UVERBS_WRITE(
+                       IB_USER_VERBS_CMD_CLOSE_XRCD,
+                       ib_uverbs_close_xrcd,
+                       UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
+                       UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
+                                    ib_uverbs_open_qp,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_open_qp,
+                                            struct ib_uverbs_create_qp_resp)),
+               DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD,
+                                    ib_uverbs_open_xrcd,
+                                    UAPI_DEF_WRITE_UDATA_IO(
+                                            struct ib_uverbs_open_xrcd,
+                                            struct ib_uverbs_open_xrcd_resp),
+                                    UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+
+       {},
+};
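
The uverbs_cmd.c changes above follow one mechanical pattern: each write handler loses its (file, buf, in_len, out_len) or (file, ucore, uhw) arguments, takes only the uverbs_attr_bundle, and replaces the open-coded copy_from_user()/ib_copy_from_udata() calls and per-handler length arithmetic with uverbs_request()/uverbs_response(), with the size bounds supplied by the UAPI_DEF_WRITE_* entries in the table above. The _EX variants compute .write.req_size/.write.resp_size with offsetof_after(), i.e. the user buffer must reach the named last member, which is the same bound the removed required_cmd_sz computations derived by hand. As orientation only, here is a minimal sketch of the resulting handler shape; "frob" and its structs are invented, while the uverbs_* helpers and attrs->driver_udata are taken from the patch:

/* Sketch, not part of the patch: a hypothetical "frob" extended command. */
static int ib_uverbs_frob(struct uverbs_attr_bundle *attrs)
{
	struct ib_uverbs_frob cmd;		/* hypothetical request struct */
	struct ib_uverbs_frob_resp resp = {};	/* hypothetical response struct */
	int ret;

	/* length handling now lives in uverbs_request()/the dispatcher,
	 * not in each handler as before this series */
	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	if (cmd.comp_mask)
		return -EOPNOTSUPP;

	/* a driver entry point would receive &attrs->driver_udata here */

	resp.response_length = uverbs_response_length(attrs, sizeof(resp));
	return uverbs_response(attrs, &resp, sizeof(resp));
}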
index b0e493e8d860ab046fd4ff44eb8873fd42a7690f..e643a43dce8d61715dd653da4395a4e425e5e5a2 100644
@@ -404,8 +404,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
 static int ib_uverbs_run_method(struct bundle_priv *pbundle,
                                unsigned int num_attrs)
 {
-       int (*handler)(struct ib_uverbs_file *ufile,
-                      struct uverbs_attr_bundle *ctx);
+       int (*handler)(struct uverbs_attr_bundle *attrs);
        size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
        unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
        unsigned int i;
@@ -436,6 +435,9 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
                                    pbundle->method_elm->key_bitmap_len)))
                return -EINVAL;
 
+       if (pbundle->method_elm->has_udata)
+               create_udata(&pbundle->bundle, &pbundle->bundle.driver_udata);
+
        if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
                struct uverbs_obj_attr *destroy_attr =
                        &pbundle->bundle.attrs[destroy_bkey].obj_attr;
@@ -445,10 +447,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
                        return ret;
                __clear_bit(destroy_bkey, pbundle->uobj_finalize);
 
-               ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+               ret = handler(&pbundle->bundle);
                uobj_put_destroy(destroy_attr->uobject);
        } else {
-               ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+               ret = handler(&pbundle->bundle);
        }
 
        /*
index 6d373f5515b7e2f525b4cf9ffece3f1d5da0420e..96a5f89bbb75183a1d0723ad1d409b4820fa0a21 100644 (file)
@@ -74,64 +74,6 @@ static dev_t dynamic_uverbs_dev;
 static struct class *uverbs_class;
 
 static DEFINE_IDA(uverbs_ida);
-
-static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
-                                    const char __user *buf, int in_len,
-                                    int out_len) = {
-       [IB_USER_VERBS_CMD_GET_CONTEXT]         = ib_uverbs_get_context,
-       [IB_USER_VERBS_CMD_QUERY_DEVICE]        = ib_uverbs_query_device,
-       [IB_USER_VERBS_CMD_QUERY_PORT]          = ib_uverbs_query_port,
-       [IB_USER_VERBS_CMD_ALLOC_PD]            = ib_uverbs_alloc_pd,
-       [IB_USER_VERBS_CMD_DEALLOC_PD]          = ib_uverbs_dealloc_pd,
-       [IB_USER_VERBS_CMD_REG_MR]              = ib_uverbs_reg_mr,
-       [IB_USER_VERBS_CMD_REREG_MR]            = ib_uverbs_rereg_mr,
-       [IB_USER_VERBS_CMD_DEREG_MR]            = ib_uverbs_dereg_mr,
-       [IB_USER_VERBS_CMD_ALLOC_MW]            = ib_uverbs_alloc_mw,
-       [IB_USER_VERBS_CMD_DEALLOC_MW]          = ib_uverbs_dealloc_mw,
-       [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
-       [IB_USER_VERBS_CMD_CREATE_CQ]           = ib_uverbs_create_cq,
-       [IB_USER_VERBS_CMD_RESIZE_CQ]           = ib_uverbs_resize_cq,
-       [IB_USER_VERBS_CMD_POLL_CQ]             = ib_uverbs_poll_cq,
-       [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]       = ib_uverbs_req_notify_cq,
-       [IB_USER_VERBS_CMD_DESTROY_CQ]          = ib_uverbs_destroy_cq,
-       [IB_USER_VERBS_CMD_CREATE_QP]           = ib_uverbs_create_qp,
-       [IB_USER_VERBS_CMD_QUERY_QP]            = ib_uverbs_query_qp,
-       [IB_USER_VERBS_CMD_MODIFY_QP]           = ib_uverbs_modify_qp,
-       [IB_USER_VERBS_CMD_DESTROY_QP]          = ib_uverbs_destroy_qp,
-       [IB_USER_VERBS_CMD_POST_SEND]           = ib_uverbs_post_send,
-       [IB_USER_VERBS_CMD_POST_RECV]           = ib_uverbs_post_recv,
-       [IB_USER_VERBS_CMD_POST_SRQ_RECV]       = ib_uverbs_post_srq_recv,
-       [IB_USER_VERBS_CMD_CREATE_AH]           = ib_uverbs_create_ah,
-       [IB_USER_VERBS_CMD_DESTROY_AH]          = ib_uverbs_destroy_ah,
-       [IB_USER_VERBS_CMD_ATTACH_MCAST]        = ib_uverbs_attach_mcast,
-       [IB_USER_VERBS_CMD_DETACH_MCAST]        = ib_uverbs_detach_mcast,
-       [IB_USER_VERBS_CMD_CREATE_SRQ]          = ib_uverbs_create_srq,
-       [IB_USER_VERBS_CMD_MODIFY_SRQ]          = ib_uverbs_modify_srq,
-       [IB_USER_VERBS_CMD_QUERY_SRQ]           = ib_uverbs_query_srq,
-       [IB_USER_VERBS_CMD_DESTROY_SRQ]         = ib_uverbs_destroy_srq,
-       [IB_USER_VERBS_CMD_OPEN_XRCD]           = ib_uverbs_open_xrcd,
-       [IB_USER_VERBS_CMD_CLOSE_XRCD]          = ib_uverbs_close_xrcd,
-       [IB_USER_VERBS_CMD_CREATE_XSRQ]         = ib_uverbs_create_xsrq,
-       [IB_USER_VERBS_CMD_OPEN_QP]             = ib_uverbs_open_qp,
-};
-
-static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
-                                   struct ib_udata *ucore,
-                                   struct ib_udata *uhw) = {
-       [IB_USER_VERBS_EX_CMD_CREATE_FLOW]      = ib_uverbs_ex_create_flow,
-       [IB_USER_VERBS_EX_CMD_DESTROY_FLOW]     = ib_uverbs_ex_destroy_flow,
-       [IB_USER_VERBS_EX_CMD_QUERY_DEVICE]     = ib_uverbs_ex_query_device,
-       [IB_USER_VERBS_EX_CMD_CREATE_CQ]        = ib_uverbs_ex_create_cq,
-       [IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
-       [IB_USER_VERBS_EX_CMD_CREATE_WQ]        = ib_uverbs_ex_create_wq,
-       [IB_USER_VERBS_EX_CMD_MODIFY_WQ]        = ib_uverbs_ex_modify_wq,
-       [IB_USER_VERBS_EX_CMD_DESTROY_WQ]       = ib_uverbs_ex_destroy_wq,
-       [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
-       [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
-       [IB_USER_VERBS_EX_CMD_MODIFY_QP]        = ib_uverbs_ex_modify_qp,
-       [IB_USER_VERBS_EX_CMD_MODIFY_CQ]        = ib_uverbs_ex_modify_cq,
-};
-
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
@@ -139,7 +81,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
  * Must be called with the ufile->device->disassociate_srcu held, and the lock
  * must be held until use of the ucontext is finished.
  */
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
 {
        /*
         * We do not hold the hw_destroy_rwsem lock for this flow, instead
@@ -157,7 +99,7 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
 
        return ucontext;
 }
-EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
 
 int uverbs_dealloc_mw(struct ib_mw *mw)
 {
@@ -646,51 +588,19 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file
        return filp;
 }
 
-static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
-                               bool extended)
-{
-       if (!extended)
-               return ufile->uverbs_cmd_mask & BIT_ULL(command);
-
-       return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
-}
-
-static bool verify_command_idx(u32 command, bool extended)
-{
-       if (extended)
-               return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
-                      uverbs_ex_cmd_table[command];
-
-       return command < ARRAY_SIZE(uverbs_cmd_table) &&
-              uverbs_cmd_table[command];
-}
-
-static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
-                          u32 *command, bool *extended)
-{
-       if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
-                                  IB_USER_VERBS_CMD_COMMAND_MASK))
-               return -EINVAL;
-
-       *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
-       *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
-
-       if (!verify_command_idx(*command, *extended))
-               return -EOPNOTSUPP;
-
-       return 0;
-}
-
 static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
-                         struct ib_uverbs_ex_cmd_hdr *ex_hdr,
-                         size_t count, bool extended)
+                         struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
+                         const struct uverbs_api_write_method *method_elm)
 {
-       if (extended) {
+       if (method_elm->is_ex) {
                count -= sizeof(*hdr) + sizeof(*ex_hdr);
 
                if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
                        return -EINVAL;
 
+               if (hdr->in_words * 8 < method_elm->req_size)
+                       return -ENOSPC;
+
                if (ex_hdr->cmd_hdr_reserved)
                        return -EINVAL;
 
@@ -698,6 +608,9 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
                        if (!hdr->out_words && !ex_hdr->provider_out_words)
                                return -EINVAL;
 
+                       if (hdr->out_words * 8 < method_elm->resp_size)
+                               return -ENOSPC;
+
                        if (!access_ok(VERIFY_WRITE,
                                       u64_to_user_ptr(ex_hdr->response),
                                       (hdr->out_words + ex_hdr->provider_out_words) * 8))
@@ -714,6 +627,24 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
        if (hdr->in_words * 4 != count)
                return -EINVAL;
 
+       if (count < method_elm->req_size + sizeof(hdr)) {
+               /*
+                * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
+                * with a 16 byte write instead of 24. Old kernels didn't
+                * check the size, so they allowed this. Now that the size is
+                * checked, provide a compatibility workaround so that those
+                * userspaces are not broken.
+                */
+               if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
+                   count == 16) {
+                       hdr->in_words = 6;
+                       return 0;
+               }
+               return -ENOSPC;
+       }
+       if (hdr->out_words * 4 < method_elm->resp_size)
+               return -ENOSPC;
+
        return 0;
 }
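
Editor's note: the DESTROY_CQ compatibility case above is pure size arithmetic. A
minimal userspace check of the numbers (the structs below are local mirrors of the
include/uapi/rdma/ib_user_verbs.h layout, written out here only for illustration):

	#include <assert.h>
	#include <stdint.h>

	/* Local mirrors of the uapi structures, illustration only. */
	struct cmd_hdr {
		uint32_t command;
		uint16_t in_words;
		uint16_t out_words;
	};

	struct destroy_cq_req {
		uint64_t response;	/* '__aligned_u64 response' in the uapi */
		uint32_t cq_handle;
		uint32_t reserved;
	};

	int main(void)
	{
		/* in_words counts 4-byte words and includes the header, so a
		 * complete DESTROY_CQ write is 8 + 16 = 24 bytes == 6 words. */
		assert(sizeof(struct cmd_hdr) == 8);
		assert(sizeof(struct destroy_cq_req) == 16);
		assert((sizeof(struct cmd_hdr) +
			sizeof(struct destroy_cq_req)) / 4 == 6);
		/* rdma-core v18/v19 wrote only 16 bytes; the kernel now rewrites
		 * hdr->in_words to 6 for that case instead of returning -ENOSPC. */
		return 0;
	}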
 
@@ -721,11 +652,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *pos)
 {
        struct ib_uverbs_file *file = filp->private_data;
+       const struct uverbs_api_write_method *method_elm;
+       struct uverbs_api *uapi = file->device->uapi;
        struct ib_uverbs_ex_cmd_hdr ex_hdr;
        struct ib_uverbs_cmd_hdr hdr;
-       bool extended;
+       struct uverbs_attr_bundle bundle;
        int srcu_key;
-       u32 command;
        ssize_t ret;
 
        if (!ib_safe_file_access(filp)) {
@@ -740,57 +672,92 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;
 
-       ret = process_hdr(&hdr, &command, &extended);
-       if (ret)
-               return ret;
+       method_elm = uapi_get_method(uapi, hdr.command);
+       if (IS_ERR(method_elm))
+               return PTR_ERR(method_elm);
 
-       if (extended) {
+       if (method_elm->is_ex) {
                if (count < (sizeof(hdr) + sizeof(ex_hdr)))
                        return -EINVAL;
                if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
                        return -EFAULT;
        }
 
-       ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+       ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
        if (ret)
                return ret;
 
        srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 
-       if (!verify_command_mask(file, command, extended)) {
-               ret = -EOPNOTSUPP;
-               goto out;
-       }
-
        buf += sizeof(hdr);
 
-       if (!extended) {
-               ret = uverbs_cmd_table[command](file, buf,
-                                               hdr.in_words * 4,
-                                               hdr.out_words * 4);
-       } else {
-               struct ib_udata ucore;
-               struct ib_udata uhw;
+       bundle.ufile = file;
+       if (!method_elm->is_ex) {
+               size_t in_len = hdr.in_words * 4 - sizeof(hdr);
+               size_t out_len = hdr.out_words * 4;
+               u64 response = 0;
+
+               if (method_elm->has_udata) {
+                       bundle.driver_udata.inlen =
+                               in_len - method_elm->req_size;
+                       in_len = method_elm->req_size;
+                       if (bundle.driver_udata.inlen)
+                               bundle.driver_udata.inbuf = buf + in_len;
+                       else
+                               bundle.driver_udata.inbuf = NULL;
+               } else {
+                       memset(&bundle.driver_udata, 0,
+                              sizeof(bundle.driver_udata));
+               }
+
+               if (method_elm->has_resp) {
+                       /*
+                        * The macros check that if has_resp is set
+                        * then the command request structure starts
+                        * with a '__aligned_u64 response' member.
+                        */
+                       ret = get_user(response, (const u64 __user *)buf);
+                       if (ret)
+                               goto out_unlock;
+
+                       if (method_elm->has_udata) {
+                               bundle.driver_udata.outlen =
+                                       out_len - method_elm->resp_size;
+                               out_len = method_elm->resp_size;
+                               if (bundle.driver_udata.outlen)
+                                       bundle.driver_udata.outbuf =
+                                               u64_to_user_ptr(response +
+                                                               out_len);
+                               else
+                                       bundle.driver_udata.outbuf = NULL;
+                       }
+               } else {
+                       bundle.driver_udata.outlen = 0;
+                       bundle.driver_udata.outbuf = NULL;
+               }
 
+               ib_uverbs_init_udata_buf_or_null(
+                       &bundle.ucore, buf, u64_to_user_ptr(response),
+                       in_len, out_len);
+       } else {
                buf += sizeof(ex_hdr);
 
-               ib_uverbs_init_udata_buf_or_null(&ucore, buf,
+               ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
                                        u64_to_user_ptr(ex_hdr.response),
                                        hdr.in_words * 8, hdr.out_words * 8);
 
-               ib_uverbs_init_udata_buf_or_null(&uhw,
-                                       buf + ucore.inlen,
-                                       u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
-                                       ex_hdr.provider_in_words * 8,
-                                       ex_hdr.provider_out_words * 8);
+               ib_uverbs_init_udata_buf_or_null(
+                       &bundle.driver_udata, buf + bundle.ucore.inlen,
+                       u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
+                       ex_hdr.provider_in_words * 8,
+                       ex_hdr.provider_out_words * 8);
 
-               ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
-               ret = (ret) ? : count;
        }
 
-out:
+       ret = method_elm->handler(&bundle);
+out_unlock:
        srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
-       return ret;
+       return (ret) ? : count;
 }
 
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
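
Editor's note: to summarize the non-extended path above, the first
method_elm->req_size bytes after the header are the fixed command structure the core
parses, anything beyond that becomes driver_udata, and (when has_resp is set) the
response pointer is read from the first 8 bytes of the request. A simplified sketch
of that split, with hypothetical helper names:

	/* Hypothetical names -- a sketch of the split done in
	 * ib_uverbs_write() above for non-extended commands. */
	struct write_split {
		const char __user *core_in;	/* fixed struct the core parses */
		size_t core_in_len;
		const char __user *driver_in;	/* opaque udata for the driver */
		size_t driver_in_len;
	};

	static void split_write_payload(const char __user *payload, size_t in_len,
					size_t req_size, struct write_split *s)
	{
		s->core_in = payload;
		s->core_in_len = req_size;
		s->driver_in_len = in_len - req_size;
		s->driver_in = s->driver_in_len ? payload + req_size : NULL;
	}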
@@ -801,7 +768,7 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
        int srcu_key;
 
        srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
-       ucontext = ib_uverbs_get_ucontext(file);
+       ucontext = ib_uverbs_get_ucontext_file(file);
        if (IS_ERR(ucontext)) {
                ret = PTR_ERR(ucontext);
                goto out;
@@ -1102,9 +1069,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
        mutex_unlock(&dev->lists_mutex);
        srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
 
-       file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
-       file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
-
        setup_ufile_idr_uobject(file);
 
        return nonseekable_open(inode, filp);
@@ -1224,7 +1188,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
 {
        struct uverbs_api *uapi;
 
-       uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+       uapi = uverbs_alloc_api(device);
        if (IS_ERR(uapi))
                return PTR_ERR(uapi);
 
index 203cc96ac6f508f8b84a7d4943d12ff9b48102b5..063aff9e7a048a581826f84b67260c258229f0fc 100644 (file)
@@ -210,8 +210,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
        return 0;
 };
 
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
-                              struct uverbs_attr_bundle *attrs)
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
 {
        return 0;
 }
@@ -262,25 +261,28 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
 
 DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
 
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
-                          &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_PD),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_MR),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_QP),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_AH),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_MW),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_DM),
-                          &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
-
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
-       return &uverbs_default_objects;
-}
+const struct uapi_definition uverbs_def_obj_intf[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD,
+                                     UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
+                                     UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
+                                     UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+               UVERBS_OBJECT_RWQ_IND_TBL,
+               UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD,
+                                     UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+       {}
+};
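
Editor's note: each UAPI_DEF_OBJ_NEEDS_FN() above ties an object (and everything
chained under it) to the presence of one driver verb. The macro itself is defined
elsewhere in this series and is not shown in this excerpt; a rough sketch of the
idea, matching the needs_fn_offset handling in uapi_merge_def() further down, is:

	/* Rough sketch only -- not the real macro. At this point in history the
	 * verb pointers sit directly in struct ib_device, so recording an offset
	 * into it is enough for the core to test at uapi build time. */
	#define SKETCH_OBJ_NEEDS_FN(fn)						\
		{								\
			.kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,			\
			.scope = UAPI_SCOPE_OBJECT,				\
			.needs_fn_offset = offsetof(struct ib_device, fn),	\
		}

When the device leaves the function pointer NULL, uapi_disable_elm() marks the whole
object disabled and uapi_finalize_disable() later prunes it from the radix tree.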
index a0ffdcf9a51cd130b8bee57d9a46e3983d4cf27f..8835bad5c6ddd969792287902b6e8d247e3cf7f6 100644 (file)
@@ -48,7 +48,7 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
 }
 
 static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
@@ -82,7 +82,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
 }
 
 static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_counters_read_attr read_attr = {};
        const struct uverbs_attr *uattr;
@@ -149,3 +149,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
                            &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
                            &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
                            &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
+const struct uapi_definition uverbs_def_obj_counters[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)),
+       {}
+};
index 5b5f2052cd5216c64270391b0db78e847cc2d475..859518eab5830e7ad9d11068853ad12257b458f1 100644 (file)
@@ -58,13 +58,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
 }
 
 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_ucq_object *obj = container_of(
                uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
                typeof(*obj), uobject);
        struct ib_device *ib_dev = obj->uobject.context->device;
-       struct ib_udata uhw;
        int ret;
        u64 user_handle;
        struct ib_cq_init_attr attr = {};
@@ -101,7 +100,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
                uverbs_uobject_get(ev_file_uobj);
        }
 
-       if (attr.comp_vector >= file->device->num_comp_vectors) {
+       if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
                ret = -EINVAL;
                goto err_event_file;
        }
@@ -111,10 +110,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
        INIT_LIST_HEAD(&obj->comp_list);
        INIT_LIST_HEAD(&obj->async_list);
 
-       /* Temporary, only until drivers get the new uverbs_attr_bundle */
-       create_udata(attrs, &uhw);
-
-       cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
+       cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context,
+                              &attrs->driver_udata);
        if (IS_ERR(cq)) {
                ret = PTR_ERR(cq);
                goto err_event_file;
@@ -173,7 +170,7 @@ DECLARE_UVERBS_NAMED_METHOD(
        UVERBS_ATTR_UHW());
 
 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj =
                uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
@@ -207,3 +204,9 @@ DECLARE_UVERBS_NAMED_OBJECT(
        &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
 #endif
 );
+
+const struct uapi_definition uverbs_def_obj_cq[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ,
+                                     UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)),
+       {}
+};
index edc3ff7733d47d38e6457c924efc85926a6c22ae..658261b8f08e44b2a3a238f2043654cdb3a145e3 100644 (file)
@@ -46,9 +46,8 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
        return dm->device->dealloc_dm(dm);
 }
 
-static int
-UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
-                                      struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_dm_alloc_attr attr = {};
        struct ib_uobject *uobj =
@@ -109,3 +108,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
                            UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
                            &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
                            &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
+
+const struct uapi_definition uverbs_def_obj_dm[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM,
+                                     UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)),
+       {}
+};
index cb9486ad5c677609bbfc5311c0baba0730c08835..e4d01fb5335dde5b2d07b2f9165c8d29b11e0b5a 100644 (file)
@@ -223,7 +223,6 @@ struct ib_flow_action_esp_attr {
 
 #define ESP_LAST_SUPPORTED_FLAG                IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
 static int parse_flow_action_esp(struct ib_device *ib_dev,
-                                struct ib_uverbs_file *file,
                                 struct uverbs_attr_bundle *attrs,
                                 struct ib_flow_action_esp_attr *esp_attr,
                                 bool is_modify)
@@ -305,7 +304,7 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
 }
 
 static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
@@ -317,7 +316,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
        if (!ib_dev->create_flow_action_esp)
                return -EOPNOTSUPP;
 
-       ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+       ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
        if (ret)
                return ret;
 
@@ -333,7 +332,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
 }
 
 static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
@@ -344,8 +343,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
        if (!action->device->modify_flow_action_esp)
                return -EOPNOTSUPP;
 
-       ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
-                                   true);
+       ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
        if (ret)
                return ret;
 
@@ -438,3 +436,10 @@ DECLARE_UVERBS_NAMED_OBJECT(
        &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
        &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
        &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
+const struct uapi_definition uverbs_def_obj_flow_action[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+               UVERBS_OBJECT_FLOW_ACTION,
+               UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)),
+       {}
+};
index cf02e774303e3b2d4dfac0d73acd1ae6f5bf86c1..70ea48cfc0470a5e8cafd89f9e2bf620e967da1e 100644 (file)
@@ -40,7 +40,7 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
 }
 
 static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_dm_mr_attr attr = {};
        struct ib_uobject *uobj =
@@ -147,3 +147,9 @@ DECLARE_UVERBS_NAMED_OBJECT(
        UVERBS_OBJECT_MR,
        UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
        &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+
+const struct uapi_definition uverbs_def_obj_mr[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+                                     UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)),
+       {}
+};
index 86f3fc5e04b44f66dbc1234379246de50be906ea..0136c1d78a0f53d9f6ce68edb202cf315349030a 100644 (file)
@@ -8,6 +8,11 @@
 #include "rdma_core.h"
 #include "uverbs.h"
 
+static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs)
+{
+       return -EOPNOTSUPP;
+}
+
 static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
 {
        void *elm;
@@ -26,6 +31,70 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
        return elm;
 }
 
+static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key,
+                             size_t alloc_size, bool *exists)
+{
+       void *elm;
+
+       elm = uapi_add_elm(uapi, key, alloc_size);
+       if (!IS_ERR(elm)) {
+               *exists = false;
+               return elm;
+       }
+
+       if (elm != ERR_PTR(-EEXIST))
+               return elm;
+
+       elm = radix_tree_lookup(&uapi->radix, key);
+       if (WARN_ON(!elm))
+               return ERR_PTR(-EINVAL);
+       *exists = true;
+       return elm;
+}
+
+static int uapi_create_write(struct uverbs_api *uapi,
+                            struct ib_device *ibdev,
+                            const struct uapi_definition *def,
+                            u32 obj_key,
+                            u32 *cur_method_key)
+{
+       struct uverbs_api_write_method *method_elm;
+       u32 method_key = obj_key;
+       bool exists;
+
+       if (def->write.is_ex)
+               method_key |= uapi_key_write_ex_method(def->write.command_num);
+       else
+               method_key |= uapi_key_write_method(def->write.command_num);
+
+       method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+                                     &exists);
+       if (IS_ERR(method_elm))
+               return PTR_ERR(method_elm);
+
+       if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex)))
+               return -EINVAL;
+
+       method_elm->is_ex = def->write.is_ex;
+       method_elm->handler = def->func_write;
+       if (def->write.is_ex)
+               method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
+                                        BIT_ULL(def->write.command_num));
+       else
+               method_elm->disabled = !(ibdev->uverbs_cmd_mask &
+                                        BIT_ULL(def->write.command_num));
+
+       if (!def->write.is_ex && def->func_write) {
+               method_elm->has_udata = def->write.has_udata;
+               method_elm->has_resp = def->write.has_resp;
+               method_elm->req_size = def->write.req_size;
+               method_elm->resp_size = def->write.resp_size;
+       }
+
+       *cur_method_key = method_key;
+       return 0;
+}
+
 static int uapi_merge_method(struct uverbs_api *uapi,
                             struct uverbs_api_object *obj_elm, u32 obj_key,
                             const struct uverbs_method_def *method,
@@ -34,23 +103,21 @@ static int uapi_merge_method(struct uverbs_api *uapi,
        u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
        struct uverbs_api_ioctl_method *method_elm;
        unsigned int i;
+       bool exists;
 
        if (!method->attrs)
                return 0;
 
-       method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
-       if (IS_ERR(method_elm)) {
-               if (method_elm != ERR_PTR(-EEXIST))
-                       return PTR_ERR(method_elm);
-
+       method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+                                     &exists);
+       if (IS_ERR(method_elm))
+               return PTR_ERR(method_elm);
+       if (exists) {
                /*
                 * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
                 */
                if (WARN_ON(method->handler))
                        return -EINVAL;
-               method_elm = radix_tree_lookup(&uapi->radix, method_key);
-               if (WARN_ON(!method_elm))
-                       return -EINVAL;
        } else {
                WARN_ON(!method->handler);
                rcu_assign_pointer(method_elm->handler, method->handler);
@@ -98,72 +165,182 @@ static int uapi_merge_method(struct uverbs_api *uapi,
        return 0;
 }
 
-static int uapi_merge_tree(struct uverbs_api *uapi,
-                          const struct uverbs_object_tree_def *tree,
-                          bool is_driver)
+static int uapi_merge_obj_tree(struct uverbs_api *uapi,
+                              const struct uverbs_object_def *obj,
+                              bool is_driver)
 {
-       unsigned int i, j;
+       struct uverbs_api_object *obj_elm;
+       unsigned int i;
+       u32 obj_key;
+       bool exists;
        int rc;
 
-       if (!tree->objects)
+       obj_key = uapi_key_obj(obj->id);
+       obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists);
+       if (IS_ERR(obj_elm))
+               return PTR_ERR(obj_elm);
+
+       if (obj->type_attrs) {
+               if (WARN_ON(obj_elm->type_attrs))
+                       return -EINVAL;
+
+               obj_elm->id = obj->id;
+               obj_elm->type_attrs = obj->type_attrs;
+               obj_elm->type_class = obj->type_attrs->type_class;
+               /*
+                * Today drivers are only permitted to use idr_class
+                * types. They cannot use FD types because we currently have
+                * no way to revoke the fops pointer after device
+                * disassociation.
+                */
+               if (WARN_ON(is_driver &&
+                           obj->type_attrs->type_class != &uverbs_idr_class))
+                       return -EINVAL;
+       }
+
+       if (!obj->methods)
                return 0;
 
-       for (i = 0; i != tree->num_objects; i++) {
-               const struct uverbs_object_def *obj = (*tree->objects)[i];
-               struct uverbs_api_object *obj_elm;
-               u32 obj_key;
+       for (i = 0; i != obj->num_methods; i++) {
+               const struct uverbs_method_def *method = (*obj->methods)[i];
 
-               if (!obj)
+               if (!method)
                        continue;
 
-               obj_key = uapi_key_obj(obj->id);
-               obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
-               if (IS_ERR(obj_elm)) {
-                       if (obj_elm != ERR_PTR(-EEXIST))
-                               return PTR_ERR(obj_elm);
+               rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+                                      is_driver);
+               if (rc)
+                       return rc;
+       }
 
-                       /* This occurs when a driver uses ADD_UVERBS_METHODS */
-                       if (WARN_ON(obj->type_attrs))
-                               return -EINVAL;
-                       obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
-                       if (WARN_ON(!obj_elm))
+       return 0;
+}
+
+static int uapi_disable_elm(struct uverbs_api *uapi,
+                           const struct uapi_definition *def,
+                           u32 obj_key,
+                           u32 method_key)
+{
+       bool exists;
+
+       if (def->scope == UAPI_SCOPE_OBJECT) {
+               struct uverbs_api_object *obj_elm;
+
+               obj_elm = uapi_add_get_elm(
+                       uapi, obj_key, sizeof(*obj_elm), &exists);
+               if (IS_ERR(obj_elm))
+                       return PTR_ERR(obj_elm);
+               obj_elm->disabled = 1;
+               return 0;
+       }
+
+       if (def->scope == UAPI_SCOPE_METHOD &&
+           uapi_key_is_ioctl_method(method_key)) {
+               struct uverbs_api_ioctl_method *method_elm;
+
+               method_elm = uapi_add_get_elm(uapi, method_key,
+                                             sizeof(*method_elm), &exists);
+               if (IS_ERR(method_elm))
+                       return PTR_ERR(method_elm);
+               method_elm->disabled = 1;
+               return 0;
+       }
+
+       if (def->scope == UAPI_SCOPE_METHOD &&
+           (uapi_key_is_write_method(method_key) ||
+            uapi_key_is_write_ex_method(method_key))) {
+               struct uverbs_api_write_method *write_elm;
+
+               write_elm = uapi_add_get_elm(uapi, method_key,
+                                            sizeof(*write_elm), &exists);
+               if (IS_ERR(write_elm))
+                       return PTR_ERR(write_elm);
+               write_elm->disabled = 1;
+               return 0;
+       }
+
+       WARN_ON(true);
+       return -EINVAL;
+}
+
+static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev,
+                         const struct uapi_definition *def_list,
+                         bool is_driver)
+{
+       const struct uapi_definition *def = def_list;
+       u32 cur_obj_key = UVERBS_API_KEY_ERR;
+       u32 cur_method_key = UVERBS_API_KEY_ERR;
+       bool exists;
+       int rc;
+
+       if (!def_list)
+               return 0;
+
+       for (;; def++) {
+               switch ((enum uapi_definition_kind)def->kind) {
+               case UAPI_DEF_CHAIN:
+                       rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver);
+                       if (rc)
+                               return rc;
+                       continue;
+
+               case UAPI_DEF_CHAIN_OBJ_TREE:
+                       if (WARN_ON(def->object_start.object_id !=
+                                   def->chain_obj_tree->id))
                                return -EINVAL;
-               } else {
-                       obj_elm->type_attrs = obj->type_attrs;
-                       if (obj->type_attrs) {
-                               obj_elm->type_class =
-                                       obj->type_attrs->type_class;
-                               /*
-                                * Today drivers are only permitted to use
-                                * idr_class types. They cannot use FD types
-                                * because we currently have no way to revoke
-                                * the fops pointer after device
-                                * disassociation.
-                                */
-                               if (WARN_ON(is_driver &&
-                                           obj->type_attrs->type_class !=
-                                                   &uverbs_idr_class))
-                                       return -EINVAL;
-                       }
-               }
 
-               if (!obj->methods)
+                       cur_obj_key = uapi_key_obj(def->object_start.object_id);
+                       rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree,
+                                                is_driver);
+                       if (rc)
+                               return rc;
                        continue;
 
-               for (j = 0; j != obj->num_methods; j++) {
-                       const struct uverbs_method_def *method =
-                               (*obj->methods)[j];
-                       if (!method)
+               case UAPI_DEF_END:
+                       return 0;
+
+               case UAPI_DEF_IS_SUPPORTED_DEV_FN: {
+                       void **ibdev_fn = (void *)ibdev + def->needs_fn_offset;
+
+                       if (*ibdev_fn)
                                continue;
+                       rc = uapi_disable_elm(
+                               uapi, def, cur_obj_key, cur_method_key);
+                       if (rc)
+                               return rc;
+                       continue;
+               }
 
-                       rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
-                                              is_driver);
+               case UAPI_DEF_IS_SUPPORTED_FUNC:
+                       if (def->func_is_supported(ibdev))
+                               continue;
+                       rc = uapi_disable_elm(
+                               uapi, def, cur_obj_key, cur_method_key);
                        if (rc)
                                return rc;
+                       continue;
+
+               case UAPI_DEF_OBJECT_START: {
+                       struct uverbs_api_object *obj_elm;
+
+                       cur_obj_key = uapi_key_obj(def->object_start.object_id);
+                       obj_elm = uapi_add_get_elm(uapi, cur_obj_key,
+                                                  sizeof(*obj_elm), &exists);
+                       if (IS_ERR(obj_elm))
+                               return PTR_ERR(obj_elm);
+                       continue;
                }
-       }
 
-       return 0;
+               case UAPI_DEF_WRITE:
+                       rc = uapi_create_write(
+                               uapi, ibdev, def, cur_obj_key, &cur_method_key);
+                       if (rc)
+                               return rc;
+                       continue;
+               }
+               WARN_ON(true);
+               return -EINVAL;
+       }
 }
 
 static int
@@ -186,13 +363,16 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
                u32 attr_bkey = uapi_bkey_attr(attr_key);
                u8 type = elm->spec.type;
 
-               if (uapi_key_attr_to_method(iter.index) !=
-                   uapi_key_attr_to_method(method_key))
+               if (uapi_key_attr_to_ioctl_method(iter.index) !=
+                   uapi_key_attr_to_ioctl_method(method_key))
                        break;
 
                if (elm->spec.mandatory)
                        __set_bit(attr_bkey, method_elm->attr_mandatory);
 
+               if (elm->spec.is_udata)
+                       method_elm->has_udata = true;
+
                if (type == UVERBS_ATTR_TYPE_IDR ||
                    type == UVERBS_ATTR_TYPE_FD) {
                        u8 access = elm->spec.u.obj.access;
@@ -229,9 +409,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
 
 static int uapi_finalize(struct uverbs_api *uapi)
 {
+       const struct uverbs_api_write_method **data;
+       unsigned long max_write_ex = 0;
+       unsigned long max_write = 0;
        struct radix_tree_iter iter;
        void __rcu **slot;
        int rc;
+       int i;
 
        radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
                struct uverbs_api_ioctl_method *method_elm =
@@ -243,29 +427,208 @@ static int uapi_finalize(struct uverbs_api *uapi)
                        if (rc)
                                return rc;
                }
+
+               if (uapi_key_is_write_method(iter.index))
+                       max_write = max(max_write,
+                                       iter.index & UVERBS_API_ATTR_KEY_MASK);
+               if (uapi_key_is_write_ex_method(iter.index))
+                       max_write_ex =
+                               max(max_write_ex,
+                                   iter.index & UVERBS_API_ATTR_KEY_MASK);
+       }
+
+       uapi->notsupp_method.handler = ib_uverbs_notsupp;
+       uapi->num_write = max_write + 1;
+       uapi->num_write_ex = max_write_ex + 1;
+       data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+                            sizeof(*uapi->write_methods), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+               data[i] = &uapi->notsupp_method;
+       uapi->write_methods = data;
+       uapi->write_ex_methods = data + uapi->num_write;
+
+       radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+               if (uapi_key_is_write_method(iter.index))
+                       uapi->write_methods[iter.index &
+                                           UVERBS_API_ATTR_KEY_MASK] =
+                               rcu_dereference_protected(*slot, true);
+               if (uapi_key_is_write_ex_method(iter.index))
+                       uapi->write_ex_methods[iter.index &
+                                              UVERBS_API_ATTR_KEY_MASK] =
+                               rcu_dereference_protected(*slot, true);
        }
 
        return 0;
 }
 
-void uverbs_destroy_api(struct uverbs_api *uapi)
+static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last)
 {
        struct radix_tree_iter iter;
        void __rcu **slot;
 
-       if (!uapi)
-               return;
-
-       radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+       radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) {
+               if (iter.index > last)
+                       return;
                kfree(rcu_dereference_protected(*slot, true));
                radix_tree_iter_delete(&uapi->radix, &iter, slot);
        }
+}
+
+static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key)
+{
+       uapi_remove_range(uapi, obj_key,
+                         obj_key | UVERBS_API_METHOD_KEY_MASK |
+                                 UVERBS_API_ATTR_KEY_MASK);
+}
+
+static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key)
+{
+       uapi_remove_range(uapi, method_key,
+                         method_key | UVERBS_API_ATTR_KEY_MASK);
+}
+
+static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec)
+{
+       if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+           spec->type == UVERBS_ATTR_TYPE_FD)
+               return spec->u.obj.obj_type;
+       if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY)
+               return spec->u2.objs_arr.obj_type;
+       return UVERBS_API_KEY_ERR;
+}
+
+static void uapi_key_okay(u32 key)
+{
+       unsigned int count = 0;
+
+       if (uapi_key_is_object(key))
+               count++;
+       if (uapi_key_is_ioctl_method(key))
+               count++;
+       if (uapi_key_is_write_method(key))
+               count++;
+       if (uapi_key_is_write_ex_method(key))
+               count++;
+       if (uapi_key_is_attr(key))
+               count++;
+       WARN(count != 1, "Bad count %d key=%x", count, key);
+}
+
+static void uapi_finalize_disable(struct uverbs_api *uapi)
+{
+       struct radix_tree_iter iter;
+       u32 starting_key = 0;
+       bool scan_again = false;
+       void __rcu **slot;
+
+again:
+       radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) {
+               uapi_key_okay(iter.index);
+
+               if (uapi_key_is_object(iter.index)) {
+                       struct uverbs_api_object *obj_elm =
+                               rcu_dereference_protected(*slot, true);
+
+                       if (obj_elm->disabled) {
+                               /* Have to check all the attrs again */
+                               scan_again = true;
+                               starting_key = iter.index;
+                               uapi_remove_object(uapi, iter.index);
+                               goto again;
+                       }
+                       continue;
+               }
+
+               if (uapi_key_is_ioctl_method(iter.index)) {
+                       struct uverbs_api_ioctl_method *method_elm =
+                               rcu_dereference_protected(*slot, true);
+
+                       if (method_elm->disabled) {
+                               starting_key = iter.index;
+                               uapi_remove_method(uapi, iter.index);
+                               goto again;
+                       }
+                       continue;
+               }
+
+               if (uapi_key_is_write_method(iter.index) ||
+                   uapi_key_is_write_ex_method(iter.index)) {
+                       struct uverbs_api_write_method *method_elm =
+                               rcu_dereference_protected(*slot, true);
+
+                       if (method_elm->disabled) {
+                               kfree(method_elm);
+                               radix_tree_iter_delete(&uapi->radix, &iter, slot);
+                       }
+                       continue;
+               }
+
+               if (uapi_key_is_attr(iter.index)) {
+                       struct uverbs_api_attr *attr_elm =
+                               rcu_dereference_protected(*slot, true);
+                       const struct uverbs_api_object *tmp_obj;
+                       u32 obj_key;
+
+                       /*
+                        * If the method has a mandatory object handle
+                        * attribute that refers to an object which is not
+                        * present, then the entire method is uncallable.
+                        */
+                       if (!attr_elm->spec.mandatory)
+                               continue;
+                       obj_key = uapi_get_obj_id(&attr_elm->spec);
+                       if (obj_key == UVERBS_API_KEY_ERR)
+                               continue;
+                       tmp_obj = uapi_get_object(uapi, obj_key);
+                       if (IS_ERR(tmp_obj)) {
+                               if (PTR_ERR(tmp_obj) == -ENOMSG)
+                                       continue;
+                       } else {
+                               if (!tmp_obj->disabled)
+                                       continue;
+                       }
+
+                       starting_key = iter.index;
+                       uapi_remove_method(
+                               uapi,
+                               iter.index & (UVERBS_API_OBJ_KEY_MASK |
+                                             UVERBS_API_METHOD_KEY_MASK));
+                       goto again;
+               }
+
+               WARN_ON(true);
+       }
+
+       if (!scan_again)
+               return;
+       scan_again = false;
+       starting_key = 0;
+       goto again;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+       if (!uapi)
+               return;
+
+       uapi_remove_range(uapi, 0, U32_MAX);
+       kfree(uapi->write_methods);
        kfree(uapi);
 }
 
-struct uverbs_api *uverbs_alloc_api(
-       const struct uverbs_object_tree_def *const *driver_specs,
-       enum rdma_driver_id driver_id)
+static const struct uapi_definition uverbs_core_api[] = {
+       UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+       UAPI_DEF_CHAIN(uverbs_def_obj_cq),
+       UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+       UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
+       UAPI_DEF_CHAIN(uverbs_def_obj_intf),
+       UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+       UAPI_DEF_CHAIN(uverbs_def_write_intf),
+       {},
+};
+
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
 {
        struct uverbs_api *uapi;
        int rc;
@@ -275,18 +638,16 @@ struct uverbs_api *uverbs_alloc_api(
                return ERR_PTR(-ENOMEM);
 
        INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
-       uapi->driver_id = driver_id;
+       uapi->driver_id = ibdev->driver_id;
 
-       rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+       rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
+       if (rc)
+               goto err;
+       rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true);
        if (rc)
                goto err;
 
-       for (; driver_specs && *driver_specs; driver_specs++) {
-               rc = uapi_merge_tree(uapi, *driver_specs, true);
-               if (rc)
-                       goto err;
-       }
-
+       uapi_finalize_disable(uapi);
        rc = uapi_finalize(uapi);
        if (rc)
                goto err;
@@ -294,8 +655,9 @@ struct uverbs_api *uverbs_alloc_api(
        return uapi;
 err:
        if (rc != -ENOMEM)
-               pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
-                      rc);
+               dev_err(&ibdev->dev,
+                       "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+                       rc);
 
        uverbs_destroy_api(uapi);
        return ERR_PTR(rc);
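
Editor's note: the flat write_methods[]/write_ex_methods[] arrays built in
uapi_finalize() are what ib_uverbs_write() indexes through uapi_get_method(). That
helper is part of this series but not visible in this excerpt; a simplified sketch
of the lookup it has to perform, reusing the command/flag masks from the removed
process_hdr(), might look like:

	/* Simplified sketch -- the real uapi_get_method() lives in
	 * uverbs_uapi.c and is not shown in this excerpt. */
	static const struct uverbs_api_write_method *
	sketch_get_method(const struct uverbs_api *uapi, u32 command)
	{
		u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;

		if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
				     IB_USER_VERBS_CMD_COMMAND_MASK))
			return ERR_PTR(-EINVAL);

		if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
			if (cmd_idx >= uapi->num_write_ex)
				return ERR_PTR(-EOPNOTSUPP);
			return uapi->write_ex_methods[cmd_idx];
		}
		if (cmd_idx >= uapi->num_write)
			return ERR_PTR(-EOPNOTSUPP);
		return uapi->write_methods[cmd_idx];
	}

Because every slot defaults to &uapi->notsupp_method, even an in-range command that
no one registered fails cleanly with -EOPNOTSUPP from its handler.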
index 178899e3ce73766ce3df79d00d4d8a46bd8352a6..fb2fc0c7ecfb66cc94bdda93d2582e545d0ec4d0 100644 (file)
@@ -141,6 +141,10 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
        case IB_RATE_100_GBPS: return  40;
        case IB_RATE_200_GBPS: return  80;
        case IB_RATE_300_GBPS: return 120;
+       case IB_RATE_28_GBPS:  return  11;
+       case IB_RATE_50_GBPS:  return  20;
+       case IB_RATE_400_GBPS: return 160;
+       case IB_RATE_600_GBPS: return 240;
        default:               return  -1;
        }
 }
@@ -166,6 +170,10 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
        case 40:  return IB_RATE_100_GBPS;
        case 80:  return IB_RATE_200_GBPS;
        case 120: return IB_RATE_300_GBPS;
+       case 11:  return IB_RATE_28_GBPS;
+       case 20:  return IB_RATE_50_GBPS;
+       case 160: return IB_RATE_400_GBPS;
+       case 240: return IB_RATE_600_GBPS;
        default:  return IB_RATE_PORT_CURRENT;
        }
 }
@@ -191,6 +199,10 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
        case IB_RATE_100_GBPS: return 103125;
        case IB_RATE_200_GBPS: return 206250;
        case IB_RATE_300_GBPS: return 309375;
+       case IB_RATE_28_GBPS:  return 28125;
+       case IB_RATE_50_GBPS:  return 53125;
+       case IB_RATE_400_GBPS: return 425000;
+       case IB_RATE_600_GBPS: return 637500;
        default:               return -1;
        }
 }
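
Editor's note: the three hunks above keep the rate helpers self-consistent for the
new speeds. Following the pattern of the existing cases, ib_rate_to_mult() reports
the nominal rate as a multiple of the 2.5 Gb/s base rate rounded down (e.g.
28.125 / 2.5 -> 11), mult_to_ib_rate() is its inverse, and ib_rate_to_mbps() returns
the actual signalling rate in Mb/s. A small standalone check over just the newly
added values, with the numbers copied from the hunks above:

	#include <assert.h>

	int main(void)
	{
		static const struct { int gbps, mult, mbps; } new_rates[] = {
			{  28,  11,  28125 },	/* IB_RATE_28_GBPS  */
			{  50,  20,  53125 },	/* IB_RATE_50_GBPS  */
			{ 400, 160, 425000 },	/* IB_RATE_400_GBPS */
			{ 600, 240, 637500 },	/* IB_RATE_600_GBPS */
		};
		unsigned int i;

		for (i = 0; i < sizeof(new_rates) / sizeof(new_rates[0]); i++) {
			/* mult is the nominal rate in 2.5 Gb/s units, rounded down */
			assert(new_rates[i].mult == (new_rates[i].gbps * 10) / 25);
			/* mbps is the signalling rate, never below the nominal rate */
			assert(new_rates[i].mbps >= new_rates[i].gbps * 1000);
		}
		return 0;
	}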
index dcb4bba522ba001acb2e632bd8b184a8193193ea..df4f7a3f043dc505d16ae018b160bbb53f44c03b 100644 (file)
@@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
        if (!wq->sq)
                goto err3;
 
-       wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+       wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev),
                                             depth * sizeof(union t3_wr),
                                             &(wq->dma_addr), GFP_KERNEL);
        if (!wq->queue)
                goto err4;
 
-       memset(wq->queue, 0, depth * sizeof(union t3_wr));
        dma_unmap_addr_set(wq, mapping, wq->dma_addr);
        wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
        if (!kernel_domain)
index 615413bd3e8d79744fe5ef9c168e86b836f15574..659175c9ae91a5f2e621085a40afa3bd24c36e39 100644 (file)
@@ -2795,7 +2795,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
                break;
        case MPA_REQ_SENT:
                (void)stop_ep_timer(ep);
-               if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+               if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
+                   (mpa_rev == 2 && ep->tried_with_mpa_v1))
                        connect_reply_upcall(ep, -ECONNRESET);
                else {
                        /*
index 13478f3b70570e9e265a476b8302a35339459e3a..5a8030bd420850f309036e501cdc33ce0a58c7ff 100644 (file)
@@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
        wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
                T4_RQT_ENTRY_SHIFT;
 
-       wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+       wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev,
                                       wq->memsize, &wq->dma_addr,
                        GFP_KERNEL);
        if (!wq->queue)
                goto err_free_rqtpool;
 
-       memset(wq->queue, 0, wq->memsize);
        dma_unmap_addr_set(wq, mapping, wq->dma_addr);
 
        wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
index ff790390c91ad1642249b56b56fc9fb17c098538..3ce9dc8c346332eb342ace48b25f0d28a41e5292 100644 (file)
@@ -34,6 +34,7 @@ hfi1-y := \
        ruc.o \
        sdma.o \
        sysfs.o \
+       tid_rdma.o \
        trace.o \
        uc.o \
        ud.o \
index 9b20479dc71075c7dd4dad3c82945cbdea7f65e2..385c33745c9f0b7a0843c095c1be067763d45fa4 100644 (file)
@@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
 static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
 static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
                                   int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+                                        int msecs);
 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
 static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
 static void handle_temp_err(struct hfi1_devdata *dd);
@@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
                        break;
 
                ppd->port_error_action = 0;
-               ppd->host_link_state = HLS_DN_POLL;
 
                if (quick_linkup) {
                        /* quick linkup does not go into polling */
                        ret = do_quick_linkup(dd);
                } else {
                        ret1 = set_physical_link_state(dd, PLS_POLLING);
+                       if (!ret1)
+                               ret1 = wait_phys_link_out_of_offline(ppd,
+                                                                    3000);
                        if (ret1 != HCMD_SUCCESS) {
                                dd_dev_err(dd,
                                           "Failed to transition to Polling link state, return 0x%x\n",
@@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
                                ret = -EINVAL;
                        }
                }
+
+               /*
+                * Change the host link state after requesting DC8051 to
+                * change its physical state so that we can ignore any
+                * interrupt with stale LNI(XX) error, which will not be
+                * cleared until DC8051 transitions to Polling state.
+                */
+               ppd->host_link_state = HLS_DN_POLL;
                ppd->offline_disabled_reason =
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
                /*
@@ -12927,6 +12939,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
        return read_state;
 }
 
+/*
+ * wait_phys_link_out_of_offline - wait for the link to leave offline
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for the physical link state to change
+ * to any state other than offline.
+ * Returns the new physical state on success, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+                                        int msecs)
+{
+       u32 read_state;
+       unsigned long timeout;
+
+       timeout = jiffies + msecs_to_jiffies(msecs);
+       while (1) {
+               read_state = read_physical_state(ppd->dd);
+               if ((read_state & 0xF0) != PLS_OFFLINE)
+                       break;
+               if (time_after(jiffies, timeout)) {
+                       dd_dev_err(ppd->dd,
+                                  "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+                                  read_state, msecs);
+                       return -ETIMEDOUT;
+               }
+               usleep_range(1950, 2050); /* sleep 2ms-ish */
+       }
+
+       log_state_transition(ppd, read_state);
+       return read_state;
+}
+
 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 
index c6163a347e939818ce32733b106a96350941dac3..c0800ea5a3f813078cae3bbd83a37ca71ea2b9da 100644 (file)
 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull
 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0
 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018)
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32
+#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull
 #define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028)
 #define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull
 #define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020)
index 7108d4d9225920c25443806a93654d2709fadd6b..40d3cfb58bd1c697cecae3b2829486e56e2bd35b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
                                  HFI1_CAP_ALLOW_PERM_JKEY |            \
                                  HFI1_CAP_STATIC_RATE_CTRL |           \
                                  HFI1_CAP_PRINT_UNIMPL |               \
-                                 HFI1_CAP_TID_UNMAP)
+                                 HFI1_CAP_TID_UNMAP |                  \
+                                 HFI1_CAP_OPFN)
 /*
  * A set of capability bits that are "global" and are not allowed to be
  * set in the user bitmask.
  */
 #define HFI1_CAP_RESERVED_MASK   ((HFI1_CAP_SDMA |                     \
-                                 HFI1_CAP_USE_SDMA_HEAD |              \
-                                 HFI1_CAP_EXTENDED_PSN |               \
-                                 HFI1_CAP_PRINT_UNIMPL |               \
-                                 HFI1_CAP_NO_INTEGRITY |               \
-                                 HFI1_CAP_PKEY_CHECK) <<               \
-                                HFI1_CAP_USER_SHIFT)
+                                  HFI1_CAP_USE_SDMA_HEAD |             \
+                                  HFI1_CAP_EXTENDED_PSN |              \
+                                  HFI1_CAP_PRINT_UNIMPL |              \
+                                  HFI1_CAP_NO_INTEGRITY |              \
+                                  HFI1_CAP_PKEY_CHECK |                \
+                                  HFI1_CAP_TID_RDMA |                  \
+                                  HFI1_CAP_OPFN) <<                    \
+                                 HFI1_CAP_USER_SHIFT)
 /*
  * Set of capabilities that need to be enabled for kernel context in
  * order to be allowed for user contexts, as well.
index 9f992ae36c8913d8330be1b6d1bf07dcfcfc852e..0a557795563c77fb1671bd2d5e04087da35d3ffe 100644 (file)
@@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds);
 DEBUGFS_SEQ_FILE_OPEN(rcds)
 DEBUGFS_FILE_OPS(rcds);
 
+static void *_pios_seq_start(struct seq_file *s, loff_t *pos)
+{
+       struct hfi1_ibdev *ibd;
+       struct hfi1_devdata *dd;
+
+       ibd = (struct hfi1_ibdev *)s->private;
+       dd = dd_from_dev(ibd);
+       if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+               return NULL;
+       return pos;
+}
+
+static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+       struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+       ++*pos;
+       if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+               return NULL;
+       return pos;
+}
+
+static void _pios_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _pios_seq_show(struct seq_file *s, void *v)
+{
+       struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+       struct hfi1_devdata *dd = dd_from_dev(ibd);
+       struct send_context_info *sci;
+       loff_t *spos = v;
+       loff_t i = *spos;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dd->sc_lock, flags);
+       sci = &dd->send_contexts[i];
+       if (sci && sci->type != SC_USER && sci->allocated && sci->sc)
+               seqfile_dump_sci(s, i, sci);
+       spin_unlock_irqrestore(&dd->sc_lock, flags);
+       return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(pios);
+DEBUGFS_SEQ_FILE_OPEN(pios)
+DEBUGFS_FILE_OPS(pios);
+
 /* read the per-device counters */
 static ssize_t dev_counters_read(struct file *file, char __user *buf,
                                 size_t count, loff_t *ppos)
@@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
        DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
        DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
        DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd);
+       DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd);
        DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
        /* dev counter files */
        for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
index a41f85558312eeb86d9b046f4ac784765932804e..a8ad70730203995c4529e909e09be028556afc18 100644 (file)
@@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
        [HFI1_PKT_TYPE_16B] = &return_cnp_16B
 };
 
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
-                              bool do_cnp)
+/**
+ * hfi1_process_ecn_slowpath - Process FECN or BECN bits
+ * @qp: The packet's destination QP
+ * @pkt: The packet itself.
+ * @prescan: true if the caller is the RXQ prescan
+ *
+ * Process the packet's FECN or BECN bits. By now, it has already been
+ * determined that those bits should be processed for this packet.
+ * The significance of the @prescan argument is that if the caller
+ * is the RXQ prescan, a CNP will be sent out instead of waiting for
+ * the normal packet processing to send an ACK with BECN set (or a
+ * CNP).
+ */
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+                              bool prescan)
 {
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct ib_other_headers *ohdr = pkt->ohdr;
        struct ib_grh *grh = pkt->grh;
-       u32 rqpn = 0, bth1;
+       u32 rqpn = 0;
        u16 pkey;
        u32 rlid, slid, dlid = 0;
-       u8 hdr_type, sc, svc_type;
-       bool is_mcast = false;
+       u8 hdr_type, sc, svc_type, opcode;
+       bool is_mcast = false, ignore_fecn = false, do_cnp = false,
+               fecn, becn;
 
        /* can be called from prescan */
        if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
-               is_mcast = hfi1_is_16B_mcast(dlid);
                pkey = hfi1_16B_get_pkey(pkt->hdr);
                sc = hfi1_16B_get_sc(pkt->hdr);
                dlid = hfi1_16B_get_dlid(pkt->hdr);
                slid = hfi1_16B_get_slid(pkt->hdr);
+               is_mcast = hfi1_is_16B_mcast(dlid);
+               opcode = ib_bth_get_opcode(ohdr);
                hdr_type = HFI1_PKT_TYPE_16B;
+               fecn = hfi1_16B_get_fecn(pkt->hdr);
+               becn = hfi1_16B_get_becn(pkt->hdr);
        } else {
-               is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
-                          (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
                pkey = ib_bth_get_pkey(ohdr);
                sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
-               dlid = ib_get_dlid(pkt->hdr);
+               dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) :
+                       ppd->lid;
                slid = ib_get_slid(pkt->hdr);
+               is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+                          (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
+               opcode = ib_bth_get_opcode(ohdr);
                hdr_type = HFI1_PKT_TYPE_9B;
+               fecn = ib_bth_get_fecn(ohdr);
+               becn = ib_bth_get_becn(ohdr);
        }
 
        switch (qp->ibqp.qp_type) {
        case IB_QPT_UD:
-               dlid = ppd->lid;
                rlid = slid;
                rqpn = ib_get_sqpn(pkt->ohdr);
                svc_type = IB_CC_SVCTYPE_UD;
@@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
                svc_type = IB_CC_SVCTYPE_RC;
                break;
        default:
-               return;
+               return false;
        }
 
-       bth1 = be32_to_cpu(ohdr->bth[1]);
+       ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) ||
+               (opcode == IB_OPCODE_RC_ACKNOWLEDGE);
+       /*
+        * ACKNOWLEDGE packets do not get a CNP but this will be
+        * guarded by ignore_fecn above.
+        */
+       do_cnp = prescan ||
+               (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
+                opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE);
+
        /* Call appropriate CNP handler */
-       if (do_cnp && (bth1 & IB_FECN_SMASK))
+       if (!ignore_fecn && do_cnp && fecn)
                hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
                                              dlid, rlid, sc, grh);
 
-       if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
-               u32 lqpn = bth1 & RVT_QPN_MASK;
+       if (becn) {
+               u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
                u8 sl = ibp->sc_to_sl[sc];
 
                process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
        }
-
+       return !ignore_fecn && fecn;
 }
 
 struct ps_mdata {
@@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
                u64 rhf = rhf_to_cpu(rhf_addr);
                u32 etype = rhf_rcv_type(rhf), qpn, bth1;
-               int is_ecn = 0;
                u8 lnh;
 
                if (ps_done(&mdata, rhf, rcd))
@@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                        goto next; /* just in case */
                }
 
-               bth1 = be32_to_cpu(packet->ohdr->bth[1]);
-               is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
-
-               if (!is_ecn)
+               if (!hfi1_may_ecn(packet))
                        goto next;
 
+               bth1 = be32_to_cpu(packet->ohdr->bth[1]);
                qpn = bth1 & RVT_QPN_MASK;
                rcu_read_lock();
                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
@@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                        goto next;
                }
 
-               process_ecn(qp, packet, true);
+               hfi1_process_ecn_slowpath(qp, packet, true);
                rcu_read_unlock();
 
                /* turn off BECN, FECN */
@@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
        if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
            (packet->dlid !=
                opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
-               if (packet->dlid != ppd->lid)
+               if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid)
                        return -EINVAL;
        }
 
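
The last hunk above loosens the bypass DLID check so that any LID covered by the port's LMC range is accepted, not only the base LID. A standalone sketch of the masking; the LID and LMC values are made up.

#include <stdio.h>

int main(void)
{
	unsigned int base_lid = 0x40;	/* made-up port base LID */
	unsigned int lmc = 2;		/* port answers to 4 consecutive LIDs */
	unsigned int dlid;

	for (dlid = 0x3e; dlid <= 0x45; dlid++) {
		unsigned int masked = dlid & ~((1U << lmc) - 1);

		printf("dlid 0x%x -> %s\n", dlid,
		       masked == base_lid ? "accepted" : "rejected");
	}
	return 0;
}
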
index 1401b6ea4a287aab8c0cc48f5086c61aba82b588..ca572ad6061c0fc886d767d993bed861bf342b88 100644 (file)
@@ -1802,13 +1802,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
        return &rcd->ppd->ibport_data;
 }
 
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
-                              bool do_cnp);
-static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
-                              bool do_cnp)
+/**
+ * hfi1_may_ecn - Check whether FECN or BECN processing should be done
+ * @pkt: the packet to be evaluated
+ *
+ * Check whether the FECN or BECN bits in the packet's header are
+ * enabled, depending on packet type.
+ *
+ * This function only checks for FECN and BECN bits. Additional checks
+ * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
+ * ensure correct handling.
+ */
+static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
 {
-       bool becn;
-       bool fecn;
+       bool fecn, becn;
 
        if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
                fecn = hfi1_16B_get_fecn(pkt->hdr);
@@ -1817,10 +1824,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
                fecn = ib_bth_get_fecn(pkt->ohdr);
                becn = ib_bth_get_becn(pkt->ohdr);
        }
-       if (unlikely(fecn || becn)) {
-               hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
-               return fecn;
-       }
+       return fecn || becn;
+}
+
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+                              bool prescan);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
+{
+       bool do_work;
+
+       do_work = hfi1_may_ecn(pkt);
+       if (unlikely(do_work))
+               return hfi1_process_ecn_slowpath(qp, pkt, false);
        return false;
 }
 
index 9ab50d2308dc0655f1d83f1b2a1e8a783dd98521..dd5a5c030066072cfa821a54ca79e94777eecce7 100644 (file)
@@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
        spin_lock_init(&sc->alloc_lock);
        spin_lock_init(&sc->release_lock);
        spin_lock_init(&sc->credit_ctrl_lock);
+       seqlock_init(&sc->waitlock);
        INIT_LIST_HEAD(&sc->piowait);
        INIT_WORK(&sc->halt_work, sc_halted);
        init_waitqueue_head(&sc->halt_wait);
@@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
 static void sc_piobufavail(struct send_context *sc)
 {
        struct hfi1_devdata *dd = sc->dd;
-       struct hfi1_ibdev *dev = &dd->verbs_dev;
        struct list_head *list;
        struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
        struct rvt_qp *qp;
@@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc)
         * could end up with QPs on the wait list with the interrupt
         * disabled.
         */
-       write_seqlock_irqsave(&dev->iowait_lock, flags);
+       write_seqlock_irqsave(&sc->waitlock, flags);
        while (!list_empty(list)) {
                struct iowait *wait;
 
@@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc)
                if (!list_empty(list))
                        hfi1_sc_wantpiobuf_intr(sc, 1);
        }
-       write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+       write_sequnlock_irqrestore(&sc->waitlock, flags);
 
        /* Wake up the most starved one first */
        if (n)
@@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd)
        kfree(dd->cr_base);
        dd->cr_base = NULL;
 }
+
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+                     struct send_context_info *sci)
+{
+       struct send_context *sc = sci->sc;
+       u64 reg;
+
+       seq_printf(s, "SCI %u: type %u base %u credits %u\n",
+                  i, sci->type, sci->base, sci->credits);
+       seq_printf(s, "  flags 0x%x sw_inx %u hw_ctxt %u grp %u\n",
+                  sc->flags,  sc->sw_index, sc->hw_context, sc->group);
+       seq_printf(s, "  sr_size %u credits %u sr_head %u sr_tail %u\n",
+                  sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail);
+       seq_printf(s, "  fill %lu free %lu fill_wrap %u alloc_free %lu\n",
+                  sc->fill, sc->free, sc->fill_wrap, sc->alloc_free);
+       seq_printf(s, "  credit_intr_count %u credit_ctrl 0x%llx\n",
+                  sc->credit_intr_count, sc->credit_ctrl);
+       reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS));
+       seq_printf(s, "  *hw_free %llu CurrentFree %llu LastReturned %llu\n",
+                  (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >>
+                   CR_COUNTER_SHIFT,
+                  (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) &
+                   SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK),
+                  reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK));
+}
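
seqfile_dump_sci() above pulls the two hardware counters out of the new SEND_CTXT_CREDIT_STATUS register using the field definitions added in chip_registers.h. A standalone sketch of that mask/shift decode; the register value here is invented purely to exercise the arithmetic.

#include <stdio.h>
#include <stdint.h>

/* field layout as defined in the chip_registers.h hunk above */
#define CURRENT_FREE_MASK	0x7FFull
#define CURRENT_FREE_SHIFT	32
#define LAST_RETURNED_SMASK	0x7FFull

int main(void)
{
	uint64_t reg = 0x000001f2000001a5ull;	/* invented register value */
	uint64_t current_free = (reg >> CURRENT_FREE_SHIFT) & CURRENT_FREE_MASK;
	uint64_t last_returned = reg & LAST_RETURNED_SMASK;

	printf("CurrentFree %llu LastReturned %llu\n",
	       (unsigned long long)current_free,
	       (unsigned long long)last_returned);
	return 0;
}
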
index aaf372c3e5d6a3cc0de82aaf9819c02b97bd195f..c9a58b642bdd6fea1f2ca767ca9e52eec80e36a2 100644 (file)
@@ -127,6 +127,8 @@ struct send_context {
        volatile __le64 *hw_free;       /* HW free counter */
        /* list for PIO waiters */
        struct list_head piowait  ____cacheline_aligned_in_smp;
+       seqlock_t waitlock;
+
        spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
        u32 credit_intr_count;          /* count of credit intr users */
        u64 credit_ctrl;                /* cache for credit control */
@@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
 void seg_pio_copy_end(struct pio_buf *pbuf);
 
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+                     struct send_context_info *sci);
+
 #endif /* _PIO_H */
index 6f3bc4dab858999740333d28f78c84f5adfa1117..e32fbfe029bc2991dc81a97b5d2c1de3783b61ad 100644 (file)
@@ -368,20 +368,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
 
 static void qp_pio_drain(struct rvt_qp *qp)
 {
-       struct hfi1_ibdev *dev;
        struct hfi1_qp_priv *priv = qp->priv;
 
        if (!priv->s_sendcontext)
                return;
-       dev = to_idev(qp->ibqp.device);
        while (iowait_pio_pending(&priv->s_iowait)) {
-               write_seqlock_irq(&dev->iowait_lock);
+               write_seqlock_irq(&priv->s_sendcontext->waitlock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
-               write_sequnlock_irq(&dev->iowait_lock);
+               write_sequnlock_irq(&priv->s_sendcontext->waitlock);
                iowait_pio_drain(&priv->s_iowait);
-               write_seqlock_irq(&dev->iowait_lock);
+               write_seqlock_irq(&priv->s_sendcontext->waitlock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
-               write_sequnlock_irq(&dev->iowait_lock);
+               write_sequnlock_irq(&priv->s_sendcontext->waitlock);
        }
 }
 
@@ -452,7 +450,6 @@ static int iowait_sleep(
        struct hfi1_qp_priv *priv;
        unsigned long flags;
        int ret = 0;
-       struct hfi1_ibdev *dev;
 
        qp = tx->qp;
        priv = qp->priv;
@@ -465,9 +462,8 @@ static int iowait_sleep(
                 * buffer and undoing the side effects of the copy.
                 */
                /* Make a common routine? */
-               dev = &sde->dd->verbs_dev;
                list_add_tail(&stx->list, &wait->tx_head);
-               write_seqlock(&dev->iowait_lock);
+               write_seqlock(&sde->waitlock);
                if (sdma_progress(sde, seq, stx))
                        goto eagain;
                if (list_empty(&priv->s_iowait.list)) {
@@ -478,11 +474,11 @@ static int iowait_sleep(
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        iowait_queue(pkts_sent, &priv->s_iowait,
                                     &sde->dmawait);
-                       priv->s_iowait.lock = &dev->iowait_lock;
+                       priv->s_iowait.lock = &sde->waitlock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        rvt_get_qp(qp);
                }
-               write_sequnlock(&dev->iowait_lock);
+               write_sequnlock(&sde->waitlock);
                hfi1_qp_unbusy(qp, wait);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -EBUSY;
@@ -492,7 +488,7 @@ static int iowait_sleep(
        }
        return ret;
 eagain:
-       write_sequnlock(&dev->iowait_lock);
+       write_sequnlock(&sde->waitlock);
        spin_unlock_irqrestore(&qp->s_lock, flags);
        list_del_init(&stx->list);
        return -EAGAIN;
index 188aa4f686a075bfcd77f2a8277fa5a4b55cad03..be603f35d7e47cb8c062b12fb30daf13725e49c2 100644 (file)
@@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
                if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
                    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
                        break;
+               rvt_qp_wqe_unreserve(qp, wqe);
                s_last = qp->s_last;
                trace_hfi1_qp_send_completion(qp, wqe, s_last);
                if (++s_last >= qp->s_size)
@@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
                u32 s_last;
 
                rvt_put_swqe(wqe);
+               rvt_qp_wqe_unreserve(qp, wqe);
                s_last = qp->s_last;
                trace_hfi1_qp_send_completion(qp, wqe, s_last);
                if (++s_last >= qp->s_size)
@@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        struct ib_reth *reth;
        unsigned long flags;
        int ret;
-       bool is_fecn = false;
-       bool copy_last = false;
+       bool copy_last = false, fecn;
        u32 rkey;
        u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
 
@@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       is_fecn = process_ecn(qp, packet, false);
+       fecn = process_ecn(qp, packet);
 
        /*
         * Process responses (ACKs) before anything else.  Note that the
@@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
                rc_rcv_resp(packet);
-               if (is_fecn)
-                       goto send_ack;
                return;
        }
 
@@ -2347,11 +2346,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 
                /* Schedule the send engine. */
                qp->s_flags |= RVT_S_RESP_PENDING;
+               if (fecn)
+                       qp->s_flags |= RVT_S_ECN;
                hfi1_schedule_send(qp);
 
                spin_unlock_irqrestore(&qp->s_lock, flags);
-               if (is_fecn)
-                       goto send_ack;
                return;
        }
 
@@ -2413,11 +2412,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 
                /* Schedule the send engine. */
                qp->s_flags |= RVT_S_RESP_PENDING;
+               if (fecn)
+                       qp->s_flags |= RVT_S_ECN;
                hfi1_schedule_send(qp);
 
                spin_unlock_irqrestore(&qp->s_lock, flags);
-               if (is_fecn)
-                       goto send_ack;
                return;
        }
 
@@ -2430,16 +2429,9 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        qp->r_ack_psn = psn;
        qp->r_nak_state = 0;
        /* Send an ACK if requested or required. */
-       if (psn & IB_BTH_REQ_ACK) {
-               if (packet->numpkt == 0) {
-                       rc_cancel_ack(qp);
-                       goto send_ack;
-               }
-               if (qp->r_adefered >= HFI1_PSN_CREDIT) {
-                       rc_cancel_ack(qp);
-                       goto send_ack;
-               }
-               if (unlikely(is_fecn)) {
+       if (psn & IB_BTH_REQ_ACK || fecn) {
+               if (packet->numpkt == 0 || fecn ||
+                   qp->r_adefered >= HFI1_PSN_CREDIT) {
                        rc_cancel_ack(qp);
                        goto send_ack;
                }
@@ -2480,7 +2472,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
        qp->r_ack_psn = qp->r_psn;
 send_ack:
-       hfi1_send_rc_ack(packet, is_fecn);
+       hfi1_send_rc_ack(packet, fecn);
 }
 
 void hfi1_rc_hdrerr(
index 891d2386d1cac003652d939687cf3f7fbdc11b64..b84356e1a4c1c52745f3363dd5fa40e82d163787 100644 (file)
@@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
                seqlock_init(&sde->head_lock);
                spin_lock_init(&sde->senddmactrl_lock);
                spin_lock_init(&sde->flushlist_lock);
+               seqlock_init(&sde->waitlock);
                /* insure there is always a zero bit */
                sde->ahg_bits = 0xfffffffe00000000ULL;
 
@@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
        struct iowait *wait, *nw;
        struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
        uint i, n = 0, seq, max_idx = 0;
-       struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
        u8 max_starved_cnt = 0;
 
 #ifdef CONFIG_SDMA_VERBOSITY
@@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 #endif
 
        do {
-               seq = read_seqbegin(&dev->iowait_lock);
+               seq = read_seqbegin(&sde->waitlock);
                if (!list_empty(&sde->dmawait)) {
                        /* at least one item */
-                       write_seqlock(&dev->iowait_lock);
+                       write_seqlock(&sde->waitlock);
                        /* Harvest waiters wanting DMA descriptors */
                        list_for_each_entry_safe(
                                        wait,
@@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
                                list_del_init(&wait->list);
                                waits[n++] = wait;
                        }
-                       write_sequnlock(&dev->iowait_lock);
+                       write_sequnlock(&sde->waitlock);
                        break;
                }
-       } while (read_seqretry(&dev->iowait_lock, seq));
+       } while (read_seqretry(&sde->waitlock, seq));
 
        /* Schedule the most starved one first */
        if (n)
index 6dc63d7c5685ec3f6aef7edad94bfea84468fd7b..1e2e40f79cb2050494a23c15c4fce7204bf6876e 100644 (file)
@@ -382,6 +382,7 @@ struct sdma_engine {
        u64                     progress_int_cnt;
 
        /* private: */
+       seqlock_t            waitlock;
        struct list_head      dmawait;
 
        /* CONFIG SDMA for now, just blindly duplicate */
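
The pio.c, qp.c, sdma.c, user_sdma.c, verbs.c and vnic_sdma.c hunks in this series all replace the single per-device dev->iowait_lock with a seqlock embedded in the send context or SDMA engine, so waiters on different engines no longer contend on one lock. A minimal kernel-side sketch of the read/retry-then-write pattern that sdma_desc_avail() now uses on the per-engine lock; the structure and field names are illustrative, not the driver's:

#include <linux/seqlock.h>
#include <linux/list.h>

struct demo_engine {			/* stand-in for struct sdma_engine */
	seqlock_t waitlock;		/* initialized with seqlock_init() */
	struct list_head dmawait;	/* initialized with INIT_LIST_HEAD() */
};

static void demo_harvest_waiters(struct demo_engine *e)
{
	unsigned int seq;

	do {
		/* cheap, lock-free peek at the wait list */
		seq = read_seqbegin(&e->waitlock);
		if (!list_empty(&e->dmawait)) {
			/* take the write side only when there is work */
			write_seqlock(&e->waitlock);
			/* ... move waiters off e->dmawait here ... */
			write_sequnlock(&e->waitlock);
			break;
		}
	} while (read_seqretry(&e->waitlock, seq));
}
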
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
new file mode 100644 (file)
index 0000000..da1ecb6
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs.h"
+#include "tid_rdma.h"
+
+/**
+ * qp_to_rcd - determine the receive context used by a qp
+ * @rdi: the rvt device info
+ * @qp: the qp
+ *
+ * This routine returns the receive context associated
+ * with a qp's qpn.
+ * Returns the context.
+ */
+static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
+                                      struct rvt_qp *qp)
+{
+       struct hfi1_ibdev *verbs_dev = container_of(rdi,
+                                                   struct hfi1_ibdev,
+                                                   rdi);
+       struct hfi1_devdata *dd = container_of(verbs_dev,
+                                              struct hfi1_devdata,
+                                              verbs_dev);
+       unsigned int ctxt;
+
+       if (qp->ibqp.qp_num == 0)
+               ctxt = 0;
+       else
+               ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
+                       (dd->n_krcv_queues - 1)) + 1;
+
+       return dd->rcd[ctxt];
+}
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+                     struct ib_qp_init_attr *init_attr)
+{
+       struct hfi1_qp_priv *qpriv = qp->priv;
+
+       qpriv->rcd = qp_to_rcd(rdi, qp);
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
new file mode 100644 (file)
index 0000000..6fcd3ad
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef HFI1_TID_RDMA_H
+#define HFI1_TID_RDMA_H
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+                     struct ib_qp_init_attr *init_attr);
+
+#endif /* HFI1_TID_RDMA_H */
+
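
The qp_to_rcd() helper in the new tid_rdma.c spreads QP numbers across the kernel receive contexts, with QP 0 pinned to context 0. A standalone sketch of the same arithmetic; the qos_shift and queue-count values are invented for illustration.

#include <stdio.h>

#define QOS_SHIFT	2	/* invented; the driver reads this from the device */
#define N_KRCV_QUEUES	9	/* invented kernel receive context count */

static unsigned int qpn_to_ctxt(unsigned int qpn)
{
	if (qpn == 0)
		return 0;
	return ((qpn >> QOS_SHIFT) % (N_KRCV_QUEUES - 1)) + 1;
}

int main(void)
{
	unsigned int qpn;

	for (qpn = 0; qpn < 40; qpn += 8)
		printf("qpn %u -> rcd %u\n", qpn, qpn_to_ctxt(qpn));
	return 0;
}
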
index 6aca0c5a7f97a1e180b9a11e2b81e95c79125751..6ba47037c4243d90039ca36d6790ce5c96cc3edf 100644 (file)
@@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
        if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       process_ecn(qp, packet, true);
+       process_ecn(qp, packet);
 
        psn = ib_bth_get_psn(ohdr);
        /* Compare the PSN verses the expected PSN. */
index 4baa8f4d49de17607a031d8198f37190b79f1dcf..88242fe95eaae89f58c98882c80abd32d8ac7c3f 100644 (file)
@@ -51,6 +51,7 @@
 #include "hfi.h"
 #include "mad.h"
 #include "verbs_txreq.h"
+#include "trace_ibhdrs.h"
 #include "qp.h"
 
 /* We support only two types - 9B and 16B for now */
@@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
        u32 bth0, plen, vl, hwords = 7;
        u16 len;
        u8 l4;
-       struct hfi1_16b_header hdr;
+       struct hfi1_opa_header hdr;
        struct ib_other_headers *ohdr;
        struct pio_buf *pbuf;
        struct send_context *ctxt = qp_to_send_context(qp, sc5);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        u32 nwords;
 
+       hdr.hdr_type = HFI1_PKT_TYPE_16B;
        /* Populate length */
        nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
                   SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
        if (old_grh) {
-               struct ib_grh *grh = &hdr.u.l.grh;
+               struct ib_grh *grh = &hdr.opah.u.l.grh;
 
                grh->version_tclass_flow = old_grh->version_tclass_flow;
                grh->paylen = cpu_to_be16(
@@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
                grh->hop_limit = 0xff;
                grh->sgid = old_grh->dgid;
                grh->dgid = old_grh->sgid;
-               ohdr = &hdr.u.l.oth;
+               ohdr = &hdr.opah.u.l.oth;
                l4 = OPA_16B_L4_IB_GLOBAL;
                hwords += sizeof(struct ib_grh) / sizeof(u32);
        } else {
-               ohdr = &hdr.u.oth;
+               ohdr = &hdr.opah.u.oth;
                l4 = OPA_16B_L4_IB_LOCAL;
        }
 
@@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
 
        /* Convert dwords to flits */
        len = (hwords + nwords) >> 1;
-       hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5);
+       hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);
 
        plen = 2 /* PBC */ + hwords + nwords;
        pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
@@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
        pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
        if (ctxt) {
                pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
-               if (pbuf)
+               if (pbuf) {
+                       trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
                        ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
                                                 &hdr, hwords);
+               }
        }
 }
 
@@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
        u32 bth0, plen, vl, hwords = 5;
        u16 lrh0;
        u8 sl = ibp->sc_to_sl[sc5];
-       struct ib_header hdr;
+       struct hfi1_opa_header hdr;
        struct ib_other_headers *ohdr;
        struct pio_buf *pbuf;
        struct send_context *ctxt = qp_to_send_context(qp, sc5);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
+       hdr.hdr_type = HFI1_PKT_TYPE_9B;
        if (old_grh) {
-               struct ib_grh *grh = &hdr.u.l.grh;
+               struct ib_grh *grh = &hdr.ibh.u.l.grh;
 
                grh->version_tclass_flow = old_grh->version_tclass_flow;
                grh->paylen = cpu_to_be16(
@@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
                grh->hop_limit = 0xff;
                grh->sgid = old_grh->dgid;
                grh->dgid = old_grh->sgid;
-               ohdr = &hdr.u.l.oth;
+               ohdr = &hdr.ibh.u.l.oth;
                lrh0 = HFI1_LRH_GRH;
                hwords += sizeof(struct ib_grh) / sizeof(u32);
        } else {
-               ohdr = &hdr.u.oth;
+               ohdr = &hdr.ibh.u.oth;
                lrh0 = HFI1_LRH_BTH;
        }
 
@@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
        ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
        ohdr->bth[2] = 0; /* PSN 0 */
 
-       hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
+       hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
        plen = 2 /* PBC */ + hwords;
        pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
        vl = sc_to_vlt(ppd->dd, sc5);
        pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
        if (ctxt) {
                pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
-               if (pbuf)
+               if (pbuf) {
+                       trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
                        ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
                                                 &hdr, hwords);
+               }
        }
 }
 
@@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
                src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
        }
 
-       process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
+       process_ecn(qp, packet);
        /*
         * Get the number of bytes the message was padded by
         * and drop incomplete packets.
index 3f0aadccd9f6924da593fa7fc3574f49cfcd1704..e5e7fad09f3243d71b1058fc3e42bfd957734800 100644 (file)
@@ -130,7 +130,6 @@ static int defer_packet_queue(
 {
        struct hfi1_user_sdma_pkt_q *pq =
                container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
-       struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
        struct user_sdma_txreq *tx =
                container_of(txreq, struct user_sdma_txreq, txreq);
 
@@ -144,10 +143,10 @@ static int defer_packet_queue(
         * it is supposed to be enqueued.
         */
        xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
-       write_seqlock(&dev->iowait_lock);
+       write_seqlock(&sde->waitlock);
        if (list_empty(&pq->busy.list))
                iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
-       write_sequnlock(&dev->iowait_lock);
+       write_sequnlock(&sde->waitlock);
        return -EBUSY;
 eagain:
        return -EAGAIN;
index 48e11e51035888d46883a8a58bc43846df8ba803..28f754d2a8440004ab93f8c0a49dd39671fa7b4d 100644 (file)
@@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp,
 {
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_devdata *dd = sc->dd;
-       struct hfi1_ibdev *dev = &dd->verbs_dev;
        unsigned long flags;
        int ret = 0;
 
@@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp,
         */
        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
-               write_seqlock(&dev->iowait_lock);
+               write_seqlock(&sc->waitlock);
                list_add_tail(&ps->s_txreq->txreq.list,
                              &ps->wait->tx_head);
                if (list_empty(&priv->s_iowait.list)) {
@@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp,
                        was_empty = list_empty(&sc->piowait);
                        iowait_queue(ps->pkts_sent, &priv->s_iowait,
                                     &sc->piowait);
-                       priv->s_iowait.lock = &dev->iowait_lock;
+                       priv->s_iowait.lock = &sc->waitlock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
                        rvt_get_qp(qp);
                        /* counting: only call wantpiobuf_intr if first user */
                        if (was_empty)
                                hfi1_sc_wantpiobuf_intr(sc, 1);
                }
-               write_sequnlock(&dev->iowait_lock);
+               write_sequnlock(&sc->waitlock);
                hfi1_qp_unbusy(qp, ps->wait);
                ret = -EBUSY;
        }
@@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 
                                if (slen > len)
                                        slen = len;
+                               if (slen > ss->sge.sge_length)
+                                       slen = ss->sge.sge_length;
                                rvt_update_sge(ss, slen, false);
                                seg_pio_copy_mid(pbuf, addr, slen);
                                len -= slen;
@@ -1704,6 +1705,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
 
        dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+       dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init;
        dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
        dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
        dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
index 64c9054db5f32fe707f3dcd6313e2f335c4bfdcd..1ad0b14bdb3c86c417dfd0b27e8ab605b78efd57 100644 (file)
@@ -71,6 +71,7 @@ struct hfi1_devdata;
 struct hfi1_packet;
 
 #include "iowait.h"
+#include "tid_rdma.h"
 
 #define HFI1_MAX_RDMA_ATOMIC     16
 
@@ -156,6 +157,7 @@ struct hfi1_qp_priv {
        struct hfi1_ahg_info *s_ahg;              /* ahg info for next header */
        struct sdma_engine *s_sde;                /* current sde */
        struct send_context *s_sendcontext;       /* current sendcontext */
+       struct hfi1_ctxtdata *rcd;                /* QP's receive context */
        u8 s_sc;                                  /* SC[0..4] for next packet */
        struct iowait s_iowait;
        struct rvt_qp *owner;
index c9876d9e3cb9d16cf350da8a563740ce15934c4e..a922db58be1441020c665ee16ef081f5f3a2bfaf 100644 (file)
@@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
 
        size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
        netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
-                                 chip_sdma_engines(dd), dd->num_vnic_contexts);
+                                 dd->num_sdma, dd->num_vnic_contexts);
        if (!netdev)
                return ERR_PTR(-ENOMEM);
 
        rn = netdev_priv(netdev);
        vinfo = opa_vnic_dev_priv(netdev);
        vinfo->dd = dd;
-       vinfo->num_tx_q = chip_sdma_engines(dd);
+       vinfo->num_tx_q = dd->num_sdma;
        vinfo->num_rx_q = dd->num_vnic_contexts;
        vinfo->netdev = netdev;
        rn->free_rdma_netdev = hfi1_vnic_free_rn;
index 97bd940a056ac49f57ca3906f918e60e8fc161c6..1f81c480e02886007972f3e3018961eeb7ca2078 100644 (file)
@@ -57,7 +57,6 @@
 
 #define HFI1_VNIC_TXREQ_NAME_LEN   32
 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64
-#define HFI1_VNIC_SDMA_RETRY_COUNT 1
 
 /*
  * struct vnic_txreq - VNIC transmit descriptor
@@ -67,7 +66,6 @@
  * @pad: pad buffer
  * @plen: pad length
  * @pbc_val: pbc value
- * @retry_count: tx retry count
  */
 struct vnic_txreq {
        struct sdma_txreq       txreq;
@@ -77,8 +75,6 @@ struct vnic_txreq {
        unsigned char           pad[HFI1_VNIC_MAX_PAD];
        u16                     plen;
        __le64                  pbc_val;
-
-       u32                     retry_count;
 };
 
 static void vnic_sdma_complete(struct sdma_txreq *txreq,
@@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
        ret = build_vnic_tx_desc(sde, tx, pbc);
        if (unlikely(ret))
                goto free_desc;
-       tx->retry_count = 0;
 
        ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
                              &tx->txreq, vnic_sdma->pkts_sent);
@@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
 {
        struct hfi1_vnic_sdma *vnic_sdma =
                container_of(wait->iow, struct hfi1_vnic_sdma, wait);
-       struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
-       struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
 
-       if (sdma_progress(sde, seq, txreq))
-               if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
-                       return -EAGAIN;
+       write_seqlock(&sde->waitlock);
+       if (sdma_progress(sde, seq, txreq)) {
+               write_sequnlock(&sde->waitlock);
+               return -EAGAIN;
+       }
 
        vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
-       write_seqlock(&dev->iowait_lock);
        if (list_empty(&vnic_sdma->wait.list))
                iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
-       write_sequnlock(&dev->iowait_lock);
+       write_sequnlock(&sde->waitlock);
        return -EBUSY;
 }
 
index cf03404b9d5865dd08f179442a0ebd97a60652f7..004c88b32e13c8383a205e37dfcc6c7e0b9e41f5 100644 (file)
@@ -7,7 +7,7 @@ ccflags-y :=  -Idrivers/net/ethernet/hisilicon/hns3
 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
 hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
        hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
-       hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o
+       hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o
 obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
 hns-roce-hw-v1-objs := hns_roce_hw_v1.o
 obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
index 46f65f9f59d0a654a3ff137dea657387c948eecf..6300033a448f0b82b77291c0926bf2b02785c099 100644 (file)
@@ -239,6 +239,8 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 
 void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
 {
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+               hns_roce_cleanup_srq_table(hr_dev);
        hns_roce_cleanup_qp_table(hr_dev);
        hns_roce_cleanup_cq_table(hr_dev);
        hns_roce_cleanup_mr_table(hr_dev);
index 9549ae51a0dd5dc009b4aa2119b444d158c0af9b..927701df5eff7b55e202655b7b7f5db292b1c524 100644 (file)
@@ -120,6 +120,10 @@ enum {
        HNS_ROCE_CMD_SQD2RTS_QP         = 0x20,
        HNS_ROCE_CMD_2RST_QP            = 0x21,
        HNS_ROCE_CMD_QUERY_QP           = 0x22,
+       HNS_ROCE_CMD_SW2HW_SRQ          = 0x70,
+       HNS_ROCE_CMD_MODIFY_SRQC        = 0x72,
+       HNS_ROCE_CMD_QUERY_SRQC         = 0x73,
+       HNS_ROCE_CMD_HW2SW_SRQ          = 0x74,
 };
 
 int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
index 93d4b4ec002d201f78cfc1b2477864c265785746..f4c92a7ac1ce7ffe60dbc59ffe3c792cf954463a 100644 (file)
 #define ROCEE_RX_CMQ_TAIL_REG                  0x07024
 #define ROCEE_RX_CMQ_HEAD_REG                  0x07028
 
-#define ROCEE_VF_MB_CFG0_REG                   0x40
-#define ROCEE_VF_MB_STATUS_REG                 0x58
-
 #define ROCEE_VF_EQ_DB_CFG0_REG                        0x238
 #define ROCEE_VF_EQ_DB_CFG1_REG                        0x23C
 
index d39bdfdb5de97d450b1354344ffd96e87ffc66af..779dd4c409cb821f13a73d83642dc25c932c29d3 100644 (file)
 #define PAGES_SHIFT_24                         24
 #define PAGES_SHIFT_32                         32
 
+#define HNS_ROCE_IDX_QUE_ENTRY_SZ              4
+#define SRQ_DB_REG                             0x230
+
 enum {
        HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
        HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -196,6 +199,7 @@ enum {
        HNS_ROCE_CAP_FLAG_RQ_INLINE             = BIT(2),
        HNS_ROCE_CAP_FLAG_RECORD_DB             = BIT(3),
        HNS_ROCE_CAP_FLAG_SQ_RECORD_DB          = BIT(4),
+       HNS_ROCE_CAP_FLAG_SRQ                   = BIT(5),
        HNS_ROCE_CAP_FLAG_MW                    = BIT(7),
        HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
        HNS_ROCE_CAP_FLAG_ATOMIC                = BIT(10),
@@ -204,6 +208,8 @@ enum {
 enum hns_roce_mtt_type {
        MTT_TYPE_WQE,
        MTT_TYPE_CQE,
+       MTT_TYPE_SRQWQE,
+       MTT_TYPE_IDX
 };
 
 enum {
@@ -339,6 +345,10 @@ struct hns_roce_mr_table {
        struct hns_roce_hem_table       mtpt_table;
        struct hns_roce_buddy           mtt_cqe_buddy;
        struct hns_roce_hem_table       mtt_cqe_table;
+       struct hns_roce_buddy           mtt_srqwqe_buddy;
+       struct hns_roce_hem_table       mtt_srqwqe_table;
+       struct hns_roce_buddy           mtt_idx_buddy;
+       struct hns_roce_hem_table       mtt_idx_table;
 };
 
 struct hns_roce_wq {
@@ -429,9 +439,37 @@ struct hns_roce_cq {
        struct completion               free;
 };
 
+struct hns_roce_idx_que {
+       struct hns_roce_buf             idx_buf;
+       int                             entry_sz;
+       u32                             buf_size;
+       struct ib_umem                  *umem;
+       struct hns_roce_mtt             mtt;
+       u64                             *bitmap;
+};
+
 struct hns_roce_srq {
        struct ib_srq           ibsrq;
-       int                     srqn;
+       void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+       unsigned long           srqn;
+       int                     max;
+       int                     max_gs;
+       int                     wqe_shift;
+       void __iomem            *db_reg_l;
+
+       atomic_t                refcount;
+       struct completion       free;
+
+       struct hns_roce_buf     buf;
+       u64                    *wrid;
+       struct ib_umem         *umem;
+       struct hns_roce_mtt     mtt;
+       struct hns_roce_idx_que idx_que;
+       spinlock_t              lock;
+       int                     head;
+       int                     tail;
+       u16                     wqe_ctr;
+       struct mutex            mutex;
 };
 
 struct hns_roce_uar_table {
@@ -453,6 +491,12 @@ struct hns_roce_cq_table {
        struct hns_roce_hem_table       table;
 };
 
+struct hns_roce_srq_table {
+       struct hns_roce_bitmap          bitmap;
+       struct xarray                   xa;
+       struct hns_roce_hem_table       table;
+};
+
 struct hns_roce_raq_table {
        struct hns_roce_buf_list        *e_raq_buf;
 };
@@ -602,6 +646,12 @@ struct hns_roce_aeqe {
                        u32 rsv1;
                } qp_event;
 
+               struct {
+                       __le32 srq;
+                       u32 rsv0;
+                       u32 rsv1;
+               } srq_event;
+
                struct {
                        __le32 cq;
                        u32 rsv0;
@@ -679,7 +729,12 @@ struct hns_roce_caps {
        u32             max_extend_sg;
        int             num_qps;        /* 256k */
        int             reserved_qps;
+       u32             max_srq_sg;
+       int             num_srqs;
        u32             max_wqes;       /* 16k */
+       u32             max_srqs;
+       u32             max_srq_wrs;
+       u32             max_srq_sges;
        u32             max_sq_desc_sz; /* 64 */
        u32             max_rq_desc_sz; /* 64 */
        u32             max_srq_desc_sz;
@@ -690,12 +745,16 @@ struct hns_roce_caps {
        int             min_cqes;
        u32             min_wqes;
        int             reserved_cqs;
+       int             reserved_srqs;
+       u32             max_srqwqes;
        int             num_aeq_vectors;        /* 1 */
        int             num_comp_vectors;
        int             num_other_vectors;
        int             num_mtpts;
        u32             num_mtt_segs;
        u32             num_cqe_segs;
+       u32             num_srqwqe_segs;
+       u32             num_idx_segs;
        int             reserved_mrws;
        int             reserved_uars;
        int             num_pds;
@@ -709,6 +768,8 @@ struct hns_roce_caps {
        int             irrl_entry_sz;
        int             trrl_entry_sz;
        int             cqc_entry_sz;
+       int             srqc_entry_sz;
+       int             idx_entry_sz;
        u32             pbl_ba_pg_sz;
        u32             pbl_buf_pg_sz;
        u32             pbl_hop_num;
@@ -737,6 +798,12 @@ struct hns_roce_caps {
        u32             cqe_ba_pg_sz;
        u32             cqe_buf_pg_sz;
        u32             cqe_hop_num;
+       u32             srqwqe_ba_pg_sz;
+       u32             srqwqe_buf_pg_sz;
+       u32             srqwqe_hop_num;
+       u32             idx_ba_pg_sz;
+       u32             idx_buf_pg_sz;
+       u32             idx_hop_num;
        u32             eqe_ba_pg_sz;
        u32             eqe_buf_pg_sz;
        u32             eqe_hop_num;
@@ -805,6 +872,17 @@ struct hns_roce_hw {
        int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
        int (*init_eq)(struct hns_roce_dev *hr_dev);
        void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
+       void (*write_srqc)(struct hns_roce_dev *hr_dev,
+                          struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
+                          void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
+                          dma_addr_t dma_handle_wqe,
+                          dma_addr_t dma_handle_idx);
+       int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+                      enum ib_srq_attr_mask srq_attr_mask,
+                      struct ib_udata *udata);
+       int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+       int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+                            const struct ib_recv_wr **bad_wr);
 };
 
 struct hns_roce_dev {
@@ -839,6 +917,7 @@ struct hns_roce_dev {
        struct hns_roce_uar_table uar_table;
        struct hns_roce_mr_table  mr_table;
        struct hns_roce_cq_table  cq_table;
+       struct hns_roce_srq_table srq_table;
        struct hns_roce_qp_table  qp_table;
        struct hns_roce_eq_table  eq_table;
 
@@ -951,12 +1030,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
 
 void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
 
 int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
 void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
@@ -1011,6 +1092,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
                               struct hns_roce_mtt *mtt, struct ib_umem *umem);
 
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+                                  struct ib_srq_init_attr *srq_init_attr,
+                                  struct ib_udata *udata);
+int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+                       enum ib_srq_attr_mask srq_attr_mask,
+                       struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq);
+
 struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
                                 struct ib_qp_init_attr *init_attr,
                                 struct ib_udata *udata);
@@ -1052,6 +1141,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
 int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
 int hns_roce_init(struct hns_roce_dev *hr_dev);
 void hns_roce_exit(struct hns_roce_dev *hr_dev);
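
The new hns_roce_srq_table above keeps SRQs in an xarray keyed by SRQ number. The hns_roce_srq.c file added by this series is not shown in this section, so the following is only a generic kernel-side sketch of the store/lookup pattern such a table supports, with invented names:

#include <linux/xarray.h>
#include <linux/gfp.h>

struct demo_srq {
	unsigned long srqn;
	/* ... */
};

static DEFINE_XARRAY(demo_srq_xa);

/* publish an SRQ so event handlers can find it by number */
static int demo_srq_register(struct demo_srq *srq)
{
	return xa_err(xa_store(&demo_srq_xa, srq->srqn, srq, GFP_KERNEL));
}

static struct demo_srq *demo_srq_lookup(unsigned long srqn)
{
	return xa_load(&demo_srq_xa, srqn);
}
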
index f6faefed96e8bedfab852f27540a62fe90c1e3dd..4cdbcafa59155633f4d0c2f8e9ec383b3c30deb4 100644 (file)
@@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
            (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
            (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
            (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
-           (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT))
+           (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
+           (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
+           (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
                return true;
 
        return false;
@@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
                mhop->ba_l0_num = mhop->bt_chunk_size / 8;
                mhop->hop_num = hr_dev->caps.cqe_hop_num;
                break;
+       case HEM_TYPE_SRQWQE:
+               mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
+                                           + PAGE_SHIFT);
+               mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+                                           + PAGE_SHIFT);
+               mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+               mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
+               break;
+       case HEM_TYPE_IDX:
+               mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
+                                      + PAGE_SHIFT);
+               mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+                                      + PAGE_SHIFT);
+               mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+               mhop->hop_num = hr_dev->caps.idx_hop_num;
+               break;
        default:
                dev_err(dev, "Table %d not support multi-hop addressing!\n",
                         table->type);
@@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
                        bt_chunk_size = buf_chunk_size;
                        hop_num = hr_dev->caps.cqe_hop_num;
                        break;
+               case HEM_TYPE_SRQWQE:
+                       buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+                                       + PAGE_SHIFT);
+                       bt_chunk_size = buf_chunk_size;
+                       hop_num = hr_dev->caps.srqwqe_hop_num;
+                       break;
+               case HEM_TYPE_IDX:
+                       buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+                                       + PAGE_SHIFT);
+                       bt_chunk_size = buf_chunk_size;
+                       hop_num = hr_dev->caps.idx_hop_num;
+                       break;
                default:
                        dev_err(dev,
                          "Table %d not support to init hem table here!\n",
@@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
 
 void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
 {
+       if (hr_dev->caps.num_idx_segs)
+               hns_roce_cleanup_hem_table(hr_dev,
+                                          &hr_dev->mr_table.mtt_idx_table);
+       if (hr_dev->caps.num_srqwqe_segs)
+               hns_roce_cleanup_hem_table(hr_dev,
+                                          &hr_dev->mr_table.mtt_srqwqe_table);
+       if (hr_dev->caps.srqc_entry_sz)
+               hns_roce_cleanup_hem_table(hr_dev,
+                                          &hr_dev->srq_table.table);
        hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
        if (hr_dev->caps.trrl_entry_sz)
                hns_roce_cleanup_hem_table(hr_dev,
index e8850d59e7804caa45dd5e2cd77b140c7bfd7047..a650278c6fbdc6d128b7571094cb5cb723508b51 100644 (file)
@@ -48,6 +48,8 @@ enum {
         /* UNMAP HEM */
        HEM_TYPE_MTT,
        HEM_TYPE_CQE,
+       HEM_TYPE_SRQWQE,
+       HEM_TYPE_IDX,
        HEM_TYPE_IRRL,
        HEM_TYPE_TRRL,
 };
index 3beb1523e17c29c054da7ccb874912a754283d5d..5007fe7af2f9fb127953a5a442faee25ea44eaa5 100644 (file)
@@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
        return 0;
 }
 
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+                                                 int vf_id)
+{
+       struct hns_roce_cmq_desc desc;
+       struct hns_roce_vf_switch *swt;
+       int ret;
+
+       swt = (struct hns_roce_vf_switch *)desc.data;
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+       swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+       roce_set_field(swt->fun_id,
+                       VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+                       VF_SWITCH_DATA_FUN_ID_VF_ID_S,
+                       vf_id);
+       ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+       if (ret)
+               return ret;
+       desc.flag =
+               cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+       desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+       roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
+       roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
+       roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
+
+       return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
 static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_cmq_desc desc[2];
@@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
                return ret;
        }
 
+       if (hr_dev->pci_dev->revision == 0x21) {
+               ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+               if (ret) {
+                       dev_err(hr_dev->dev,
+                               "Setting function switch parameters failed, ret = %d.\n",
+                               ret);
+                       return ret;
+               }
+       }
 
        hr_dev->vendor_part_id = hr_dev->pci_dev->device;
        hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
@@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        caps->num_qps           = HNS_ROCE_V2_MAX_QP_NUM;
        caps->max_wqes          = HNS_ROCE_V2_MAX_WQE_NUM;
        caps->num_cqs           = HNS_ROCE_V2_MAX_CQ_NUM;
+       caps->num_srqs          = HNS_ROCE_V2_MAX_SRQ_NUM;
        caps->max_cqes          = HNS_ROCE_V2_MAX_CQE_NUM;
+       caps->max_srqwqes       = HNS_ROCE_V2_MAX_SRQWQE_NUM;
        caps->max_sq_sg         = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
        caps->max_extend_sg     = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
        caps->max_rq_sg         = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
        caps->max_sq_inline     = HNS_ROCE_V2_MAX_SQ_INLINE;
+       caps->max_srq_sg        = HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
        caps->num_uars          = HNS_ROCE_V2_UAR_NUM;
        caps->phy_num_uars      = HNS_ROCE_V2_PHY_UAR_NUM;
        caps->num_aeq_vectors   = HNS_ROCE_V2_AEQE_VEC_NUM;
@@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        caps->num_mtpts         = HNS_ROCE_V2_MAX_MTPT_NUM;
        caps->num_mtt_segs      = HNS_ROCE_V2_MAX_MTT_SEGS;
        caps->num_cqe_segs      = HNS_ROCE_V2_MAX_CQE_SEGS;
+       caps->num_srqwqe_segs   = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+       caps->num_idx_segs      = HNS_ROCE_V2_MAX_IDX_SEGS;
        caps->num_pds           = HNS_ROCE_V2_MAX_PD_NUM;
        caps->max_qp_init_rdma  = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
        caps->max_qp_dest_rdma  = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
@@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        caps->irrl_entry_sz     = HNS_ROCE_V2_IRRL_ENTRY_SZ;
        caps->trrl_entry_sz     = HNS_ROCE_V2_TRRL_ENTRY_SZ;
        caps->cqc_entry_sz      = HNS_ROCE_V2_CQC_ENTRY_SZ;
+       caps->srqc_entry_sz     = HNS_ROCE_V2_SRQC_ENTRY_SZ;
        caps->mtpt_entry_sz     = HNS_ROCE_V2_MTPT_ENTRY_SZ;
        caps->mtt_entry_sz      = HNS_ROCE_V2_MTT_ENTRY_SZ;
+       caps->idx_entry_sz      = 4;
        caps->cq_entry_sz       = HNS_ROCE_V2_CQE_ENTRY_SIZE;
        caps->page_size_cap     = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
        caps->reserved_lkey     = 0;
@@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        caps->reserved_mrws     = 1;
        caps->reserved_uars     = 0;
        caps->reserved_cqs      = 0;
+       caps->reserved_srqs     = 0;
        caps->reserved_qps      = HNS_ROCE_V2_RSV_QPS;
 
        caps->qpc_ba_pg_sz      = 0;
@@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        caps->cqe_ba_pg_sz      = 0;
        caps->cqe_buf_pg_sz     = 0;
        caps->cqe_hop_num       = HNS_ROCE_CQE_HOP_NUM;
+       caps->srqwqe_ba_pg_sz   = 0;
+       caps->srqwqe_buf_pg_sz  = 0;
+       caps->srqwqe_hop_num    = HNS_ROCE_SRQWQE_HOP_NUM;
+       caps->idx_ba_pg_sz      = 0;
+       caps->idx_buf_pg_sz     = 0;
+       caps->idx_hop_num       = HNS_ROCE_IDX_HOP_NUM;
        caps->eqe_ba_pg_sz      = 0;
        caps->eqe_buf_pg_sz     = 0;
        caps->eqe_hop_num       = HNS_ROCE_EQE_HOP_NUM;
@@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        caps->local_ca_ack_delay = 0;
        caps->max_mtu = IB_MTU_4096;
 
+       caps->max_srqs          = HNS_ROCE_V2_MAX_SRQ;
+       caps->max_srq_wrs       = HNS_ROCE_V2_MAX_SRQ_WR;
+       caps->max_srq_sges      = HNS_ROCE_V2_MAX_SRQ_SGE;
+
        if (hr_dev->pci_dev->revision == 0x21)
-               caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+               caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
+                              HNS_ROCE_CAP_FLAG_SRQ;
 
        ret = hns_roce_v2_set_bt(hr_dev);
        if (ret)
@@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
        hns_roce_free_link_table(hr_dev, &priv->tsq);
 }
 
+static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
+{
+       struct hns_roce_cmq_desc desc;
+       struct hns_roce_mbox_status *mb_st =
+                                      (struct hns_roce_mbox_status *)desc.data;
+       enum hns_roce_cmd_return_status status;
+
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true);
+
+       status = hns_roce_cmq_send(hr_dev, &desc, 1);
+       if (status)
+               return status;
+
+       return cpu_to_le32(mb_st->mb_status_hw_run);
+}
+
 static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
 {
-       u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+       u32 status = hns_roce_query_mbox_status(hr_dev);
 
        return status >> HNS_ROCE_HW_RUN_BIT_SHIFT;
 }
 
 static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev)
 {
-       u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+       u32 status = hns_roce_query_mbox_status(hr_dev);
 
        return status & HNS_ROCE_HW_MB_STATUS_MASK;
 }
 
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
+                             u64 out_param, u32 in_modifier, u8 op_modifier,
+                             u16 op, u16 token, int event)
+{
+       struct hns_roce_cmq_desc desc;
+       struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
+
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
+
+       mb->in_param_l = cpu_to_le64(in_param);
+       mb->in_param_h = cpu_to_le64(in_param) >> 32;
+       mb->out_param_l = cpu_to_le64(out_param);
+       mb->out_param_h = cpu_to_le64(out_param) >> 32;
+       mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
+       mb->token_event_en = cpu_to_le32(event << 16 | token);
+
+       return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
 static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
                                 u64 out_param, u32 in_modifier, u8 op_modifier,
                                 u16 op, u16 token, int event)
 {
        struct device *dev = hr_dev->dev;
-       u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base +
-                                          ROCEE_VF_MB_CFG0_REG);
        unsigned long end;
-       u32 val0 = 0;
-       u32 val1 = 0;
+       int ret;
 
        end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies;
        while (hns_roce_v2_cmd_pending(hr_dev)) {
@@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
                cond_resched();
        }
 
-       roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK,
-                      HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier);
-       roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK,
-                      HNS_ROCE_VF_MB4_CMD_SHIFT, op);
-       roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK,
-                      HNS_ROCE_VF_MB5_EVENT_SHIFT, event);
-       roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK,
-                      HNS_ROCE_VF_MB5_TOKEN_SHIFT, token);
-
-       writeq(in_param, hcr + 0);
-       writeq(out_param, hcr + 2);
-
-       /* Memory barrier */
-       wmb();
-
-       writel(val0, hcr + 4);
-       writel(val1, hcr + 5);
-
-       mmiowb();
+       ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier,
+                                op_modifier, op, token, event);
+       if (ret)
+               dev_err(dev, "Posting mailbox failed (%d)\n", ret);
 
-       return 0;
+       return ret;
 }
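
hns_roce_mbox_post() above replaces the old doorbell-register writes with a CMQ descriptor whose fields are simple packings of the mailbox arguments: each 64-bit parameter is split into low/high 32-bit words, and the modifier sits above the 8-bit opcode in cmd_tag. A standalone sketch of that packing, with byte-order handling omitted and all values illustrative:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t in_param = 0x1122334455667788ULL;      /* example argument */
        uint32_t in_modifier = 0x1234;
        uint32_t op = 0x56;
        uint16_t token = 7;
        int event = 1;

        uint32_t in_l = (uint32_t)in_param;             /* low 32 bits  */
        uint32_t in_h = (uint32_t)(in_param >> 32);     /* high 32 bits */
        uint32_t cmd_tag = in_modifier << 8 | op;       /* modifier over opcode */
        uint32_t token_event_en = (uint32_t)event << 16 | token;

        printf("in_l=%#x in_h=%#x tag=%#x token_en=%#x\n",
               in_l, in_h, cmd_tag, token_event_en);
        return 0;
}
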
 
 static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
@@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
        return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
 }
 
+static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+{
+       return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+{
+       u32 bitmap_num;
+       int bit_num;
+
+       /* always called with interrupts disabled. */
+       spin_lock(&srq->lock);
+
+       bitmap_num = wqe_index / (sizeof(u64) * 8);
+       bit_num = wqe_index % (sizeof(u64) * 8);
+       srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
+       srq->tail++;
+
+       spin_unlock(&srq->lock);
+}
+
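
The word/bit split in hns_roce_free_srq_wqe() above is ordinary modular arithmetic over 64-slot bitmap words. A minimal user-space sketch, with the lock and tail counter omitted and values illustrative:

#include <stdint.h>
#include <stdio.h>

/* Mark one SRQ WQE index as free again, 64 slots per bitmap word. */
static void mark_free(uint64_t *bitmap, int wqe_index)
{
        int word = wqe_index / 64;      /* sizeof(u64) * 8 bits per word */
        int bit = wqe_index % 64;

        bitmap[word] |= 1ULL << bit;
}

int main(void)
{
        uint64_t bitmap[2] = { 0, 0 };

        mark_free(bitmap, 5);
        mark_free(bitmap, 70);
        printf("%#llx %#llx\n", (unsigned long long)bitmap[0],
               (unsigned long long)bitmap[1]);
        return 0;
}
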
 static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
 {
        *hr_cq->set_ci_db = cons_index & 0xffffff;
@@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
        struct hns_roce_v2_cqe *cqe, *dest;
        u32 prod_index;
        int nfreed = 0;
+       int wqe_index;
        u8 owner_bit;
 
        for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
@@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
                if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
                                    V2_CQE_BYTE_16_LCL_QPN_S) &
                                    HNS_ROCE_V2_CQE_QPN_MASK) == qpn) {
-                       /* In v1 engine, not support SRQ */
+                       if (srq &&
+                           roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) {
+                               wqe_index = roce_get_field(cqe->byte_4,
+                                                    V2_CQE_BYTE_4_WQE_INDX_M,
+                                                    V2_CQE_BYTE_4_WQE_INDX_S);
+                               hns_roce_free_srq_wqe(srq, wqe_index);
+                       }
                        ++nfreed;
                } else if (nfreed) {
                        dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
@@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
 static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                                struct hns_roce_qp **cur_qp, struct ib_wc *wc)
 {
+       struct hns_roce_srq *srq = NULL;
        struct hns_roce_dev *hr_dev;
        struct hns_roce_v2_cqe *cqe;
        struct hns_roce_qp *hr_qp;
@@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
        wc->qp = &(*cur_qp)->ibqp;
        wc->vendor_err = 0;
 
+       if (is_send) {
+               wq = &(*cur_qp)->sq;
+               if ((*cur_qp)->sq_signal_bits) {
+                       /*
+                        * If sq_signal_bits is set, first advance the
+                        * tail pointer to the WQE that this CQE
+                        * corresponds to.
+                        */
+                       wqe_ctr = (u16)roce_get_field(cqe->byte_4,
+                                                     V2_CQE_BYTE_4_WQE_INDX_M,
+                                                     V2_CQE_BYTE_4_WQE_INDX_S);
+                       wq->tail += (wqe_ctr - (u16)wq->tail) &
+                                   (wq->wqe_cnt - 1);
+               }
+
+               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+               ++wq->tail;
+       } else if ((*cur_qp)->ibqp.srq) {
+               srq = to_hr_srq((*cur_qp)->ibqp.srq);
+               wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
+                                                    V2_CQE_BYTE_4_WQE_INDX_M,
+                                                    V2_CQE_BYTE_4_WQE_INDX_S));
+               wc->wr_id = srq->wrid[wqe_ctr];
+               hns_roce_free_srq_wqe(srq, wqe_ctr);
+       } else {
+               /* Update tail pointer, record wr_id */
+               wq = &(*cur_qp)->rq;
+               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+               ++wq->tail;
+       }
+
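
The send branch above jumps wq->tail straight to the WQE index reported in the CQE using masked arithmetic, which only works because wqe_cnt is a power of two. A compact sketch of that wrap-around update, with illustrative values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t wqe_cnt = 256;         /* ring size, must be a power of two */
        uint32_t tail = 0xfffe;         /* free-running counter */
        uint16_t wqe_ctr = 3;           /* WQE index reported by the CQE */

        /* Advance tail to the completed WQE, modulo the ring size. */
        tail += (wqe_ctr - (uint16_t)tail) & (wqe_cnt - 1);

        printf("tail=%u slot=%u\n", tail, tail & (wqe_cnt - 1));
        return 0;
}
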
        status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
                                V2_CQE_BYTE_4_STATUS_S);
        switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
@@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                        wc->status = IB_WC_GENERAL_ERR;
                        break;
                }
-
-               wq = &(*cur_qp)->sq;
-               if ((*cur_qp)->sq_signal_bits) {
-                       /*
-                        * If sg_signal_bit is 1,
-                        * firstly tail pointer updated to wqe
-                        * which current cqe correspond to
-                        */
-                       wqe_ctr = (u16)roce_get_field(cqe->byte_4,
-                                                     V2_CQE_BYTE_4_WQE_INDX_M,
-                                                     V2_CQE_BYTE_4_WQE_INDX_S);
-                       wq->tail += (wqe_ctr - (u16)wq->tail) &
-                                   (wq->wqe_cnt - 1);
-               }
-
-               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
-               ++wq->tail;
        } else {
                /* RQ correspond to CQE */
                wc->byte_len = le32_to_cpu(cqe->byte_cnt);
@@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
                                return -EAGAIN;
                }
 
-               /* Update tail pointer, record wr_id */
-               wq = &(*cur_qp)->rq;
-               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
-               ++wq->tail;
-
                wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
                                            V2_CQE_BYTE_32_SL_S);
                wc->src_qp = (u8)roce_get_field(cqe->byte_32,
@@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 
        roce_set_field(context->byte_20_smac_sgid_idx,
                       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+                      (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+                      hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
                       ilog2((unsigned int)hr_qp->rq.wqe_cnt));
        roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
                       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 
        roce_set_field(context->byte_20_smac_sgid_idx,
                       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+                      (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+                      hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
                       ilog2((unsigned int)hr_qp->rq.wqe_cnt));
        roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
                       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
        return 0;
 }
 
+static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
+                                            enum ib_qp_state new_state)
+{
+       if ((cur_state != IB_QPS_RESET &&
+           (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) ||
+           ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) &&
+           (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) ||
+           (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS))
+               return true;
+
+       return false;
+}
+
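
The new hns_roce_v2_check_qp_stat() treats transitions into ERR or RESET from any non-RESET state, moves among RTS/SQD, and SQE to RTS as requiring no QPC rewrite. A standalone sketch of the same predicate using a local stand-in enum (the driver itself uses enum ib_qp_state from the IB core):

#include <stdbool.h>
#include <stdio.h>

/* Local stand-in for enum ib_qp_state, for illustration only. */
enum qps { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS, QPS_SQD, QPS_SQE, QPS_ERR };

/* Mirrors the condition in hns_roce_v2_check_qp_stat() above. */
static bool no_ctx_update(enum qps cur, enum qps nxt)
{
        return (cur != QPS_RESET && (nxt == QPS_ERR || nxt == QPS_RESET)) ||
               ((cur == QPS_RTS || cur == QPS_SQD) &&
                (nxt == QPS_RTS || nxt == QPS_SQD)) ||
               (cur == QPS_SQE && nxt == QPS_RTS);
}

int main(void)
{
        printf("INIT->ERR: %d\n", no_ctx_update(QPS_INIT, QPS_ERR)); /* 1 */
        printf("RTS->SQD:  %d\n", no_ctx_update(QPS_RTS, QPS_SQD));  /* 1 */
        printf("INIT->RTR: %d\n", no_ctx_update(QPS_INIT, QPS_RTR)); /* 0 */
        return 0;
}
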
 static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                                 const struct ib_qp_attr *attr,
                                 int attr_mask, enum ib_qp_state cur_state,
@@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
         */
        memset(qpc_mask, 0xff, sizeof(*qpc_mask));
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+               memset(qpc_mask, 0, sizeof(*qpc_mask));
                modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
                                        qpc_mask);
        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
                                           qpc_mask);
                if (ret)
                        goto out;
-       } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) ||
-                  (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) ||
-                  (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) ||
-                  (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) ||
-                  (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) ||
-                  (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
-                  (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
-                  (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
-                  (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
-                  (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
-                  (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
-                  (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
-                  (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) ||
-                  (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) ||
-                  (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
+       } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) {
                /* Nothing */
                ;
        } else {
@@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
        if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
                set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
 
+       roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
+                    ibqp->srq ? 1 : 0);
+       roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
+                    V2_QPC_BYTE_108_INV_CREDIT_S, 0);
+
        /* Every status migrate must change state */
        roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
                       V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
                        hns_roce_free_db(hr_dev, &hr_qp->rdb);
        }
 
-       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+            hr_qp->rq.wqe_cnt) {
                kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
                kfree(hr_qp->rq_inl_buf.wqe_list);
        }
@@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
        int aeqe_found = 0;
        int event_type;
        int sub_type;
+       u32 srqn;
        u32 qpn;
        u32 cqn;
 
@@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
                cqn = roce_get_field(aeqe->event.cq_event.cq,
                                     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
                                     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+               srqn = roce_get_field(aeqe->event.srq_event.srq,
+                                    HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+                                    HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
                switch (event_type) {
                case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
                case HNS_ROCE_EVENT_TYPE_COMM_EST:
                case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
                case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+               case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
                case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
                case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
                        hns_roce_qp_event(hr_dev, qpn, event_type);
                        break;
                case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
-               case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
                case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+                       hns_roce_srq_event(hr_dev, srqn, event_type);
                        break;
                case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
                case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
@@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
                                eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
                                size = (eq->entries - eqe_alloc) * eq->eqe_size;
                        }
-                       eq->buf[i] = dma_alloc_coherent(dev, size,
+                       eq->buf[i] = dma_zalloc_coherent(dev, size,
                                                        &(eq->buf_dma[i]),
                                                        GFP_KERNEL);
                        if (!eq->buf[i])
                                goto err_dma_alloc_buf;
 
-                       memset(eq->buf[i], 0, size);
                        *(eq->bt_l0 + i) = eq->buf_dma[i];
 
                        eq_buf_cnt++;
@@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
                                        size = (eq->entries - eqe_alloc)
                                                * eq->eqe_size;
                                }
-                               eq->buf[idx] = dma_alloc_coherent(dev, size,
+                               eq->buf[idx] = dma_zalloc_coherent(dev, size,
                                                            &(eq->buf_dma[idx]),
                                                            GFP_KERNEL);
                                if (!eq->buf[idx])
                                        goto err_dma_alloc_buf;
 
-                               memset(eq->buf[idx], 0, size);
                                *(eq->bt_l1[i] + j) = eq->buf_dma[idx];
 
                                eq_buf_cnt++;
@@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
                        goto free_cmd_mbox;
                }
 
-               eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
+               eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz,
                                                       &(eq->buf_list->map),
                                                       GFP_KERNEL);
                if (!eq->buf_list->buf) {
@@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
                        goto err_alloc_buf;
                }
 
-               memset(eq->buf_list->buf, 0, buf_chk_sz);
        } else {
                ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
                if (ret) {
@@ -5332,6 +5456,284 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
        destroy_workqueue(hr_dev->irq_workq);
 }
 
+static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
+                                  struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
+                                  u32 cqn, void *mb_buf, u64 *mtts_wqe,
+                                  u64 *mtts_idx, dma_addr_t dma_handle_wqe,
+                                  dma_addr_t dma_handle_idx)
+{
+       struct hns_roce_srq_context *srq_context;
+
+       srq_context = mb_buf;
+       memset(srq_context, 0, sizeof(*srq_context));
+
+       roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
+                      SRQC_BYTE_4_SRQ_ST_S, 1);
+
+       roce_set_field(srq_context->byte_4_srqn_srqst,
+                      SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
+                      SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
+                      (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+                      hr_dev->caps.srqwqe_hop_num));
+       roce_set_field(srq_context->byte_4_srqn_srqst,
+                      SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
+                      ilog2(srq->max));
+
+       roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
+                      SRQC_BYTE_4_SRQN_S, srq->srqn);
+
+       roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+                      SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+       roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
+                      SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
+
+       srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
+
+       roce_set_field(srq_context->byte_24_wqe_bt_ba,
+                      SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
+                      SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
+                      cpu_to_le32(dma_handle_wqe >> 35));
+
+       roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
+                      SRQC_BYTE_28_PD_S, pdn);
+       roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
+                      SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
+                      fls(srq->max_gs - 1));
+
+       srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
+       srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
+       roce_set_field(srq_context->rsv_idx_bt_ba,
+                      SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
+                      SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
+                      cpu_to_le32(dma_handle_idx >> 35));
+
+       srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
+       srq_context->idx_cur_blk_addr =
+                                    cpu_to_le32(srq_context->idx_cur_blk_addr);
+       roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+                      SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
+                      SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
+                      cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
+       roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+                      SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
+                      SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
+                      hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+                      hr_dev->caps.idx_hop_num);
+
+       roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+                      SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
+                      SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
+                      hr_dev->caps.idx_ba_pg_sz);
+       roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+                      SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
+                      SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
+                      hr_dev->caps.idx_buf_pg_sz);
+
+       srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
+       srq_context->idx_nxt_blk_addr =
+                                  cpu_to_le32(srq_context->idx_nxt_blk_addr);
+       roce_set_field(srq_context->rsv_idxnxtblkaddr,
+                      SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
+                      SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
+                      cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
+       roce_set_field(srq_context->byte_56_xrc_cqn,
+                      SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
+                      cqn);
+       roce_set_field(srq_context->byte_56_xrc_cqn,
+                      SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
+                      SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
+                      hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
+       roce_set_field(srq_context->byte_56_xrc_cqn,
+                      SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
+                      SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
+                      hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);
+
+       roce_set_bit(srq_context->db_record_addr_record_en,
+                    SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+}
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+                                 struct ib_srq_attr *srq_attr,
+                                 enum ib_srq_attr_mask srq_attr_mask,
+                                 struct ib_udata *udata)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+       struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+       struct hns_roce_srq_context *srq_context;
+       struct hns_roce_srq_context *srqc_mask;
+       struct hns_roce_cmd_mailbox *mailbox;
+       int ret;
+
+       if (srq_attr_mask & IB_SRQ_LIMIT) {
+               if (srq_attr->srq_limit >= srq->max)
+                       return -EINVAL;
+
+               mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+               if (IS_ERR(mailbox))
+                       return PTR_ERR(mailbox);
+
+               srq_context = mailbox->buf;
+               srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+               memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+               roce_set_field(srq_context->byte_8_limit_wl,
+                              SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+                              SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
+               roce_set_field(srqc_mask->byte_8_limit_wl,
+                              SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+                              SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+               ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
+                                       HNS_ROCE_CMD_MODIFY_SRQC,
+                                       HNS_ROCE_CMD_TIMEOUT_MSECS);
+               hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+               if (ret) {
+                       dev_err(hr_dev->dev,
+                               "Failed to process MODIFY SRQ mailbox command.\n");
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+       struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+       struct hns_roce_srq_context *srq_context;
+       struct hns_roce_cmd_mailbox *mailbox;
+       int limit_wl;
+       int ret;
+
+       mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+       if (IS_ERR(mailbox))
+               return PTR_ERR(mailbox);
+
+       srq_context = mailbox->buf;
+       ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
+                               HNS_ROCE_CMD_QUERY_SRQC,
+                               HNS_ROCE_CMD_TIMEOUT_MSECS);
+       if (ret) {
+               dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
+               goto out;
+       }
+
+       limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
+                                 SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+                                 SRQC_BYTE_8_SRQ_LIMIT_WL_S);
+
+       attr->srq_limit = limit_wl;
+       attr->max_wr    = srq->max - 1;
+       attr->max_sge   = srq->max_gs;
+
+       memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
+
+out:
+       hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+       return ret;
+}
+
+static int find_empty_entry(struct hns_roce_idx_que *idx_que)
+{
+       int bit_num;
+       int i;
+
+       /* bitmap[i] is set zero if all bits are allocated */
+       /* bitmap[i] is zero when all of its bits have been allocated */
+               ;
+       bit_num = ffs(idx_que->bitmap[i]);
+       idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
+
+       return i * sizeof(u64) * 8 + (bit_num - 1);
+}
+
+static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
+                          int cur_idx, int wqe_idx)
+{
+       unsigned int *addr;
+
+       addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
+                                                  cur_idx * idx_que->entry_sz);
+       *addr = wqe_idx;
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+                                    const struct ib_recv_wr *wr,
+                                    const struct ib_recv_wr **bad_wr)
+{
+       struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+       struct hns_roce_v2_wqe_data_seg *dseg;
+       struct hns_roce_v2_db srq_db;
+       unsigned long flags;
+       int ret = 0;
+       int wqe_idx;
+       void *wqe;
+       int nreq;
+       int ind;
+       int i;
+
+       spin_lock_irqsave(&srq->lock, flags);
+
+       ind = srq->head & (srq->max - 1);
+
+       for (nreq = 0; wr; ++nreq, wr = wr->next) {
+               if (unlikely(wr->num_sge > srq->max_gs)) {
+                       ret = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               if (unlikely(srq->head == srq->tail)) {
+                       ret = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               wqe_idx = find_empty_entry(&srq->idx_que);
+               fill_idx_queue(&srq->idx_que, ind, wqe_idx);
+               wqe = get_srq_wqe(srq, wqe_idx);
+               dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
+
+               for (i = 0; i < wr->num_sge; ++i) {
+                       dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
+                       dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
+                       dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
+               }
+
+               if (i < srq->max_gs) {
+                       dseg->len = 0;
+                       dseg->lkey = cpu_to_le32(0x100);
+                       dseg->addr = 0;
+               }
+
+               srq->wrid[wqe_idx] = wr->wr_id;
+               ind = (ind + 1) & (srq->max - 1);
+       }
+
+       if (likely(nreq)) {
+               srq->head += nreq;
+
+               /*
+                * Make sure that descriptors are written before
+                * doorbell record.
+                */
+               wmb();
+
+               srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
+               srq_db.parameter = srq->head;
+
+               hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);
+
+       }
+
+       spin_unlock_irqrestore(&srq->lock, flags);
+
+       return ret;
+}
+
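
hns_roce_v2_post_srq_recv() above claims an index-queue slot with find_empty_entry() and hns_roce_free_srq_wqe() later returns it once the completion is consumed. A minimal user-space sketch of that claim/release pattern over 64-bit free-bitmap words; GCC's __builtin_ctzll stands in for the driver's bit search, and everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

#define SLOTS 128                       /* example SRQ depth, power of two */

static uint64_t freemap[SLOTS / 64];    /* bit set => index-queue slot free */

static int claim_slot(void)
{
        for (unsigned int w = 0; w < SLOTS / 64; w++) {
                if (!freemap[w])
                        continue;       /* word fully allocated */
                int bit = __builtin_ctzll(freemap[w]);  /* lowest set bit */
                freemap[w] &= ~(1ULL << bit);
                return (int)(w * 64 + bit);
        }
        return -1;                      /* ring full */
}

static void release_slot(int idx)
{
        freemap[idx / 64] |= 1ULL << (idx % 64);
}

int main(void)
{
        for (unsigned int w = 0; w < SLOTS / 64; w++)
                freemap[w] = ~0ULL;     /* everything free initially */

        int a = claim_slot();
        int b = claim_slot();

        release_slot(a);
        printf("a=%d b=%d next=%d\n", a, b, claim_slot());
        return 0;
}
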
 static const struct hns_roce_hw hns_roce_hw_v2 = {
        .cmq_init = hns_roce_v2_cmq_init,
        .cmq_exit = hns_roce_v2_cmq_exit,
@@ -5359,6 +5761,10 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
        .poll_cq = hns_roce_v2_poll_cq,
        .init_eq = hns_roce_v2_init_eq_table,
        .cleanup_eq = hns_roce_v2_cleanup_eq_table,
+       .write_srqc = hns_roce_v2_write_srqc,
+       .modify_srq = hns_roce_v2_modify_srq,
+       .query_srq = hns_roce_v2_query_srq,
+       .post_srq_recv = hns_roce_v2_post_srq_recv,
 };
 
 static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
index 8bc820635bbd29e5b27bc9155685daf9db110c70..b72d0443c835349f66120ecd7735192cbd856d21 100644 (file)
 
 #define HNS_ROCE_V2_MAX_QP_NUM                 0x2000
 #define HNS_ROCE_V2_MAX_WQE_NUM                        0x8000
+#define        HNS_ROCE_V2_MAX_SRQ                     0x100000
+#define HNS_ROCE_V2_MAX_SRQ_WR                 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_SGE                        0x100
 #define HNS_ROCE_V2_MAX_CQ_NUM                 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_NUM                        0x100000
 #define HNS_ROCE_V2_MAX_CQE_NUM                        0x10000
+#define HNS_ROCE_V2_MAX_SRQWQE_NUM             0x8000
 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM             0x100
 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM             0xff
+#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM            0x100
 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM         0x200000
 #define HNS_ROCE_V2_MAX_SQ_INLINE              0x20
 #define HNS_ROCE_V2_UAR_NUM                    256
@@ -61,6 +67,8 @@
 #define HNS_ROCE_V2_MAX_MTPT_NUM               0x8000
 #define HNS_ROCE_V2_MAX_MTT_SEGS               0x1000000
 #define HNS_ROCE_V2_MAX_CQE_SEGS               0x1000000
+#define HNS_ROCE_V2_MAX_SRQWQE_SEGS            0x1000000
+#define HNS_ROCE_V2_MAX_IDX_SEGS               0x1000000
 #define HNS_ROCE_V2_MAX_PD_NUM                 0x1000000
 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA           128
 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA           128
@@ -71,6 +79,7 @@
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ              64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ              48
 #define HNS_ROCE_V2_CQC_ENTRY_SZ               64
+#define HNS_ROCE_V2_SRQC_ENTRY_SZ              64
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ              64
 #define HNS_ROCE_V2_MTT_ENTRY_SZ               64
 #define HNS_ROCE_V2_CQE_ENTRY_SIZE             32
 #define HNS_ROCE_CONTEXT_HOP_NUM               1
 #define HNS_ROCE_MTT_HOP_NUM                   1
 #define HNS_ROCE_CQE_HOP_NUM                   1
+#define HNS_ROCE_SRQWQE_HOP_NUM                        1
 #define HNS_ROCE_PBL_HOP_NUM                   2
 #define HNS_ROCE_EQE_HOP_NUM                   2
+#define HNS_ROCE_IDX_HOP_NUM                   1
 
 #define HNS_ROCE_V2_GID_INDEX_NUM              256
 
        ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
        (step_idx == 1 && hop_num == 1) || \
        (step_idx == 2 && hop_num == 2))
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT     0
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL   BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT)
 
 #define CMD_CSQ_DESC_NUM               1024
 #define CMD_CRQ_DESC_NUM               1024
@@ -213,7 +226,10 @@ enum hns_roce_opcode_type {
        HNS_ROCE_OPC_CFG_TMOUT_LLM                      = 0x8404,
        HNS_ROCE_OPC_CFG_SGID_TB                        = 0x8500,
        HNS_ROCE_OPC_CFG_SMAC_TB                        = 0x8501,
+       HNS_ROCE_OPC_POST_MB                            = 0x8504,
+       HNS_ROCE_OPC_QUERY_MB_ST                        = 0x8505,
        HNS_ROCE_OPC_CFG_BT_ATTR                        = 0x8506,
+       HNS_SWITCH_PARAMETER_CFG                        = 0x1033,
 };
 
 enum {
@@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context {
 #define        V2_CQC_BYTE_64_SE_CQE_IDX_S 0
 #define        V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
 
+struct hns_roce_srq_context {
+       __le32  byte_4_srqn_srqst;
+       __le32  byte_8_limit_wl;
+       __le32  byte_12_xrcd;
+       __le32  byte_16_pi_ci;
+       __le32  wqe_bt_ba;
+       __le32  byte_24_wqe_bt_ba;
+       __le32  byte_28_rqws_pd;
+       __le32  idx_bt_ba;
+       __le32  rsv_idx_bt_ba;
+       __le32  idx_cur_blk_addr;
+       __le32  byte_44_idxbufpgsz_addr;
+       __le32  idx_nxt_blk_addr;
+       __le32  rsv_idxnxtblkaddr;
+       __le32  byte_56_xrc_cqn;
+       __le32  db_record_addr_record_en;
+       __le32  db_record_addr;
+};
+
+#define SRQC_BYTE_4_SRQ_ST_S 0
+#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
+
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2)
+
+#define SRQC_BYTE_4_SRQ_SHIFT_S 4
+#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4)
+
+#define SRQC_BYTE_4_SRQN_S 8
+#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8)
+
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0)
+
+#define SRQC_BYTE_12_SRQ_XRCD_S 0
+#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0)
+
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16)
+
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_28_PD_S 0
+#define SRQC_BYTE_28_PD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_28_RQWS_S 24
+#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24)
+
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22)
+
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0
+#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0)
+
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0
+
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)
+
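
The SRQC fields above are written through the driver's roce_set_field()/roce_set_bit() helpers, i.e. a masked read-modify-write into a 32-bit word. A hedged standalone equivalent: the two mask values mirror GENMASK defines above, but the helper itself is a sketch, not the driver's macro:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-in for roce_set_field(): place val into the contiguous
 * bit range described by mask, whose lowest bit is at position shift.
 */
static void set_field(uint32_t *word, uint32_t mask, int shift, uint32_t val)
{
        *word = (*word & ~mask) | ((val << shift) & mask);
}

int main(void)
{
        uint32_t byte_4 = 0;
        uint32_t st_mask = 0x3;                 /* GENMASK(1, 0)  */
        uint32_t srqn_mask = 0xffffff00;        /* GENMASK(31, 8) */

        set_field(&byte_4, st_mask, 0, 1);              /* SRQ state = valid */
        set_field(&byte_4, srqn_mask, 8, 0x1234);       /* SRQ number */

        printf("byte_4 = %#x\n", byte_4);       /* prints 0x123401 */
        return 0;
}
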
 enum{
        V2_MPT_ST_VALID = 0x1,
        V2_MPT_ST_FREE  = 0x2,
@@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b {
 #define VF_RES_B_DATA_3_VF_SL_NUM_S 16
 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
 
+struct hns_roce_vf_switch {
+       __le32 rocee_sel;
+       __le32 fun_id;
+       __le32 cfg;
+       __le32 resv1;
+       __le32 resv2;
+       __le32 resv3;
+};
+
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
+
+#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
+#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
+#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
+
+struct hns_roce_post_mbox {
+       __le32  in_param_l;
+       __le32  in_param_h;
+       __le32  out_param_l;
+       __le32  out_param_h;
+       __le32  cmd_tag;
+       __le32  token_event_en;
+};
+
+struct hns_roce_mbox_status {
+       __le32  mb_status_hw_run;
+       __le32  rsv[5];
+};
+
 struct hns_roce_cfg_bt_attr {
        __le32 vf_qpc_cfg;
        __le32 vf_srqc_cfg;
@@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc {
 #define HNS_ROCE_HW_RUN_BIT_SHIFT      31
 #define HNS_ROCE_HW_MB_STATUS_MASK     0xFF
 
-#define HNS_ROCE_VF_MB4_TAG_MASK       0xFFFFFF00
-#define HNS_ROCE_VF_MB4_TAG_SHIFT      8
-
-#define HNS_ROCE_VF_MB4_CMD_MASK       0xFF
-#define HNS_ROCE_VF_MB4_CMD_SHIFT      0
-
-#define HNS_ROCE_VF_MB5_EVENT_MASK     0x10000
-#define HNS_ROCE_VF_MB5_EVENT_SHIFT    16
-
-#define HNS_ROCE_VF_MB5_TOKEN_MASK     0xFFFF
-#define HNS_ROCE_VF_MB5_TOKEN_SHIFT    0
-
 struct hns_roce_v2_cmq_ring {
        dma_addr_t desc_dma_addr;
        struct hns_roce_cmq_desc *desc;
index 1b3ee514f2ef1f1f21c1b4e84d2df12d176253d9..65ba43cee81075cf1565f1903553c60639b9360d 100644 (file)
@@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
                            IB_ATOMIC_HCA : IB_ATOMIC_NONE;
        props->max_pkeys = 1;
        props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+               props->max_srq = hr_dev->caps.max_srqs;
+               props->max_srq_wr = hr_dev->caps.max_srq_wrs;
+               props->max_srq_sge = hr_dev->caps.max_srq_sges;
+       }
 
        return 0;
 }
@@ -541,6 +546,21 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
                ib_dev->map_mr_sg               = hns_roce_map_mr_sg;
        }
 
+       /* SRQ */
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+               ib_dev->create_srq = hns_roce_create_srq;
+               ib_dev->modify_srq = hr_dev->hw->modify_srq;
+               ib_dev->query_srq = hr_dev->hw->query_srq;
+               ib_dev->destroy_srq = hns_roce_destroy_srq;
+               ib_dev->post_srq_recv = hr_dev->hw->post_srq_recv;
+               ib_dev->uverbs_cmd_mask |=
+                               (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) |
+                               (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+                               (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) |
+                               (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+                               (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+       }
+
        /* OTHERS */
        ib_dev->get_port_immutable      = hns_roce_port_immutable;
        ib_dev->disassociate_ucontext   = hns_roce_disassociate_ucontext;
@@ -646,8 +666,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
                goto err_unmap_trrl;
        }
 
+       if (hr_dev->caps.srqc_entry_sz) {
+               ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
+                                             HEM_TYPE_SRQC,
+                                             hr_dev->caps.srqc_entry_sz,
+                                             hr_dev->caps.num_srqs, 1);
+               if (ret) {
+                       dev_err(dev,
+                             "Failed to init SRQ context memory, aborting.\n");
+                       goto err_unmap_cq;
+               }
+       }
+
+       if (hr_dev->caps.num_srqwqe_segs) {
+               ret = hns_roce_init_hem_table(hr_dev,
+                                            &hr_dev->mr_table.mtt_srqwqe_table,
+                                            HEM_TYPE_SRQWQE,
+                                            hr_dev->caps.mtt_entry_sz,
+                                            hr_dev->caps.num_srqwqe_segs, 1);
+               if (ret) {
+                       dev_err(dev,
+                               "Failed to init MTT srqwqe memory, aborting.\n");
+                       goto err_unmap_srq;
+               }
+       }
+
+       if (hr_dev->caps.num_idx_segs) {
+               ret = hns_roce_init_hem_table(hr_dev,
+                                             &hr_dev->mr_table.mtt_idx_table,
+                                             HEM_TYPE_IDX,
+                                             hr_dev->caps.idx_entry_sz,
+                                             hr_dev->caps.num_idx_segs, 1);
+               if (ret) {
+                       dev_err(dev,
+                               "Failed to init MTT idx memory, aborting.\n");
+                       goto err_unmap_srqwqe;
+               }
+       }
+
        return 0;
 
+err_unmap_srqwqe:
+       if (hr_dev->caps.num_srqwqe_segs)
+               hns_roce_cleanup_hem_table(hr_dev,
+                                          &hr_dev->mr_table.mtt_srqwqe_table);
+
+err_unmap_srq:
+       if (hr_dev->caps.srqc_entry_sz)
+               hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+       hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
 err_unmap_trrl:
        if (hr_dev->caps.trrl_entry_sz)
                hns_roce_cleanup_hem_table(hr_dev,
@@ -727,8 +797,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
                goto err_cq_table_free;
        }
 
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+               ret = hns_roce_init_srq_table(hr_dev);
+               if (ret) {
+                       dev_err(dev,
+                               "Failed to init shared receive queue table.\n");
+                       goto err_qp_table_free;
+               }
+       }
+
        return 0;
 
+err_qp_table_free:
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+               hns_roce_cleanup_qp_table(hr_dev);
+
 err_cq_table_free:
        hns_roce_cleanup_cq_table(hr_dev);
 
index 521ad2aa3a4ef6df2820b2735cdeff91eabe007f..ee5991bd4171cd3d88bb5f608669eee059128211 100644 (file)
@@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
        struct hns_roce_buddy *buddy;
        int ret;
 
-       if (mtt_type == MTT_TYPE_WQE) {
+       switch (mtt_type) {
+       case MTT_TYPE_WQE:
                buddy = &mr_table->mtt_buddy;
                table = &mr_table->mtt_table;
-       } else {
+               break;
+       case MTT_TYPE_CQE:
                buddy = &mr_table->mtt_cqe_buddy;
                table = &mr_table->mtt_cqe_table;
+               break;
+       case MTT_TYPE_SRQWQE:
+               buddy = &mr_table->mtt_srqwqe_buddy;
+               table = &mr_table->mtt_srqwqe_table;
+               break;
+       case MTT_TYPE_IDX:
+               buddy = &mr_table->mtt_idx_buddy;
+               table = &mr_table->mtt_idx_table;
+               break;
+       default:
+               dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
+                       mtt_type);
+               return -EINVAL;
        }
 
        ret = hns_roce_buddy_alloc(buddy, order, seg);
@@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
        if (mtt->order < 0)
                return;
 
-       if (mtt->mtt_type == MTT_TYPE_WQE) {
+       switch (mtt->mtt_type) {
+       case MTT_TYPE_WQE:
                hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
                                    mtt->order);
                hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
                                        mtt->first_seg,
                                        mtt->first_seg + (1 << mtt->order) - 1);
-       } else {
+               break;
+       case MTT_TYPE_CQE:
                hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
                                    mtt->order);
                hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
                                        mtt->first_seg,
                                        mtt->first_seg + (1 << mtt->order) - 1);
+               break;
+       case MTT_TYPE_SRQWQE:
+               hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
+                                   mtt->order);
+               hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
+                                       mtt->first_seg,
+                                       mtt->first_seg + (1 << mtt->order) - 1);
+               break;
+       case MTT_TYPE_IDX:
+               hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
+                                   mtt->order);
+               hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
+                                       mtt->first_seg,
+                                       mtt->first_seg + (1 << mtt->order) - 1);
+               break;
+       default:
+               dev_err(hr_dev->dev,
+                       "Unsupported MTT type %d, MTT cleanup failed\n",
+                       mtt->mtt_type);
+               break;
        }
 }
 EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
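
Each cleanup branch above hands the buddy allocator a power-of-two run of MTT segments: an order-n allocation covers first_seg through first_seg + (1 << order) - 1. A quick sketch of that range arithmetic, with illustrative values:

#include <stdio.h>

int main(void)
{
        int first_seg = 96;     /* example starting MTT segment */
        int order = 3;          /* buddy order: 2^3 = 8 contiguous segments */

        int last_seg = first_seg + (1 << order) - 1;

        printf("segments %d..%d (%d total)\n",
               first_seg, last_seg, 1 << order);
        return 0;
}
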
@@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
        u32 bt_page_size;
        u32 i;
 
-       if (mtt->mtt_type == MTT_TYPE_WQE)
+       switch (mtt->mtt_type) {
+       case MTT_TYPE_WQE:
+               table = &hr_dev->mr_table.mtt_table;
                bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
-       else
+               break;
+       case MTT_TYPE_CQE:
+               table = &hr_dev->mr_table.mtt_cqe_table;
                bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+               break;
+       case MTT_TYPE_SRQWQE:
+               table = &hr_dev->mr_table.mtt_srqwqe_table;
+               bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+               break;
+       case MTT_TYPE_IDX:
+               table = &hr_dev->mr_table.mtt_idx_table;
+               bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+               break;
+       default:
+               return -EINVAL;
+       }
 
        /* All MTTs must fit in the same page */
        if (start_index / (bt_page_size / sizeof(u64)) !=
@@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
        if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
                return -EINVAL;
 
-       if (mtt->mtt_type == MTT_TYPE_WQE)
-               table = &hr_dev->mr_table.mtt_table;
-       else
-               table = &hr_dev->mr_table.mtt_cqe_table;
-
        mtts = hns_roce_table_find(hr_dev, table,
                                mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
                                &dma_handle);
@@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
        if (mtt->order < 0)
                return -EINVAL;
 
-       if (mtt->mtt_type == MTT_TYPE_WQE)
+       switch (mtt->mtt_type) {
+       case MTT_TYPE_WQE:
                bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
-       else
+               break;
+       case MTT_TYPE_CQE:
                bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+               break;
+       case MTT_TYPE_SRQWQE:
+               bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+               break;
+       case MTT_TYPE_IDX:
+               bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+               break;
+       default:
+               dev_err(hr_dev->dev,
+                       "Unsupported MTT type %d, MTT write failed\n",
+                       mtt->mtt_type);
+               return -EINVAL;
+       }
 
        while (npages > 0) {
                chunk = min_t(int, bt_page_size / sizeof(u64), npages);
@@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
                if (ret)
                        goto err_buddy_cqe;
        }
+
+       if (hr_dev->caps.num_srqwqe_segs) {
+               ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
+                                         ilog2(hr_dev->caps.num_srqwqe_segs));
+               if (ret)
+                       goto err_buddy_srqwqe;
+       }
+
+       if (hr_dev->caps.num_idx_segs) {
+               ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
+                                         ilog2(hr_dev->caps.num_idx_segs));
+               if (ret)
+                       goto err_buddy_idx;
+       }
+
        return 0;
 
+err_buddy_idx:
+       if (hr_dev->caps.num_srqwqe_segs)
+               hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
+
+err_buddy_srqwqe:
+       if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
+               hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
+
 err_buddy_cqe:
        hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 
@@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
 {
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 
+       if (hr_dev->caps.num_idx_segs)
+               hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
+       if (hr_dev->caps.num_srqwqe_segs)
+               hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
        hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
                hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
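
The init and cleanup hunks above keep the usual pairing: the optional buddy allocators (CQE, SRQWQE, IDX) are set up in order, the error labels unwind only the steps that already succeeded, and the cleanup path releases them in reverse. A self-contained sketch of that goto-unwind pattern follows; init_a/b/c and cleanup_a/b stand in for the buddy-allocator calls and are not driver functions.

/*
 * Self-contained sketch of the init/unwind pattern used above; init_a/b/c
 * and cleanup_a/b stand in for the buddy-allocator calls.
 */
#include <stdio.h>

static int init_a(void) { return 0; }
static int init_b(void) { return 0; }
static int init_c(void) { return -1; }          /* pretend the last step fails */
static void cleanup_a(void) { puts("cleanup a"); }
static void cleanup_b(void) { puts("cleanup b"); }

static int init_all(void)
{
        int ret;

        ret = init_a();
        if (ret)
                return ret;

        ret = init_b();
        if (ret)
                goto err_a;

        ret = init_c();
        if (ret)
                goto err_b;             /* unwind only what already succeeded */

        return 0;

err_b:
        cleanup_b();
err_a:
        cleanup_a();
        return ret;
}

int main(void)
{
        return init_all() ? 1 : 0;      /* prints "cleanup b" then "cleanup a" */
}
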
@@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
        u32 bt_page_size;
        u32 n;
 
-       order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
-               hr_dev->caps.cqe_ba_pg_sz;
+       switch (mtt->mtt_type) {
+       case MTT_TYPE_WQE:
+               order = hr_dev->caps.mtt_ba_pg_sz;
+               break;
+       case MTT_TYPE_CQE:
+               order = hr_dev->caps.cqe_ba_pg_sz;
+               break;
+       case MTT_TYPE_SRQWQE:
+               order = hr_dev->caps.srqwqe_ba_pg_sz;
+               break;
+       case MTT_TYPE_IDX:
+               order = hr_dev->caps.idx_ba_pg_sz;
+               break;
+       default:
+               dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
+                       mtt->mtt_type);
+               return -EINVAL;
+       }
+
        bt_page_size = 1 << (order + PAGE_SHIFT);
 
        pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
@@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                        goto err_umem;
                }
        } else {
-               int pbl_size = 1;
+               u64 pbl_size = 1;
 
                bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
                for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
                        pbl_size *= bt_size;
                if (n > pbl_size) {
                        dev_err(dev,
-                           " MR len %lld err. MR page num is limited to %d!\n",
+                           " MR len %lld err. MR page num is limited to %lld!\n",
                            length, pbl_size);
                        ret = -EINVAL;
                        goto err_umem;
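
The pbl_size change above is an overflow fix: the page limit is bt_size raised to the power of pbl_hop_num, and with a larger BA page size that product no longer fits in a 32-bit int. A small demonstration with made-up numbers (bt_size 2048, three hops):

/*
 * Why pbl_size must be 64-bit: the page limit is bt_size to the power of
 * pbl_hop_num, which overflows 32 bits for larger BA page sizes. The
 * numbers below are illustrative, not taken from a specific device.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t bt_size = 2048;        /* e.g. 16 KiB BA page / 8-byte entry */
        int hop_num = 3;
        uint32_t narrow = 1;            /* old behaviour: 32-bit accumulator */
        uint64_t wide = 1;              /* fixed behaviour: 64-bit accumulator */
        int i;

        for (i = 0; i < hop_num; i++) {
                narrow *= bt_size;      /* wraps: 2048^3 does not fit in 32 bits */
                wide *= bt_size;
        }
        printf("32-bit limit: %u pages\n", (unsigned)narrow);
        printf("64-bit limit: %llu pages\n", (unsigned long long)wide);
        return 0;
}
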
index 5ebf481a39d9ddc71b18d3e55a3c18e87225a552..52d2b299b3be252b90ddf040c43a9fc8632183c8 100644 (file)
@@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
 EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
 
 static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
-                               struct ib_qp_cap *cap, int is_user, int has_srq,
+                               struct ib_qp_cap *cap, int is_user, int has_rq,
                                struct hns_roce_qp *hr_qp)
 {
        struct device *dev = hr_dev->dev;
@@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
                return -EINVAL;
        }
 
-       /* If srq exit, set zero for relative number of rq */
-       if (has_srq) {
-               if (cap->max_recv_wr) {
-                       dev_dbg(dev, "srq no need config max_recv_wr\n");
-                       return -EINVAL;
-               }
-
-               hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
+       /* If the QP has no RQ (e.g. an SRQ is attached), zero the RQ attributes */
+       if (!has_rq) {
+               hr_qp->rq.wqe_cnt = 0;
+               hr_qp->rq.max_gs = 0;
+               cap->max_recv_wr = 0;
+               cap->max_recv_sge = 0;
        } else {
                if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
                        dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
@@ -563,13 +561,14 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
 
        ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
-                                  !!init_attr->srq, hr_qp);
+                                  hns_roce_qp_has_rq(init_attr), hr_qp);
        if (ret) {
                dev_err(dev, "hns_roce_set_rq_size failed\n");
                goto err_out;
        }
 
-       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+       if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+           hns_roce_qp_has_rq(init_attr)) {
                /* allocate recv inline buf */
                hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
                                               sizeof(struct hns_roce_rinl_wqe),
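
The QP hunks above switch hns_roce_set_rq_size() from a has_srq flag to a has_rq one: when the QP owns no receive queue, its RQ counters and the caller-visible caps are zeroed and the RQ inline buffer is no longer allocated. hns_roce_qp_has_rq() itself is not part of this diff, so the predicate in the sketch below is only an assumed approximation (no SRQ attached and not an XRC target).

/*
 * Rough userspace model of the has_rq decision and the zeroing of the RQ
 * capabilities; hns_roce_qp_has_rq() is not shown in this diff, so the
 * predicate below is an assumption, not the driver's exact logic.
 */
#include <stdbool.h>
#include <stdio.h>

struct qp_init_attr {
        bool has_srq;                   /* stands in for init_attr->srq != NULL */
        bool is_xrc_tgt;                /* stands in for IB_QPT_XRC_TGT */
        unsigned int max_recv_wr;
        unsigned int max_recv_sge;
};

static bool qp_has_rq(const struct qp_init_attr *attr)
{
        return !(attr->has_srq || attr->is_xrc_tgt);
}

int main(void)
{
        struct qp_init_attr attr = {
                .has_srq = true, .max_recv_wr = 64, .max_recv_sge = 2,
        };

        if (!qp_has_rq(&attr)) {
                /* mirror the hunk: zero the RQ-related capabilities */
                attr.max_recv_wr = 0;
                attr.max_recv_sge = 0;
        }
        printf("max_recv_wr=%u max_recv_sge=%u\n",
               attr.max_recv_wr, attr.max_recv_sge);
        return 0;
}
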
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644 (file)
index 0000000..463df60
--- /dev/null
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+       struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+       struct hns_roce_srq *srq;
+
+       xa_lock(&srq_table->xa);
+       srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+       if (srq)
+               atomic_inc(&srq->refcount);
+       xa_unlock(&srq_table->xa);
+
+       if (!srq) {
+               dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+               return;
+       }
+
+       srq->event(srq, event_type);
+
+       if (atomic_dec_and_test(&srq->refcount))
+               complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+                                 enum hns_roce_event event_type)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+       struct ib_srq *ibsrq = &srq->ibsrq;
+       struct ib_event event;
+
+       if (ibsrq->event_handler) {
+               event.device      = ibsrq->device;
+               event.element.srq = ibsrq;
+               switch (event_type) {
+               case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+                       event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+                       break;
+               case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+                       event.event = IB_EVENT_SRQ_ERR;
+                       break;
+               default:
+                       dev_err(hr_dev->dev,
+                          "hns_roce: unexpected event type 0x%x on SRQ %06lx\n",
+                          event_type, srq->srqn);
+                       return;
+               }
+
+               ibsrq->event_handler(&event, ibsrq->srq_context);
+       }
+}
+
+static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
+                             struct hns_roce_cmd_mailbox *mailbox,
+                             unsigned long srq_num)
+{
+       return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
+                                HNS_ROCE_CMD_SW2HW_SRQ,
+                                HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
+                            struct hns_roce_cmd_mailbox *mailbox,
+                            unsigned long srq_num)
+{
+       return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+                                mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+                                HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
+                      struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
+                      struct hns_roce_srq *srq)
+{
+       struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+       struct hns_roce_cmd_mailbox *mailbox;
+       dma_addr_t dma_handle_wqe;
+       dma_addr_t dma_handle_idx;
+       u64 *mtts_wqe;
+       u64 *mtts_idx;
+       int ret;
+
+       /* Get the physical address of srq buf */
+       mtts_wqe = hns_roce_table_find(hr_dev,
+                                      &hr_dev->mr_table.mtt_srqwqe_table,
+                                      srq->mtt.first_seg,
+                                      &dma_handle_wqe);
+       if (!mtts_wqe) {
+               dev_err(hr_dev->dev,
+                       "SRQ alloc: failed to find srq buf addr.\n");
+               return -EINVAL;
+       }
+
+       /* Get physical address of idx que buf */
+       mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
+                                      srq->idx_que.mtt.first_seg,
+                                      &dma_handle_idx);
+       if (!mtts_idx) {
+               dev_err(hr_dev->dev,
+                       "SRQ alloc: failed to find idx que buf addr.\n");
+               return -EINVAL;
+       }
+
+       ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
+       if (ret == -1) {
+               dev_err(hr_dev->dev, "SRQ alloc: failed to alloc index.\n");
+               return -ENOMEM;
+       }
+
+       ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+       if (ret)
+               goto err_out;
+
+       ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+       if (ret)
+               goto err_put;
+
+       mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+       if (IS_ERR(mailbox)) {
+               ret = PTR_ERR(mailbox);
+               goto err_xa;
+       }
+
+       hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
+                              mtts_wqe, mtts_idx, dma_handle_wqe,
+                              dma_handle_idx);
+
+       ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+       hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+       if (ret)
+               goto err_xa;
+
+       atomic_set(&srq->refcount, 1);
+       init_completion(&srq->free);
+       return ret;
+
+err_xa:
+       xa_erase(&srq_table->xa, srq->srqn);
+
+err_put:
+       hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+err_out:
+       hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+       return ret;
+}
+
+void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+       struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+       int ret;
+
+       ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+       if (ret)
+               dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for SRQN %06lx\n",
+                       ret, srq->srqn);
+
+       xa_erase(&srq_table->xa, srq->srqn);
+
+       if (atomic_dec_and_test(&srq->refcount))
+               complete(&srq->free);
+       wait_for_completion(&srq->free);
+
+       hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+       hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+}
+
+static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
+                                  u32 page_shift)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+       struct hns_roce_idx_que *idx_que = &srq->idx_que;
+       u32 bitmap_num;
+       int i;
+
+       bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+
+       idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
+       if (!idx_que->bitmap)
+               return -ENOMEM;
+
+       bitmap_num = bitmap_num / (8 * sizeof(u64));
+
+       idx_que->buf_size = srq->idx_que.buf_size;
+
+       if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
+                              &idx_que->idx_buf, page_shift)) {
+               kfree(idx_que->bitmap);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < bitmap_num; i++)
+               idx_que->bitmap[i] = ~(0UL);
+
+       return 0;
+}
+
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+                                  struct ib_srq_init_attr *srq_init_attr,
+                                  struct ib_udata *udata)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+       struct hns_roce_srq *srq;
+       int srq_desc_size;
+       int srq_buf_size;
+       u32 page_shift;
+       int ret = 0;
+       u32 npages;
+       u32 cqn;
+
+       /* Check the actual SRQ wqe and SRQ sge num */
+       if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+           srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+               return ERR_PTR(-EINVAL);
+
+       srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+       if (!srq)
+               return ERR_PTR(-ENOMEM);
+
+       mutex_init(&srq->mutex);
+       spin_lock_init(&srq->lock);
+
+       srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+       srq->max_gs = srq_init_attr->attr.max_sge;
+
+       srq_desc_size = max(16, 16 * srq->max_gs);
+
+       srq->wqe_shift = ilog2(srq_desc_size);
+
+       srq_buf_size = srq->max * srq_desc_size;
+
+       srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
+       srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+       srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
+       srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
+
+       if (udata) {
+               struct hns_roce_ib_create_srq  ucmd;
+
+               if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+                       ret = -EFAULT;
+                       goto err_srq;
+               }
+
+               srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+                                       srq_buf_size, 0, 0);
+               if (IS_ERR(srq->umem)) {
+                       ret = PTR_ERR(srq->umem);
+                       goto err_srq;
+               }
+
+               if (hr_dev->caps.srqwqe_buf_pg_sz) {
+                       npages = (ib_umem_page_count(srq->umem) +
+                                 (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+                                 (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+                       page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+                       ret = hns_roce_mtt_init(hr_dev, npages,
+                                               page_shift,
+                                               &srq->mtt);
+               } else
+                       ret = hns_roce_mtt_init(hr_dev,
+                                               ib_umem_page_count(srq->umem),
+                                               srq->umem->page_shift,
+                                               &srq->mtt);
+               if (ret)
+                       goto err_buf;
+
+               ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+               if (ret)
+                       goto err_srq_mtt;
+
+               /* config index queue BA */
+               srq->idx_que.umem = ib_umem_get(pd->uobject->context,
+                                               ucmd.que_addr,
+                                               srq->idx_que.buf_size, 0, 0);
+               if (IS_ERR(srq->idx_que.umem)) {
+                       dev_err(hr_dev->dev,
+                               "ib_umem_get error for index queue\n");
+                       ret = PTR_ERR(srq->idx_que.umem);
+                       goto err_srq_mtt;
+               }
+
+               if (hr_dev->caps.idx_buf_pg_sz) {
+                       npages = (ib_umem_page_count(srq->idx_que.umem) +
+                                 (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
+                                 (1 << hr_dev->caps.idx_buf_pg_sz);
+                       page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+                       ret = hns_roce_mtt_init(hr_dev, npages,
+                                               page_shift, &srq->idx_que.mtt);
+               } else {
+                       ret = hns_roce_mtt_init(hr_dev,
+                                      ib_umem_page_count(srq->idx_que.umem),
+                                      srq->idx_que.umem->page_shift,
+                                      &srq->idx_que.mtt);
+               }
+
+               if (ret) {
+                       dev_err(hr_dev->dev,
+                               "hns_roce_mtt_init error for idx que\n");
+                       goto err_idx_mtt;
+               }
+
+               ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+                                                srq->idx_que.umem);
+               if (ret) {
+                       dev_err(hr_dev->dev,
+                             "hns_roce_ib_umem_write_mtt error for idx que\n");
+                       goto err_idx_buf;
+               }
+       } else {
+               page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+               if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
+                                     (1 << page_shift) * 2,
+                                     &srq->buf, page_shift)) {
+                       ret = -ENOMEM;
+                       goto err_srq;
+               }
+
+               srq->head = 0;
+               srq->tail = srq->max - 1;
+
+               ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
+                                       srq->buf.page_shift, &srq->mtt);
+               if (ret)
+                       goto err_buf;
+
+               ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+               if (ret)
+                       goto err_srq_mtt;
+
+               page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+               ret = hns_roce_create_idx_que(pd, srq, page_shift);
+               if (ret) {
+                       dev_err(hr_dev->dev, "Failed to create idx queue (%d)!\n",
+                               ret);
+                       goto err_srq_mtt;
+               }
+
+               /* Init mtt table for idx_que */
+               ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+                                       srq->idx_que.idx_buf.page_shift,
+                                       &srq->idx_que.mtt);
+               if (ret)
+                       goto err_create_idx;
+
+               /* Write buffer address into the mtt table */
+               ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+                                            &srq->idx_que.idx_buf);
+               if (ret)
+                       goto err_idx_buf;
+
+               srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+               if (!srq->wrid) {
+                       ret = -ENOMEM;
+                       goto err_idx_buf;
+               }
+       }
+
+       cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
+             to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+
+       srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+
+       ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0,
+                                &srq->mtt, 0, srq);
+       if (ret)
+               goto err_wrid;
+
+       srq->event = hns_roce_ib_srq_event;
+       srq->ibsrq.ext.xrc.srq_num = srq->srqn;
+
+       if (pd->uobject) {
+               if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+                       ret = -EFAULT;
+                       goto err_wrid;
+               }
+       }
+
+       return &srq->ibsrq;
+
+err_wrid:
+       kvfree(srq->wrid);
+
+err_idx_buf:
+       hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_idx_mtt:
+       if (udata)
+               ib_umem_release(srq->idx_que.umem);
+
+err_create_idx:
+       hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+                         &srq->idx_que.idx_buf);
+       kfree(srq->idx_que.bitmap);
+
+err_srq_mtt:
+       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_buf:
+       if (udata)
+               ib_umem_release(srq->umem);
+       else
+               hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+err_srq:
+       kfree(srq);
+       return ERR_PTR(ret);
+}
+
+int hns_roce_destroy_srq(struct ib_srq *ibsrq)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+       struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+       hns_roce_srq_free(hr_dev, srq);
+       hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+       if (ibsrq->uobject) {
+               hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+               ib_umem_release(srq->idx_que.umem);
+               ib_umem_release(srq->umem);
+       } else {
+               kvfree(srq->wrid);
+               hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+                                 &srq->buf);
+       }
+
+       kfree(srq);
+
+       return 0;
+}
+
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+       struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+
+       xa_init(&srq_table->xa);
+
+       return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
+                                   hr_dev->caps.num_srqs - 1,
+                                   hr_dev->caps.reserved_srqs, 0);
+}
+
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
+{
+       hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+}
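
Most of the new file is buffer bookkeeping: the WQE count is rounded up to the next power of two of max_wr + 1, each descriptor is 16 bytes per SGE with a 16-byte floor, and the index queue holds one fixed-size entry per WQE. A standalone sketch of that arithmetic follows; the index-queue entry size is an assumed value, since HNS_ROCE_IDX_QUE_ENTRY_SZ is defined outside this diff.

/*
 * Standalone sketch of the SRQ buffer sizing in hns_roce_create_srq().
 * IDX_QUE_ENTRY_SZ is an assumed value; the real HNS_ROCE_IDX_QUE_ENTRY_SZ
 * constant lives in the driver headers, outside this diff.
 */
#include <stdint.h>
#include <stdio.h>

#define IDX_QUE_ENTRY_SZ 4              /* assumption for illustration */

static uint32_t roundup_pow_of_two(uint32_t v)
{
        uint32_t r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

static uint32_t ilog2_u32(uint32_t v)
{
        uint32_t r = 0;

        while (v >>= 1)
                r++;
        return r;
}

int main(void)
{
        uint32_t max_wr = 100, max_sge = 2;
        uint32_t max = roundup_pow_of_two(max_wr + 1);  /* 128 entries */
        uint32_t desc_size = 16 * max_sge;              /* 16 bytes per SGE */
        uint32_t wqe_shift, buf_size, idx_buf_size;

        if (desc_size < 16)
                desc_size = 16;                         /* 16-byte floor */

        wqe_shift = ilog2_u32(desc_size);
        buf_size = max * desc_size;
        idx_buf_size = max * IDX_QUE_ENTRY_SZ;

        printf("max=%u wqe_shift=%u buf=%u idx_buf=%u\n",
               (unsigned)max, (unsigned)wqe_shift,
               (unsigned)buf_size, (unsigned)idx_buf_size);
        return 0;
}
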
index 102875872bea7aaa40c1c200d6f57723beb07508..a773d1edf7fd80ed0b1fc1af7ba92b0dd621c0f8 100644 (file)
@@ -2721,25 +2721,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
        return 0;
 }
 
-/**
- * i40iw_get_vector_affinity - report IRQ affinity mask
- * @ibdev: IB device
- * @comp_vector: completion vector index
- */
-static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev,
-                                                      int comp_vector)
-{
-       struct i40iw_device *iwdev = to_iwdev(ibdev);
-       struct i40iw_msix_vector *msix_vec;
-
-       if (iwdev->msix_shared)
-               msix_vec = &iwdev->iw_msixtbl[comp_vector];
-       else
-               msix_vec = &iwdev->iw_msixtbl[comp_vector + 1];
-
-       return irq_get_affinity_mask(msix_vec->irq);
-}
-
 /**
  * i40iw_init_rdma_device - initialization of iwarp device
  * @iwdev: iwarp device
@@ -2832,7 +2813,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
        iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
        iwibdev->ibdev.post_send = i40iw_post_send;
        iwibdev->ibdev.post_recv = i40iw_post_recv;
-       iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity;
 
        return iwibdev;
 }
index 0def2323459cffc0b199819c148430fe173fd010..b73b5fa1822a7f5ae872b526eb11da133d8963b4 100644 (file)
@@ -2352,6 +2352,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
                     event == NETDEV_UP || event == NETDEV_CHANGE))
                        update_qps_port = port;
 
+               if (dev == iboe->netdevs[port - 1] &&
+                   (event == NETDEV_UP || event == NETDEV_DOWN)) {
+                       enum ib_port_state port_state;
+                       struct ib_event ibev = { };
+
+                       if (ib_get_cached_port_state(&ibdev->ib_dev, port,
+                                                    &port_state))
+                               continue;
+
+                       if (event == NETDEV_UP &&
+                           (port_state != IB_PORT_ACTIVE ||
+                            iboe->last_port_state[port - 1] != IB_PORT_DOWN))
+                               continue;
+                       if (event == NETDEV_DOWN &&
+                           (port_state != IB_PORT_DOWN ||
+                            iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
+                               continue;
+                       iboe->last_port_state[port - 1] = port_state;
+
+                       ibev.device = &ibdev->ib_dev;
+                       ibev.element.port_num = port;
+                       ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
+                                                         IB_EVENT_PORT_ERR;
+                       ib_dispatch_event(&ibev);
+               }
+
        }
        spin_unlock_bh(&iboe->lock);
 
@@ -2710,6 +2736,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        for (i = 0; i < ibdev->num_ports; ++i) {
                mutex_init(&ibdev->counters_table[i].mutex);
                INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+               iboe->last_port_state[i] = IB_PORT_DOWN;
        }
 
        num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
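
The netdev-event hunk above only dispatches an IB port event when the cached port state really moved in the direction the netdev event suggests, so repeated NETDEV_UP/NETDEV_DOWN notifications do not produce duplicate IB_EVENT_PORT_ACTIVE/IB_EVENT_PORT_ERR events. A compact model of that filter, with the mlx4-specific lookups replaced by plain parameters:

/*
 * Model of the "only report real transitions" filter; the enums and the
 * helper below are simplified stand-ins for the mlx4/ib_core definitions.
 */
#include <stdbool.h>
#include <stdio.h>

enum port_state { PORT_DOWN, PORT_ACTIVE };
enum netdev_event { NETDEV_UP, NETDEV_DOWN };

/*
 * Returns true when an IB port event should be dispatched, and remembers
 * the new state, mirroring iboe->last_port_state[] in the hunk above.
 */
static bool should_dispatch(enum netdev_event ev, enum port_state current,
                            enum port_state *last)
{
        if (ev == NETDEV_UP &&
            (current != PORT_ACTIVE || *last != PORT_DOWN))
                return false;
        if (ev == NETDEV_DOWN &&
            (current != PORT_DOWN || *last != PORT_ACTIVE))
                return false;

        *last = current;
        return true;
}

int main(void)
{
        enum port_state last = PORT_DOWN;

        printf("%d\n", should_dispatch(NETDEV_UP, PORT_ACTIVE, &last)); /* 1 */
        printf("%d\n", should_dispatch(NETDEV_UP, PORT_ACTIVE, &last)); /* 0, no change */
        return 0;
}
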
index 8850dfc3826d01813f8d6dd30ede5a70509bd643..5cb52424912e2fdcc6171f20d63f6fa65a29f9e7 100644 (file)
@@ -519,6 +519,7 @@ struct mlx4_ib_iboe {
        atomic64_t              mac[MLX4_MAX_PORTS];
        struct notifier_block   nb;
        struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
+       enum ib_port_state      last_port_state[MLX4_MAX_PORTS];
 };
 
 struct pkey_mgt {
index 752bdd536130bf5ea31c578325c09751554ecbd8..ea1f3a081b05a7ae5a42c9d9b2fe4374e7c02b7b 100644 (file)
@@ -353,16 +353,12 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
 
 static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
 {
-       char base_name[9];
-
-       /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
-       strlcpy(name, pci_name(dev->dev->persist->pdev), max);
-       strncpy(base_name, name, 8); /*till xxxx:yy:*/
-       base_name[8] = '\0';
-       /* with no ARI only 3 last bits are used so when the fn is higher than 8
+       /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n
+        * with no ARI only 3 last bits are used so when the fn is higher than 8
         * need to add it to the dev num, so count in the last number will be
         * modulo 8 */
-       sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+       snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev),
+                i / 8, i % 8);
 }
 
 struct mlx4_port {
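
The sysfs hunk collapses the strlcpy/strncpy/sprintf chain into one bounded snprintf: "%.8s" keeps only the "xxxx:yy:" prefix of the PCI name and the rest appends the derived device and function numbers. A quick userspace check of the format string (the PCI name and the index are made-up examples):

/*
 * Quick check of the "%.8s%.2d.%d" format used above; the PCI name and the
 * function index are arbitrary examples, not taken from real hardware.
 */
#include <stdio.h>

int main(void)
{
        char name[64];
        const char *pci = "0000:05:00.0";       /* bus:dev.fn style name */
        int i = 11;                             /* virtual function index */

        snprintf(name, sizeof(name), "%.8s%.2d.%d", pci, i / 8, i % 8);
        puts(name);                             /* prints "0000:05:01.3" */
        return 0;
}
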
index b8e4b15e2674b963428d137b00978b732179dffe..33f5adb14e4ef17075a9c9a3aaa6d5741cc0c0de 100644 (file)
@@ -1,6 +1,8 @@
 obj-$(CONFIG_MLX5_INFINIBAND)  += mlx5_ib.o
 
-mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
+mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
+               srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
+               cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
index 7d769b5538b4a275796dcfd8b7545152239667f5..0b99f7d0630dcb77364d5adc06429685c2933ce9 100644 (file)
@@ -35,6 +35,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_cache.h>
 #include "mlx5_ib.h"
+#include "srq.h"
 
 static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
 {
@@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                struct mlx5_core_srq *msrq = NULL;
 
                if (qp->ibqp.xrcd) {
-                       msrq = mlx5_core_get_srq(dev->mdev,
-                                                be32_to_cpu(cqe->srqn));
+                       msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
                        srq = to_mibsrq(msrq);
                } else {
                        srq = to_msrq(qp->ibqp.srq);
index 61aab7c0c5135b2ea67a719943c2211de7a20932..5271469aad10cd8ad5a620d325a7003bf2277ef7 100644 (file)
@@ -9,6 +9,7 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 #include <rdma/ib_umem.h>
+#include <rdma/uverbs_std_types.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
@@ -40,29 +41,37 @@ struct devx_umem_reg_cmd {
        u32                             out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
 };
 
-static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+static struct mlx5_ib_ucontext *
+devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
 {
-       return to_mucontext(ib_uverbs_get_ucontext(file));
+       return to_mucontext(ib_uverbs_get_ucontext(attrs));
 }
 
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
 {
        u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
        u64 general_obj_types;
-       void *hdr;
+       void *hdr, *uctx;
        int err;
        u16 uid;
+       u32 cap = 0;
 
        hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
+       uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
 
        general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
        if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
            !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
                return -EINVAL;
 
+       if (is_user && capable(CAP_NET_RAW) &&
+           (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+               cap |= MLX5_UCTX_CAP_RAW_TX;
+
        MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+       MLX5_SET(uctx, uctx, cap, cap);
 
        err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
        if (err)
@@ -106,6 +115,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
        }
 }
 
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+{
+       struct devx_obj *devx_obj = obj;
+       u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+       if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+               *counter_id = MLX5_GET(dealloc_flow_counter_in,
+                                      devx_obj->dinbox,
+                                      flow_counter_id);
+               return true;
+       }
+
+       return false;
+}
+
 /*
  * As the obj_id in the firmware is not globally unique the object type
  * must be considered upon checking for a valid object id.
@@ -116,7 +140,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
        return ((u64)opcode << 32) | obj_id;
 }
 
-static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+static u64 devx_get_obj_id(const void *in)
 {
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
        u64 obj_id;
@@ -290,6 +314,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
                                        MLX5_GET(query_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_QUERY_XRQ:
+       case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+       case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(query_xrq_in, in, xrqn));
                break;
@@ -316,17 +342,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
                                        MLX5_GET(drain_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_ARM_XRQ:
+       case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(arm_xrq_in, in, xrqn));
                break;
+       case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+               obj_id = get_enc_obj_id
+                               (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
+                                MLX5_GET(query_packet_reformat_context_in,
+                                         in, packet_reformat_id));
+               break;
        default:
+               obj_id = 0;
+       }
+
+       return obj_id;
+}
+
+static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in)
+{
+       u64 obj_id = devx_get_obj_id(in);
+
+       if (!obj_id)
                return false;
+
+       switch (uobj_get_object_id(uobj)) {
+       case UVERBS_OBJECT_CQ:
+               return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+                                     to_mcq(uobj->object)->mcq.cqn) ==
+                                     obj_id;
+
+       case UVERBS_OBJECT_SRQ:
+       {
+               struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
+               struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+               u16 opcode;
+
+               switch (srq->common.res) {
+               case MLX5_RES_XSRQ:
+                       opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
+                       break;
+               case MLX5_RES_XRQ:
+                       opcode = MLX5_CMD_OP_CREATE_XRQ;
+                       break;
+               default:
+                       if (!dev->mdev->issi)
+                               opcode = MLX5_CMD_OP_CREATE_SRQ;
+                       else
+                               opcode = MLX5_CMD_OP_CREATE_RMP;
+               }
+
+               return get_enc_obj_id(opcode,
+                                     to_msrq(uobj->object)->msrq.srqn) ==
+                                     obj_id;
        }
 
-       if (obj_id == obj->obj_id)
-               return true;
+       case UVERBS_OBJECT_QP:
+       {
+               struct mlx5_ib_qp *qp = to_mqp(uobj->object);
+               enum ib_qp_type qp_type = qp->ibqp.qp_type;
+
+               if (qp_type == IB_QPT_RAW_PACKET ||
+                   (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+                       struct mlx5_ib_raw_packet_qp *raw_packet_qp =
+                                                        &qp->raw_packet_qp;
+                       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+                       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+
+                       return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+                                              rq->base.mqp.qpn) == obj_id ||
+                               get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+                                              sq->base.mqp.qpn) == obj_id ||
+                               get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+                                              rq->tirn) == obj_id ||
+                               get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+                                              sq->tisn) == obj_id);
+               }
+
+               if (qp_type == MLX5_IB_QPT_DCT)
+                       return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+                                             qp->dct.mdct.mqp.qpn) == obj_id;
+
+               return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+                                     qp->ibqp.qp_num) == obj_id;
+       }
 
-       return false;
+       case UVERBS_OBJECT_WQ:
+               return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+                                     to_mrwq(uobj->object)->core_qp.qpn) ==
+                                     obj_id;
+
+       case UVERBS_OBJECT_RWQ_IND_TBL:
+               return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+                                     to_mrwq_ind_table(uobj->object)->rqtn) ==
+                                     obj_id;
+
+       case MLX5_IB_OBJECT_DEVX_OBJ:
+               return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+
+       default:
+               return false;
+       }
 }
 
 static void devx_set_umem_valid(const void *in)
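
devx_is_valid_obj_id() now recomputes the expected 64-bit id from the uobject the command targets instead of trusting the id stored in a DEVX object, which is what lets DEVX modify/query work on verbs-created CQs, SRQs, QPs, WQs and RQTs. The encoding itself is simply the creating opcode in the upper 32 bits and the firmware object number in the lower 32; a tiny sketch follows (the opcode constant below is a placeholder, not the real mlx5 value).

/*
 * Sketch of the (opcode << 32 | object number) encoding used for the
 * validation above; the opcode constant is a placeholder.
 */
#include <stdint.h>
#include <stdio.h>

#define CMD_OP_CREATE_CQ 0x400                  /* placeholder opcode */

static uint64_t enc_obj_id(uint16_t opcode, uint32_t obj_id)
{
        return ((uint64_t)opcode << 32) | obj_id;
}

int main(void)
{
        uint32_t cqn = 0x1234;                  /* hypothetical CQ number */
        uint64_t from_cmd = enc_obj_id(CMD_OP_CREATE_CQ, cqn);
        uint64_t from_uobject = enc_obj_id(CMD_OP_CREATE_CQ, cqn);

        printf("valid: %d\n", from_cmd == from_uobject);        /* 1 */
        return 0;
}
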
@@ -494,6 +610,7 @@ static bool devx_is_obj_modify_cmd(const void *in)
        case MLX5_CMD_OP_DRAIN_DCT:
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
        case MLX5_CMD_OP_ARM_XRQ:
+       case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
@@ -535,6 +652,9 @@ static bool devx_is_obj_query_cmd(const void *in)
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
        case MLX5_CMD_OP_QUERY_DCT:
        case MLX5_CMD_OP_QUERY_XRQ:
+       case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+       case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+       case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                return true;
        default:
                return false;
@@ -572,15 +692,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
        if (!c->devx_uid)
                return -EINVAL;
 
-       if (!capable(CAP_NET_RAW))
-               return -EPERM;
-
        return c->devx_uid;
 }
 static bool devx_is_general_cmd(void *in)
 {
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
 
+       if (opcode >= MLX5_CMD_OP_GENERAL_START &&
+           opcode < MLX5_CMD_OP_GENERAL_END)
+               return true;
+
        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -603,7 +724,7 @@ static bool devx_is_general_cmd(void *in)
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
@@ -616,7 +737,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
                             MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
                return -EFAULT;
 
-       c = devx_ufile2uctx(file);
+       c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);
@@ -653,14 +774,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
  * queue or arm its CQ for event generation), no further harm is expected.
  */
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        u32 user_idx;
        s32 dev_idx;
 
-       c = devx_ufile2uctx(file);
+       c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);
@@ -681,7 +802,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
@@ -693,7 +814,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
        int err;
        int uid;
 
-       c = devx_ufile2uctx(file);
+       c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);
@@ -908,7 +1029,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
        int cmd_out_len =  uverbs_attr_get_len(attrs,
@@ -970,7 +1091,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -978,7 +1099,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
        struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
                                                          MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
        struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
-       struct devx_obj *obj = uobj->object;
+       struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
        void *cmd_out;
        int err;
        int uid;
@@ -990,7 +1111,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
        if (!devx_is_obj_modify_cmd(cmd_in))
                return -EINVAL;
 
-       if (!devx_is_valid_obj_id(obj, cmd_in))
+       if (!devx_is_valid_obj_id(uobj, cmd_in))
                return -EINVAL;
 
        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1000,7 +1121,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        devx_set_umem_valid(cmd_in);
 
-       err = mlx5_cmd_exec(obj->mdev, cmd_in,
+       err = mlx5_cmd_exec(mdev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
@@ -1011,7 +1132,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -1019,10 +1140,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
        struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
                                                          MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
        struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
-       struct devx_obj *obj = uobj->object;
        void *cmd_out;
        int err;
        int uid;
+       struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
 
        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
@@ -1031,7 +1152,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
        if (!devx_is_obj_query_cmd(cmd_in))
                return -EINVAL;
 
-       if (!devx_is_valid_obj_id(obj, cmd_in))
+       if (!devx_is_valid_obj_id(uobj, cmd_in))
                return -EINVAL;
 
        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1039,7 +1160,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
                return PTR_ERR(cmd_out);
 
        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
-       err = mlx5_cmd_exec(obj->mdev, cmd_in,
+       err = mlx5_cmd_exec(mdev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
@@ -1125,7 +1246,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct devx_umem_reg_cmd cmd;
        struct devx_umem *obj;
@@ -1139,9 +1260,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
        if (!c->devx_uid)
                return -EINVAL;
 
-       if (!capable(CAP_NET_RAW))
-               return -EPERM;
-
        obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
        if (!obj)
                return -ENOMEM;
@@ -1277,7 +1395,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
-                       MLX5_IB_OBJECT_DEVX_OBJ,
+                       UVERBS_IDR_ANY_OBJECT,
                        UVERBS_ACCESS_WRITE,
                        UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(
@@ -1293,7 +1411,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OBJ_QUERY,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
-                       MLX5_IB_OBJECT_DEVX_OBJ,
+                       UVERBS_IDR_ANY_OBJECT,
                        UVERBS_ACCESS_READ,
                        UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(
@@ -1323,12 +1441,23 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
 
-DECLARE_UVERBS_OBJECT_TREE(devx_objects,
-                          &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
-                          &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
-                          &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
-
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
+static bool devx_is_supported(struct ib_device *device)
 {
-       return &devx_objects;
+       struct mlx5_ib_dev *dev = to_mdev(device);
+
+       return !dev->rep && MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+                                   MLX5_GENERAL_OBJ_TYPES_CAP_UCTX;
 }
+
+const struct uapi_definition mlx5_ib_devx_defs[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+               MLX5_IB_OBJECT_DEVX,
+               UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+               MLX5_IB_OBJECT_DEVX_OBJ,
+               UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+               MLX5_IB_OBJECT_DEVX_UMEM,
+               UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+       {},
+};
index f86cdcafdafc7cd8cda69671e9c63a611eae58be..b73756bd38bd14ab623dc2728c5c789632310572 100644 (file)
@@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
 
 #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
        struct mlx5_ib_flow_handler *flow_handler;
@@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
                uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
        struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
        int len, ret, i;
+       u32 counter_id = 0;
 
        if (!capable(CAP_NET_RAW))
                return -EPERM;
@@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
            ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
                return -EINVAL;
 
-       if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
-           (dest_devx || dest_qp))
-               return -EINVAL;
-
        if (dest_devx) {
                devx_obj = uverbs_attr_get_obj(
                        attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -128,9 +125,22 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
                dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
        }
 
+       len = uverbs_attr_get_uobjs_arr(attrs,
+               MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+       if (len) {
+               devx_obj = arr_flow_actions[0]->object;
+
+               if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
+                       return -EINVAL;
+               flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+       }
        if (dev->rep)
                return -ENOTSUPP;
 
+       if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+           fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+               return -EINVAL;
+
        cmd_in = uverbs_attr_get_alloced_ptr(
                attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
        inlen = uverbs_attr_get_len(attrs,
@@ -164,6 +174,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
        }
 
        flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+                                              counter_id,
                                               cmd_in, inlen,
                                               dest_id, dest_type);
        if (IS_ERR(flow_handler)) {
@@ -194,7 +205,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
-       struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+       struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
@@ -313,7 +324,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
-       struct ib_uverbs_file *file,
        struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
@@ -435,7 +445,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
-       struct ib_uverbs_file *file,
        struct uverbs_attr_bundle *attrs)
 {
        struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
@@ -526,7 +535,11 @@ DECLARE_UVERBS_NAMED_METHOD(
                             UA_OPTIONAL),
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
                           UVERBS_ATTR_TYPE(u32),
-                          UA_OPTIONAL));
+                          UA_OPTIONAL),
+       UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+                            MLX5_IB_OBJECT_DEVX_OBJ,
+                            UVERBS_ACCESS_READ, 1, 1,
+                            UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
        MLX5_IB_METHOD_DESTROY_FLOW,
@@ -610,16 +623,10 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
                            &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
                            &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
 
-DECLARE_UVERBS_OBJECT_TREE(flow_objects,
-                          &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
-
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
-{
-       int i = 0;
-
-       root[i++] = &flow_objects;
-       root[i++] = &mlx5_ib_fs;
-       root[i++] = &mlx5_ib_flow_actions;
-
-       return i;
-}
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_FLOW_MATCHER),
+       UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW, &mlx5_ib_fs),
+       UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+                               &mlx5_ib_flow_actions),
+       {},
+};
index 584ff2ea7810465a1fb5971bfb8a517d1dcd013c..8a682d86d63471d2af4dbfefbf14c5b5c2333ac8 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include "ib_rep.h"
+#include "srq.h"
 
 static const struct mlx5_ib_profile rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
@@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_rep_roce_init,
                     mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+                    mlx5_init_srq_table,
+                    mlx5_cleanup_srq_table),
        STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
                     mlx5_ib_stage_dev_res_init,
                     mlx5_ib_stage_dev_res_cleanup),
index 32a9e9228b13554c2d1d5db057a5a484efc4f2bf..a2735f246d5c64a90445cdbc96f8bd04b301b69c 100644 (file)
@@ -568,6 +568,10 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
        props->max_vl_num       = out_mad->data[37] >> 4;
        props->init_type_reply  = out_mad->data[41] >> 4;
 
+       if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+               props->port_cap_flags2 =
+                       be16_to_cpup((__be16 *)(out_mad->data + 60));
+
        /* Check if extended speeds (EDR/FDR/...) are supported */
        if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
                ext_active_speed = out_mad->data[62] >> 4;
@@ -579,6 +583,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
                case 2:
                        props->active_speed = 32; /* EDR */
                        break;
+               case 4:
+                       if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+                           props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
+                               props->active_speed = IB_SPEED_HDR;
+                       break;
                }
        }
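
The MAD hunk above reads port_cap_flags2 from byte offset 60 of the extended port attribute and, when both the CapabilityMask2 and HDR bits are advertised, maps extended-speed value 4 to IB_SPEED_HDR. A small sketch of that decision; the flag constants here are simplified stand-ins for the ib_core definitions.

/*
 * Sketch of the HDR-speed decision added above; the flag constants are
 * simplified stand-ins for the ib_core definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define CAP_MASK2_SUP  (1u << 0)        /* stand-in for IB_PORT_CAP_MASK2_SUP */
#define SPEED_HDR_SUP  (1u << 1)        /* stand-in for IB_PORT_LINK_SPEED_HDR_SUP */

static const char *active_speed(uint8_t ext_speed, uint32_t cap_flags,
                                uint16_t cap_flags2)
{
        switch (ext_speed) {
        case 2:
                return "EDR";
        case 4:
                if ((cap_flags & CAP_MASK2_SUP) && (cap_flags2 & SPEED_HDR_SUP))
                        return "HDR";
                return "unknown";
        default:
                return "unknown";
        }
}

int main(void)
{
        puts(active_speed(4, CAP_MASK2_SUP, SPEED_HDR_SUP));    /* HDR */
        return 0;
}
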
 
index 3569fda07e07f47b9286b7e1251c2716f9169203..5edd8d5f151f9650064fb2b52e71f1a167f7abea 100644 (file)
@@ -60,6 +60,7 @@
 #include "mlx5_ib.h"
 #include "ib_rep.h"
 #include "cmd.h"
+#include "srq.h"
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <rdma/uverbs_std_types.h>
@@ -82,10 +83,13 @@ static char mlx5_version[] =
 
 struct mlx5_ib_event_work {
        struct work_struct      work;
-       struct mlx5_core_dev    *dev;
-       void                    *context;
-       enum mlx5_dev_event     event;
-       unsigned long           param;
+       union {
+               struct mlx5_ib_dev            *dev;
+               struct mlx5_ib_multiport_info *mpi;
+       };
+       bool                    is_slave;
+       unsigned int            event;
+       void                    *param;
 };
 
 enum {
@@ -1014,6 +1018,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 
                if (MLX5_CAP_GEN(mdev, cqe_128_always))
                        resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+               if (MLX5_CAP_GEN(mdev, qp_packet_based))
+                       resp.flags |=
+                               MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
        }
 
        if (field_avail(typeof(resp), sw_parsing_caps,
@@ -1216,6 +1223,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
        props->subnet_timeout   = rep->subnet_timeout;
        props->init_type_reply  = rep->init_type_reply;
 
+       if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+               props->port_cap_flags2 = rep->cap_mask2;
+
        err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
        if (err)
                goto out;
@@ -1752,7 +1762,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 #endif
 
        if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
-               err = mlx5_ib_devx_create(dev);
+               err = mlx5_ib_devx_create(dev, true);
                if (err < 0)
                        goto out_uars;
                context->devx_uid = err;
@@ -3706,7 +3716,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
                      struct mlx5_flow_destination *dst,
                      struct mlx5_ib_flow_matcher  *fs_matcher,
                      struct mlx5_flow_act *flow_act,
-                     void *cmd_in, int inlen)
+                     void *cmd_in, int inlen,
+                     int dst_num)
 {
        struct mlx5_ib_flow_handler *handler;
        struct mlx5_flow_spec *spec;
@@ -3728,7 +3739,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
        spec->match_criteria_enable = fs_matcher->match_criteria_enable;
 
        handler->rule = mlx5_add_flow_rules(ft, spec,
-                                           flow_act, dst, 1);
+                                           flow_act, dst, dst_num);
 
        if (IS_ERR(handler->rule)) {
                err = PTR_ERR(handler->rule);
@@ -3791,12 +3802,14 @@ struct mlx5_ib_flow_handler *
 mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
                        struct mlx5_ib_flow_matcher *fs_matcher,
                        struct mlx5_flow_act *flow_act,
+                       u32 counter_id,
                        void *cmd_in, int inlen, int dest_id,
                        int dest_type)
 {
        struct mlx5_flow_destination *dst;
        struct mlx5_ib_flow_prio *ft_prio;
        struct mlx5_ib_flow_handler *handler;
+       int dst_num = 0;
        bool mcast;
        int err;
 
@@ -3806,7 +3819,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
        if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
                return ERR_PTR(-ENOMEM);
 
-       dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+       dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL);
        if (!dst)
                return ERR_PTR(-ENOMEM);
 
@@ -3820,20 +3833,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
        }
 
        if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
-               dst->type = dest_type;
-               dst->tir_num = dest_id;
+               dst[dst_num].type = dest_type;
+               dst[dst_num].tir_num = dest_id;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
-               dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
-               dst->ft_num = dest_id;
+               dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+               dst[dst_num].ft_num = dest_id;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        } else {
-               dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+               dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        }
 
+       dst_num++;
+
+       if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+               dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+               dst[dst_num].counter_id = counter_id;
+               dst_num++;
+       }
+
        handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
-                                       cmd_in, inlen);
+                                       cmd_in, inlen, dst_num);
 
        if (IS_ERR(handler)) {
                err = PTR_ERR(handler);
@@ -4226,6 +4247,63 @@ static void delay_drop_handler(struct work_struct *work)
        mutex_unlock(&delay_drop->lock);
 }
 
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+                                struct ib_event *ibev)
+{
+       switch (eqe->sub_type) {
+       case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+               schedule_work(&ibdev->delay_drop.delay_drop_work);
+               break;
+       default: /* do nothing */
+               return;
+       }
+}
+
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+                             struct ib_event *ibev)
+{
+       u8 port = (eqe->data.port.port >> 4) & 0xf;
+
+       ibev->element.port_num = port;
+
+       switch (eqe->sub_type) {
+       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+               /* In RoCE, port up/down events are handled in
+                * mlx5_netdev_event().
+                */
+               if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+                                           IB_LINK_LAYER_ETHERNET)
+                       return -EINVAL;
+
+               ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
+                               IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_LID:
+               ibev->event = IB_EVENT_LID_CHANGE;
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+               ibev->event = IB_EVENT_PKEY_CHANGE;
+               schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+               ibev->event = IB_EVENT_GID_CHANGE;
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+               ibev->event = IB_EVENT_CLIENT_REREGISTER;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static void mlx5_ib_handle_event(struct work_struct *_work)
 {
        struct mlx5_ib_event_work *work =
@@ -4233,65 +4311,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
        struct mlx5_ib_dev *ibdev;
        struct ib_event ibev;
        bool fatal = false;
-       u8 port = (u8)work->param;
 
-       if (mlx5_core_is_mp_slave(work->dev)) {
-               ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
+       if (work->is_slave) {
+               ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
                if (!ibdev)
                        goto out;
        } else {
-               ibdev = work->context;
+               ibdev = work->dev;
        }
 
        switch (work->event) {
        case MLX5_DEV_EVENT_SYS_ERROR:
                ibev.event = IB_EVENT_DEVICE_FATAL;
                mlx5_ib_handle_internal_error(ibdev);
+               ibev.element.port_num  = (u8)(unsigned long)work->param;
                fatal = true;
                break;
-
-       case MLX5_DEV_EVENT_PORT_UP:
-       case MLX5_DEV_EVENT_PORT_DOWN:
-       case MLX5_DEV_EVENT_PORT_INITIALIZED:
-               /* In RoCE, port up/down events are handled in
-                * mlx5_netdev_event().
-                */
-               if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
-                       IB_LINK_LAYER_ETHERNET)
+       case MLX5_EVENT_TYPE_PORT_CHANGE:
+               if (handle_port_change(ibdev, work->param, &ibev))
                        goto out;
-
-               ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
-                            IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
-               break;
-
-       case MLX5_DEV_EVENT_LID_CHANGE:
-               ibev.event = IB_EVENT_LID_CHANGE;
-               break;
-
-       case MLX5_DEV_EVENT_PKEY_CHANGE:
-               ibev.event = IB_EVENT_PKEY_CHANGE;
-               schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
-               break;
-
-       case MLX5_DEV_EVENT_GUID_CHANGE:
-               ibev.event = IB_EVENT_GID_CHANGE;
-               break;
-
-       case MLX5_DEV_EVENT_CLIENT_REREG:
-               ibev.event = IB_EVENT_CLIENT_REREGISTER;
                break;
-       case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
-               schedule_work(&ibdev->delay_drop.delay_drop_work);
-               goto out;
+       case MLX5_EVENT_TYPE_GENERAL_EVENT:
+               handle_general_event(ibdev, work->param, &ibev);
+               /* fall through */
        default:
                goto out;
        }
 
-       ibev.device           = &ibdev->ib_dev;
-       ibev.element.port_num = port;
+       ibev.device = &ibdev->ib_dev;
 
-       if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
-               mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+       if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+               mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
                goto out;
        }
 
@@ -4304,22 +4354,43 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
        kfree(work);
 }
 
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
-                         enum mlx5_dev_event event, unsigned long param)
+static int mlx5_ib_event(struct notifier_block *nb,
+                        unsigned long event, void *param)
 {
        struct mlx5_ib_event_work *work;
 
        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (!work)
-               return;
+               return NOTIFY_DONE;
 
        INIT_WORK(&work->work, mlx5_ib_handle_event);
-       work->dev = dev;
+       work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
+       work->is_slave = false;
        work->param = param;
-       work->context = context;
        work->event = event;
 
        queue_work(mlx5_ib_event_wq, &work->work);
+
+       return NOTIFY_OK;
+}
+
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
+                                   unsigned long event, void *param)
+{
+       struct mlx5_ib_event_work *work;
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return NOTIFY_DONE;
+
+       INIT_WORK(&work->work, mlx5_ib_handle_event);
+       work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
+       work->is_slave = true;
+       work->param = param;
+       work->event = event;
+       queue_work(mlx5_ib_event_wq, &work->work);
+
+       return NOTIFY_OK;
 }
 
 static int set_has_smi_cap(struct mlx5_ib_dev *dev)
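The two notifier callbacks above replace the old mlx5_interface .event hook (removed from mlx5_ib_interface further down): each notifier_block is embedded in its owning structure, recovered with container_of(), and the real handling is deferred to a workqueue because notifier callbacks may run in atomic context. A minimal sketch of that pattern, with hypothetical my_* names and no mlx5 specifics:

#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
        struct notifier_block nb;
};

struct my_event_work {
        struct work_struct work;
        struct my_dev *dev;
        unsigned long event;
};

static void my_handle_event(struct work_struct *_work)
{
        struct my_event_work *work =
                container_of(_work, struct my_event_work, work);

        /* process work->event in sleepable context */
        kfree(work);
}

static int my_event_notifier(struct notifier_block *nb,
                             unsigned long event, void *param)
{
        struct my_event_work *work = kmalloc(sizeof(*work), GFP_ATOMIC);

        if (!work)
                return NOTIFY_DONE;

        INIT_WORK(&work->work, my_handle_event);
        work->dev = container_of(nb, struct my_dev, nb);   /* recover owner */
        work->event = event;
        schedule_work(&work->work);

        return NOTIFY_OK;
}

Registration is then a single mlx5_notifier_register(mdev, &nb) at init with a matching mlx5_notifier_unregister() at teardown, which is what the new MLX5_IB_STAGE_DEVICE_NOTIFIER stage and the slave-port bind/unbind paths below do.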
@@ -5325,14 +5396,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
                mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
 }
 
-static const struct cpumask *
-mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
-{
-       struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
-       return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
-}
-
 /* The mlx5_ib_multiport_mutex should be held when calling this function */
 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
                                      struct mlx5_ib_multiport_info *mpi)
@@ -5350,6 +5413,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
                spin_unlock(&port->mp.mpi_lock);
                return;
        }
+
+       if (mpi->mdev_events.notifier_call)
+               mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+       mpi->mdev_events.notifier_call = NULL;
+
        mpi->ibdev = NULL;
 
        spin_unlock(&port->mp.mpi_lock);
@@ -5405,6 +5473,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
 
        ibdev->port[port_num].mp.mpi = mpi;
        mpi->ibdev = ibdev;
+       mpi->mdev_events.notifier_call = NULL;
        spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
 
        err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
@@ -5422,6 +5491,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
                goto unbind;
        }
 
+       mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
+       mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
+
        err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
        if (err)
                goto unbind;
@@ -5551,30 +5623,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
        UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
                             enum mlx5_ib_uapi_flow_action_flags));
 
-static int populate_specs_root(struct mlx5_ib_dev *dev)
-{
-       const struct uverbs_object_tree_def **trees = dev->driver_trees;
-       size_t num_trees = 0;
-
-       if (mlx5_accel_ipsec_device_caps(dev->mdev) &
-           MLX5_ACCEL_IPSEC_CAP_DEVICE)
-               trees[num_trees++] = &mlx5_ib_flow_action;
-
-       if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
-               trees[num_trees++] = &mlx5_ib_dm;
-
-       if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
-           MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
-               trees[num_trees++] = mlx5_ib_get_devx_tree();
-
-       num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
-
-       WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
-       trees[num_trees] = NULL;
-       dev->ib_dev.driver_specs = trees;
+static const struct uapi_definition mlx5_ib_defs[] = {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+       UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+       UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+#endif
 
-       return 0;
-}
+       UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+                               &mlx5_ib_flow_action),
+       UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+       {}
+};
 
 static int mlx5_ib_read_counters(struct ib_counters *counters,
                                 struct ib_counters_read_attr *read_attr,
@@ -5694,8 +5753,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
        dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
        dev->ib_dev.phys_port_cnt       = dev->num_ports;
-       dev->ib_dev.num_comp_vectors    =
-               dev->mdev->priv.eq_table.num_comp_vectors;
+       dev->ib_dev.num_comp_vectors    = mlx5_comp_vectors_count(mdev);
        dev->ib_dev.dev.parent          = &mdev->pdev->dev;
 
        mutex_init(&dev->cap_mask_mutex);
@@ -5838,7 +5896,6 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
        dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
-       dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
        if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
            IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
                dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params;
@@ -5881,14 +5938,22 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.uverbs_ex_cmd_mask |=
                        (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
                        (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
-       dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
+       if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+           MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+               dev->ib_dev.create_flow_action_esp =
+                       mlx5_ib_create_flow_action_esp;
+               dev->ib_dev.modify_flow_action_esp =
+                       mlx5_ib_modify_flow_action_esp;
+       }
        dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
-       dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
        dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
        dev->ib_dev.create_counters = mlx5_ib_create_counters;
        dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
        dev->ib_dev.read_counters = mlx5_ib_read_counters;
 
+       if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+               dev->ib_dev.driver_def = mlx5_ib_defs;
+
        err = init_node_data(dev);
        if (err)
                return err;
@@ -6034,6 +6099,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
        return mlx5_ib_odp_init_one(dev);
 }
 
+void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_odp_cleanup_one(dev);
+}
+
 int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
@@ -6096,11 +6166,6 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 }
 
-static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
-{
-       return populate_specs_root(dev);
-}
-
 int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        const char *name;
@@ -6152,6 +6217,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
        mlx5_ib_unregister_vport_reps(dev);
 }
 
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
+{
+       dev->mdev_events.notifier_call = mlx5_ib_event;
+       mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+       return 0;
+}
+
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+}
+
+static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
+{
+       int uid;
+
+       uid = mlx5_ib_devx_create(dev, false);
+       if (uid > 0)
+               dev->devx_whitelist_uid = uid;
+
+       return 0;
+}
+
+static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+       if (dev->devx_whitelist_uid)
+               mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+}
+
 void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
                      const struct mlx5_ib_profile *profile,
                      int stage)
@@ -6163,8 +6256,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
                        profile->stage[stage].cleanup(dev);
        }
 
-       if (dev->devx_whitelist_uid)
-               mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
        ib_dealloc_device((struct ib_device *)dev);
 }
 
@@ -6173,7 +6264,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
 {
        int err;
        int i;
-       int uid;
 
        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
@@ -6183,10 +6273,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
                }
        }
 
-       uid = mlx5_ib_devx_create(dev);
-       if (uid > 0)
-               dev->devx_whitelist_uid = uid;
-
        dev->profile = profile;
        dev->ib_active = true;
 
@@ -6214,12 +6300,18 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_roce_init,
                     mlx5_ib_stage_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+                    mlx5_init_srq_table,
+                    mlx5_cleanup_srq_table),
        STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
                     mlx5_ib_stage_dev_res_init,
                     mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+                    mlx5_ib_stage_dev_notifier_init,
+                    mlx5_ib_stage_dev_notifier_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_ODP,
                     mlx5_ib_stage_odp_init,
-                    NULL),
+                    mlx5_ib_stage_odp_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
                     mlx5_ib_stage_counters_init,
                     mlx5_ib_stage_counters_cleanup),
@@ -6235,9 +6327,9 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
                     NULL,
                     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
-       STAGE_CREATE(MLX5_IB_STAGE_SPECS,
-                    mlx5_ib_stage_populate_specs,
-                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+                    mlx5_ib_stage_devx_init,
+                    mlx5_ib_stage_devx_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
@@ -6265,9 +6357,15 @@ static const struct mlx5_ib_profile nic_rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_rep_roce_init,
                     mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+                    mlx5_init_srq_table,
+                    mlx5_cleanup_srq_table),
        STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
                     mlx5_ib_stage_dev_res_init,
                     mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+                    mlx5_ib_stage_dev_notifier_init,
+                    mlx5_ib_stage_dev_notifier_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
                     mlx5_ib_stage_counters_init,
                     mlx5_ib_stage_counters_cleanup),
@@ -6280,9 +6378,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
                     NULL,
                     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
-       STAGE_CREATE(MLX5_IB_STAGE_SPECS,
-                    mlx5_ib_stage_populate_specs,
-                    NULL),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
@@ -6388,10 +6483,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 static struct mlx5_interface mlx5_ib_interface = {
        .add            = mlx5_ib_add,
        .remove         = mlx5_ib_remove,
-       .event          = mlx5_ib_event,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       .pfault         = mlx5_ib_pfault,
-#endif
        .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
 };
 
index b651a7a6fde9e6d3a44bcde7372d40afb66c3e36..7145f512f94876a5efbf57756d82bea8b6e8d3db 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/mlx5/fs.h>
 #include <linux/types.h>
 #include <linux/mlx5/transobj.h>
@@ -50,6 +49,8 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 
+#include "srq.h"
+
 #define mlx5_ib_dbg(_dev, format, arg...)                                      \
        dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,      \
                __LINE__, current->pid, ##arg)
@@ -257,6 +258,7 @@ enum mlx5_ib_rq_flags {
 };
 
 struct mlx5_ib_wq {
+       struct mlx5_frag_buf_ctrl fbc;
        u64                    *wrid;
        u32                    *wr_data;
        struct wr_list         *w_list;
@@ -275,7 +277,7 @@ struct mlx5_ib_wq {
        unsigned                tail;
        u16                     cur_post;
        u16                     last_poll;
-       void                   *qend;
+       void                    *cur_edge;
 };
 
 enum mlx5_ib_wq_flags {
@@ -460,6 +462,7 @@ enum mlx5_ib_qp_flags {
        MLX5_IB_QP_UNDERLAY                     = 1 << 10,
        MLX5_IB_QP_PCI_WRITE_END_PADDING        = 1 << 11,
        MLX5_IB_QP_TUNNEL_OFFLOAD               = 1 << 12,
+       MLX5_IB_QP_PACKET_BASED_CREDIT          = 1 << 13,
 };
 
 struct mlx5_umr_wr {
@@ -523,6 +526,7 @@ struct mlx5_ib_srq {
        struct mlx5_core_srq    msrq;
        struct mlx5_frag_buf    buf;
        struct mlx5_db          db;
+       struct mlx5_frag_buf_ctrl fbc;
        u64                    *wrid;
        /* protect SRQ handling
         */
@@ -540,7 +544,6 @@ struct mlx5_ib_srq {
 struct mlx5_ib_xrcd {
        struct ib_xrcd          ibxrcd;
        u32                     xrcdn;
-       u16                     uid;
 };
 
 enum mlx5_ib_mtt_access_flags {
@@ -774,14 +777,16 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_CAPS,
        MLX5_IB_STAGE_NON_DEFAULT_CB,
        MLX5_IB_STAGE_ROCE,
+       MLX5_IB_STAGE_SRQ,
        MLX5_IB_STAGE_DEVICE_RESOURCES,
+       MLX5_IB_STAGE_DEVICE_NOTIFIER,
        MLX5_IB_STAGE_ODP,
        MLX5_IB_STAGE_COUNTERS,
        MLX5_IB_STAGE_CONG_DEBUGFS,
        MLX5_IB_STAGE_UAR,
        MLX5_IB_STAGE_BFREG,
        MLX5_IB_STAGE_PRE_IB_REG_UMR,
-       MLX5_IB_STAGE_SPECS,
+       MLX5_IB_STAGE_WHITELIST_UID,
        MLX5_IB_STAGE_IB_REG,
        MLX5_IB_STAGE_POST_IB_REG_UMR,
        MLX5_IB_STAGE_DELAY_DROP,
@@ -806,6 +811,7 @@ struct mlx5_ib_multiport_info {
        struct list_head list;
        struct mlx5_ib_dev *ibdev;
        struct mlx5_core_dev *mdev;
+       struct notifier_block mdev_events;
        struct completion unref_comp;
        u64 sys_image_guid;
        u32 mdev_refcnt;
@@ -880,10 +886,19 @@ struct mlx5_ib_lb_state {
        bool                    enabled;
 };
 
+struct mlx5_ib_pf_eq {
+       struct mlx5_ib_dev *dev;
+       struct mlx5_eq *core;
+       struct work_struct work;
+       spinlock_t lock; /* Pagefaults spinlock */
+       struct workqueue_struct *wq;
+       mempool_t *pool;
+};
+
 struct mlx5_ib_dev {
        struct ib_device                ib_dev;
-       const struct uverbs_object_tree_def *driver_trees[7];
        struct mlx5_core_dev            *mdev;
+       struct notifier_block           mdev_events;
        struct mlx5_roce                roce[MLX5_MAX_PORTS];
        int                             num_ports;
        /* serialize update of capability mask
@@ -902,6 +917,8 @@ struct mlx5_ib_dev {
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        struct ib_odp_caps      odp_caps;
        u64                     odp_max_size;
+       struct mlx5_ib_pf_eq    odp_pf_eq;
+
        /*
         * Sleepable RCU that prevents destruction of MRs while they are still
         * being used by a page fault handler.
@@ -927,6 +944,7 @@ struct mlx5_ib_dev {
        u64                     sys_image_guid;
        struct mlx5_memic       memic;
        u16                     devx_whitelist_uid;
+       struct mlx5_srq_table   srq_table;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1158,9 +1176,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
-                   struct mlx5_pagefault *pfault);
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
@@ -1175,6 +1192,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 }
 
 static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void)                               {}
 static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
@@ -1250,32 +1268,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
                                  u8 port_num);
 
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
 const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
 struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
        struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
-       struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
-       int dest_id, int dest_type);
+       struct mlx5_flow_act *flow_act, u32 counter_id,
+       void *cmd_in, int inlen, int dest_id, int dest_type);
 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
 int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
 void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
 #else
 static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+                          bool is_user) { return -EOPNOTSUPP; }
 static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline const struct uverbs_object_tree_def *
-mlx5_ib_get_devx_tree(void) { return NULL; }
 static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
                                             int *dest_type)
 {
        return false;
 }
-static inline int
-mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
-{
-       return 0;
-}
 static inline void
 mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
 {
index 2cc3d69ab6f64dde00ee48c82ff93c5edca697f4..4ead8c0fff5ad46d6d68f676194badda71938f4f 100644 (file)
 #include "mlx5_ib.h"
 #include "cmd.h"
 
+#include <linux/mlx5/eq.h>
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+       u32                     bytes_committed;
+       u32                     token;
+       u8                      event_subtype;
+       u8                      type;
+       union {
+               /* Initiator or send message responder pagefault details. */
+               struct {
+                       /* Received packet size, only valid for responders. */
+                       u32     packet_size;
+                       /*
+                        * Number of the resource holding the WQE; which
+                        * resource it is depends on the page fault type.
+                        */
+                       u32     wq_num;
+                       /*
+                        * WQE index. Refers to either the send queue or
+                        * receive queue, according to event_subtype.
+                        */
+                       u16     wqe_index;
+               } wqe;
+               /* RDMA responder pagefault details */
+               struct {
+                       u32     r_key;
+                       /*
+                        * Received packet size, minimal size page fault
+                        * Received packet size; resolving the fault for at
+                        * least this many bytes is required for forward progress.
+                       u32     packet_size;
+                       u32     rdma_op_len;
+                       u64     rdma_va;
+               } rdma;
+       };
+
+       struct mlx5_ib_pf_eq    *eq;
+       struct work_struct      work;
+};
+
 #define MAX_PREFETCH_LEN (4*1024*1024U)
 
 /* Timeout in ms to wait for an active mmu notifier to complete when handling
@@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 {
        int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
                     pfault->wqe.wq_num : pfault->token;
-       int ret = mlx5_core_page_fault_resume(dev->mdev,
-                                             pfault->token,
-                                             wq_num,
-                                             pfault->type,
-                                             error);
-       if (ret)
-               mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
-                           wq_num);
+       u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
+       u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = { };
+       int err;
+
+       MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
+       MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
+       MLX5_SET(page_fault_resume_in, in, token, pfault->token);
+       MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+       MLX5_SET(page_fault_resume_in, in, error, !!error);
+
+       err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+       if (err)
+               mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
+                           wq_num, err);
 }
 
 static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
@@ -607,8 +653,8 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
                        if (!wait_for_completion_timeout(
                                        &odp->notifier_completion,
                                        timeout)) {
-                               mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
-                                            current_seq, odp->notifiers_seq);
+                               mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+                                            current_seq, odp->notifiers_seq, odp->notifiers_count);
                        }
                } else {
                        /* The MR is being killed, kill the QP as well. */
@@ -1026,16 +1072,31 @@ static int mlx5_ib_mr_responder_pfault_handler(
        return 0;
 }
 
-static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
-                                             u32 wq_num)
+static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
+                                                      u32 wq_num, int pf_type)
 {
-       struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+       enum mlx5_res_type res_type;
 
-       if (!mqp) {
-               mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+       switch (pf_type) {
+       case MLX5_WQE_PF_TYPE_RMP:
+               res_type = MLX5_RES_SRQ;
+               break;
+       case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
+       case MLX5_WQE_PF_TYPE_RESP:
+       case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
+               res_type = MLX5_RES_QP;
+               break;
+       default:
                return NULL;
        }
 
+       return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+}
+
+static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
+{
+       struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;
+
        return to_mibqp(mqp);
 }
 
@@ -1049,18 +1110,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
        int resume_with_error = 1;
        u16 wqe_index = pfault->wqe.wqe_index;
        int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+       struct mlx5_core_rsc_common *res;
        struct mlx5_ib_qp *qp;
 
+       res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
+       if (!res) {
+               mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
+               return;
+       }
+
+       switch (res->res) {
+       case MLX5_RES_QP:
+               qp = res_to_qp(res);
+               break;
+       default:
+               mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
+               goto resolve_page_fault;
+       }
+
        buffer = (char *)__get_free_page(GFP_KERNEL);
        if (!buffer) {
                mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
                goto resolve_page_fault;
        }
 
-       qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
-       if (!qp)
-               goto resolve_page_fault;
-
        ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
                                    PAGE_SIZE, &qp->trans_qp.base);
        if (ret < 0) {
@@ -1100,6 +1173,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
        mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
                    pfault->wqe.wq_num, resume_with_error,
                    pfault->type);
+       mlx5_core_res_put(res);
        free_page((unsigned long)buffer);
 }
 
@@ -1178,10 +1252,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
        }
 }
 
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
-                   struct mlx5_pagefault *pfault)
+static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
 {
-       struct mlx5_ib_dev *dev = context;
        u8 event_subtype = pfault->event_subtype;
 
        switch (event_subtype) {
@@ -1198,6 +1270,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
        }
 }
 
+static void mlx5_ib_eqe_pf_action(struct work_struct *work)
+{
+       struct mlx5_pagefault *pfault = container_of(work,
+                                                    struct mlx5_pagefault,
+                                                    work);
+       struct mlx5_ib_pf_eq *eq = pfault->eq;
+
+       mlx5_ib_pfault(eq->dev, pfault);
+       mempool_free(pfault, eq->pool);
+}
+
+static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
+{
+       struct mlx5_eqe_page_fault *pf_eqe;
+       struct mlx5_pagefault *pfault;
+       struct mlx5_eqe *eqe;
+       int cc = 0;
+
+       while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
+               pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
+               if (!pfault) {
+                       schedule_work(&eq->work);
+                       break;
+               }
+
+               pf_eqe = &eqe->data.page_fault;
+               pfault->event_subtype = eqe->sub_type;
+               pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+               mlx5_ib_dbg(eq->dev,
+                           "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+                           eqe->sub_type, pfault->bytes_committed);
+
+               switch (eqe->sub_type) {
+               case MLX5_PFAULT_SUBTYPE_RDMA:
+                       /* RDMA based event */
+                       pfault->type =
+                               be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+                       pfault->token =
+                               be32_to_cpu(pf_eqe->rdma.pftype_token) &
+                               MLX5_24BIT_MASK;
+                       pfault->rdma.r_key =
+                               be32_to_cpu(pf_eqe->rdma.r_key);
+                       pfault->rdma.packet_size =
+                               be16_to_cpu(pf_eqe->rdma.packet_length);
+                       pfault->rdma.rdma_op_len =
+                               be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+                       pfault->rdma.rdma_va =
+                               be64_to_cpu(pf_eqe->rdma.rdma_va);
+                       mlx5_ib_dbg(eq->dev,
+                                   "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+                                   pfault->type, pfault->token,
+                                   pfault->rdma.r_key);
+                       mlx5_ib_dbg(eq->dev,
+                                   "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+                                   pfault->rdma.rdma_op_len,
+                                   pfault->rdma.rdma_va);
+                       break;
+
+               case MLX5_PFAULT_SUBTYPE_WQE:
+                       /* WQE based event */
+                       pfault->type =
+                               (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+                       pfault->token =
+                               be32_to_cpu(pf_eqe->wqe.token);
+                       pfault->wqe.wq_num =
+                               be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+                               MLX5_24BIT_MASK;
+                       pfault->wqe.wqe_index =
+                               be16_to_cpu(pf_eqe->wqe.wqe_index);
+                       pfault->wqe.packet_size =
+                               be16_to_cpu(pf_eqe->wqe.packet_length);
+                       mlx5_ib_dbg(eq->dev,
+                                   "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+                                   pfault->type, pfault->token,
+                                   pfault->wqe.wq_num,
+                                   pfault->wqe.wqe_index);
+                       break;
+
+               default:
+                       mlx5_ib_warn(eq->dev,
+                                    "Unsupported page fault event sub-type: 0x%02hhx\n",
+                                    eqe->sub_type);
+                       /* Unsupported page faults should still be
+                        * resolved by the page fault handler
+                        */
+               }
+
+               pfault->eq = eq;
+               INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
+               queue_work(eq->wq, &pfault->work);
+
+               cc = mlx5_eq_update_cc(eq->core, ++cc);
+       }
+
+       mlx5_eq_update_ci(eq->core, cc, 1);
+}
+
+static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+{
+       struct mlx5_ib_pf_eq *eq = eq_ptr;
+       unsigned long flags;
+
+       if (spin_trylock_irqsave(&eq->lock, flags)) {
+               mlx5_ib_eq_pf_process(eq);
+               spin_unlock_irqrestore(&eq->lock, flags);
+       } else {
+               schedule_work(&eq->work);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Cheap workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+       while (pool->curr_nr < pool->min_nr)
+               mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
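The pool above is what lets the interrupt path allocate page-fault descriptors with GFP_ATOMIC and still make forward progress: mempool_create_kmalloc_pool() preallocates a reserve, mempool_alloc() dips into it when kmalloc fails, and the refill loop in mempool_refill() tops the reserve back up from the work handler that follows. A rough lifecycle sketch with hypothetical sizes, not part of the patch:

#include <linux/mempool.h>

static int my_pool_demo(void)
{
        mempool_t *pool;
        void *elem;

        /* reserve of 64 preallocated 128-byte elements */
        pool = mempool_create_kmalloc_pool(64, 128);
        if (!pool)
                return -ENOMEM;

        /* safe in atomic context; falls back to the reserve if kmalloc fails */
        elem = mempool_alloc(pool, GFP_ATOMIC);
        if (elem)
                mempool_free(elem, pool); /* refills the reserve while it is low */

        mempool_destroy(pool);
        return 0;
}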
+static void mlx5_ib_eq_pf_action(struct work_struct *work)
+{
+       struct mlx5_ib_pf_eq *eq =
+               container_of(work, struct mlx5_ib_pf_eq, work);
+
+       mempool_refill(eq->pool);
+
+       spin_lock_irq(&eq->lock);
+       mlx5_ib_eq_pf_process(eq);
+       spin_unlock_irq(&eq->lock);
+}
+
+enum {
+       MLX5_IB_NUM_PF_EQE      = 0x1000,
+       MLX5_IB_NUM_PF_DRAIN    = 64,
+};
+
+static int
+mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+       struct mlx5_eq_param param = {};
+       int err;
+
+       INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
+       spin_lock_init(&eq->lock);
+       eq->dev = dev;
+
+       eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
+                                              sizeof(struct mlx5_pagefault));
+       if (!eq->pool)
+               return -ENOMEM;
+
+       eq->wq = alloc_workqueue("mlx5_ib_page_fault",
+                                WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
+                                MLX5_NUM_CMD_EQE);
+       if (!eq->wq) {
+               err = -ENOMEM;
+               goto err_mempool;
+       }
+
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_PFAULT_IDX,
+               .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+               .nent = MLX5_IB_NUM_PF_EQE,
+               .context = eq,
+               .handler = mlx5_ib_eq_pf_int
+       };
+       eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+       if (IS_ERR(eq->core)) {
+               err = PTR_ERR(eq->core);
+               goto err_wq;
+       }
+
+       return 0;
+err_wq:
+       destroy_workqueue(eq->wq);
+err_mempool:
+       mempool_destroy(eq->pool);
+       return err;
+}
+
+static int
+mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+       int err;
+
+       err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
+       cancel_work_sync(&eq->work);
+       destroy_workqueue(eq->wq);
+       mempool_destroy(eq->pool);
+
+       return err;
+}
+
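mlx5_ib_eq_pf_int() and mlx5_ib_eq_pf_action() above implement a trylock-or-defer scheme: the hard-IRQ handler drains the EQ only when the lock is uncontended, and otherwise punts to the work item, which also doubles as the mempool refill path. A stripped-down sketch of that shape, with hypothetical my_* names:

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct my_queue {
        spinlock_t lock;
        struct work_struct work;
};

static void my_drain(struct my_queue *q)
{
        /* consume pending entries; caller holds q->lock */
}

static irqreturn_t my_irq(int irq, void *data)
{
        struct my_queue *q = data;
        unsigned long flags;

        if (spin_trylock_irqsave(&q->lock, flags)) {
                my_drain(q);
                spin_unlock_irqrestore(&q->lock, flags);
        } else {
                schedule_work(&q->work); /* contended: retry from the workqueue */
        }

        return IRQ_HANDLED;
}

static void my_work_fn(struct work_struct *work)
{
        struct my_queue *q = container_of(work, struct my_queue, work);

        spin_lock_irq(&q->lock);
        my_drain(q);
        spin_unlock_irq(&q->lock);
}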
 void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 {
        if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
@@ -1226,7 +1495,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 {
-       int ret;
+       int ret = 0;
 
        if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
                ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1236,7 +1505,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
                }
        }
 
-       return 0;
+       if (!MLX5_CAP_GEN(dev->mdev, pg))
+               return ret;
+
+       ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
+
+       return ret;
+}
+
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
+{
+       if (!MLX5_CAP_GEN(dev->mdev, pg))
+               return;
+
+       mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
 }
 
 int mlx5_ib_odp_init(void)
@@ -1246,4 +1528,3 @@ int mlx5_ib_odp_init(void)
 
        return 0;
 }
-
index 3747cc681b18a54fb2a841f2539064e266ee5587..4a85748aeeea4b56f4858e0dfc9f54a60ba09d69 100644 (file)
@@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type)
        return is_qp0(qp_type) || is_qp1(qp_type);
 }
 
-static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
-{
-       return mlx5_buf_offset(&qp->buf, offset);
-}
-
-static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
-{
-       return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
-}
-
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
-{
-       return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
-}
-
 /**
  * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
  *
@@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        __be64 *pas;
        void *qpc;
        int err;
+       u16 uid;
 
        err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
        if (err) {
@@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                goto err_umem;
        }
 
-       MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
+       uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0;
+       MLX5_SET(create_qp_in, *in, uid, uid);
        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
        if (ubuffer->umem)
                mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
 }
 
+/* get_sq_edge - Get the next nearby edge.
+ *
+ * An 'edge' is the first address past the end of the current fragment
+ * (or of the SQ itself). WQE construction repeatedly advances a write
+ * pointer, so it only needs to check whether that pointer has reached
+ * an edge.
+ *
+ * @sq - SQ buffer.
+ * @idx - Stride index in the SQ buffer.
+ *
+ * Return:
+ *     The new edge.
+ */
+static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
+{
+       void *fragment_end;
+
+       fragment_end = mlx5_frag_buf_get_wqe
+               (&sq->fbc,
+                mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+
+       return fragment_end + MLX5_SEND_WQE_BB;
+}
+
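Concretely, assuming 4KB fragments and the 64-byte MLX5_SEND_WQE_BB stride, a fragment holds 64 strides, so the edge for any stride index is simply the first byte past the 4KB fragment that contains it (the SQ additionally carries the sub-page offset set up by mlx5_init_fbc_offset() below). A back-of-the-envelope sketch of that arithmetic, meant to sit alongside the helper above and not part of the patch:

static size_t my_edge_offset(u32 idx)
{
        const size_t frag_sz = 4096;              /* one fragment */
        const size_t stride = 64;                 /* MLX5_SEND_WQE_BB */
        const size_t strides_per_frag = frag_sz / stride;

        /* offset of the first byte past the fragment holding stride idx */
        return (idx / strides_per_frag + 1) * frag_sz;
}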
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
@@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
        base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
 
-       err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
+       err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
+                                      &qp->buf, dev->mdev->priv.numa_node);
        if (err) {
                mlx5_ib_dbg(dev, "err %d\n", err);
                return err;
        }
 
-       qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+       if (qp->rq.wqe_cnt)
+               mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
+                             ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
+
+       if (qp->sq.wqe_cnt) {
+               int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
+                                       MLX5_SEND_WQE_BB;
+               mlx5_init_fbc_offset(qp->buf.frags +
+                                    (qp->sq.offset / PAGE_SIZE),
+                                    ilog2(MLX5_SEND_WQE_BB),
+                                    ilog2(qp->sq.wqe_cnt),
+                                    sq_strides_offset, &qp->sq.fbc);
+
+               qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+       }
+
        *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
        *in = kvzalloc(*inlen, GFP_KERNEL);
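The sq_strides_offset computed above handles the SQ not starting on a fragment boundary. As a worked example under a hypothetical geometry: an RQ of 32 WQEs with 64-byte strides occupies 2048 bytes, so qp->sq.offset is 2048, the SQ begins halfway into fragment 0, and mlx5_init_fbc_offset() is told that 32 strides of the first SQ fragment are already consumed. A sketch mirroring that computation, reusing the driver's PAGE_SIZE and MLX5_SEND_WQE_BB macros:

static unsigned int my_sq_strides_offset(unsigned int rq_wqe_cnt,
                                         unsigned int rq_stride)
{
        unsigned int sq_offset = rq_wqe_cnt * rq_stride;  /* e.g. 32 * 64 = 2048 */

        /* SQ strides already used up inside the SQ's first fragment */
        return (sq_offset & (PAGE_SIZE - 1)) / MLX5_SEND_WQE_BB;  /* 2048 / 64 = 32 */
}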
@@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
                qp->flags |= MLX5_IB_QP_SQPN_QP1;
        }
 
-       mlx5_fill_page_array(&qp->buf,
-                            (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
+       mlx5_fill_page_frag_array(&qp->buf,
+                                 (__be64 *)MLX5_ADDR_OF(create_qp_in,
+                                                        *in, pas));
 
        err = mlx5_db_alloc(dev->mdev, &qp->db);
        if (err) {
@@ -1024,7 +1052,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
        kvfree(*in);
 
 err_buf:
-       mlx5_buf_free(dev->mdev, &qp->buf);
+       mlx5_frag_buf_free(dev->mdev, &qp->buf);
        return err;
 }
 
@@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
        kvfree(qp->sq.wr_data);
        kvfree(qp->rq.wrid);
        mlx5_db_free(dev->mdev, &qp->db);
-       mlx5_buf_free(dev->mdev, &qp->buf);
+       mlx5_frag_buf_free(dev->mdev, &qp->buf);
 }
 
 static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                                              MLX5_QP_FLAG_BFREG_INDEX |
                                              MLX5_QP_FLAG_TYPE_DCT |
                                              MLX5_QP_FLAG_TYPE_DCI |
-                                             MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+                                             MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
+                                             MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
                        return -EINVAL;
 
                err = get_qp_user_index(to_mucontext(pd->uobject->context),
@@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                        qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
                }
 
+               if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
+                       if (init_attr->qp_type != IB_QPT_RC ||
+                               !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
+                               mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
+                               return -EOPNOTSUPP;
+                       }
+                       qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
+               }
+
                if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
                        if (init_attr->qp_type != IB_QPT_UD ||
                            (MLX5_CAP_GEN(dev->mdev, port_type) !=
@@ -2021,7 +2059,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                MLX5_SET(qpc, qpc, cd_slave_send, 1);
        if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
                MLX5_SET(qpc, qpc, cd_slave_receive, 1);
-
+       if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+               MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                configure_responder_scat_cqe(init_attr, qpc);
                configure_requester_scat_cqe(dev, init_attr,
@@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
        if (rate == IB_RATE_PORT_CURRENT)
                return 0;
 
-       if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
+       if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
                return -EINVAL;
 
        while (rate != IB_RATE_PORT_CURRENT &&
@@ -3475,6 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                qp->sq.head = 0;
                qp->sq.tail = 0;
                qp->sq.cur_post = 0;
+               if (qp->sq.wqe_cnt)
+                       qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
                qp->sq.last_poll = 0;
                qp->db.db[MLX5_RCV_DBR] = 0;
                qp->db.db[MLX5_SND_DBR] = 0;
@@ -3515,7 +3556,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
                return is_valid_mask(attr_mask, req, opt);
        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
                req |= IB_QP_PATH_MTU;
-               opt = IB_QP_PKEY_INDEX;
+               opt = IB_QP_PKEY_INDEX | IB_QP_AV;
                return is_valid_mask(attr_mask, req, opt);
        } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
                req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
@@ -3749,6 +3790,62 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        return err;
 }
 
+static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+                                  u32 wqe_sz, void **cur_edge)
+{
+       u32 idx;
+
+       idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+       *cur_edge = get_sq_edge(sq, idx);
+
+       *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* handle_post_send_edge - Check if we have reached the SQ edge. If so, update
+ * to the next edge and re-translate the current WQE position.
+ * @sq: SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+                                        u32 wqe_sz, void **cur_edge)
+{
+       if (likely(*seg != *cur_edge))
+               return;
+
+       _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
+}
+
+/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
+ * pointers. At the end, @seg is aligned to 16B regardless of the copied size.
+ * @sq: SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+                                  void **seg, u32 *wqe_sz, const void *src,
+                                  size_t n)
+{
+       while (likely(n)) {
+               size_t leftlen = *cur_edge - *seg;
+               size_t copysz = min_t(size_t, leftlen, n);
+               size_t stride;
+
+               memcpy(*seg, src, copysz);
+
+               n -= copysz;
+               src += copysz;
+               stride = !n ? ALIGN(copysz, 16) : copysz;
+               *seg += stride;
+               *wqe_sz += stride >> 4;
+               handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+       }
+}
+
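Taken together, these helpers free WQE builders from special-casing the wrap at the end of the queue: write a 16B-multiple segment, advance @seg and @size, let handle_post_send_edge() hop to the next fragment when needed, and route variable-length payloads through memcpy_send_wqe(). A minimal usage sketch under those assumptions; my_seg_hdr and the payload are hypothetical, while the real callers are the reworked set_eth_seg() and set_data_inl_seg() below:

static void my_set_segment(struct mlx5_ib_qp *qp, void **seg, int *size,
                           void **cur_edge, const void *payload, size_t len)
{
        struct my_seg_hdr {
                __be32 byte_count;
                __be32 rsvd[3];
        } *hdr = *seg;                      /* exactly one 16B basic unit */

        hdr->byte_count = cpu_to_be32(len);
        hdr->rsvd[0] = hdr->rsvd[1] = hdr->rsvd[2] = 0;

        *seg += sizeof(*hdr);               /* keeps *seg 16B aligned */
        *size += sizeof(*hdr) / 16;
        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);

        /* the payload may span one or more fragment edges */
        memcpy_send_wqe(&qp->sq, cur_edge, seg, size, payload, len);
}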
 static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
 {
        struct mlx5_ib_cq *cq;
@@ -3774,11 +3871,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
        rseg->reserved = 0;
 }
 
-static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
-                        const struct ib_send_wr *wr, void *qend,
-                        struct mlx5_ib_qp *qp, int *size)
+static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+                       void **seg, int *size, void **cur_edge)
 {
-       void *seg = eseg;
+       struct mlx5_wqe_eth_seg *eseg = *seg;
 
        memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
 
@@ -3786,45 +3882,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
                eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
                                 MLX5_ETH_WQE_L4_CSUM;
 
-       seg += sizeof(struct mlx5_wqe_eth_seg);
-       *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
-
        if (wr->opcode == IB_WR_LSO) {
                struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
-               int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
-               u64 left, leftlen, copysz;
+               size_t left, copysz;
                void *pdata = ud_wr->header;
+               size_t stride;
 
                left = ud_wr->hlen;
                eseg->mss = cpu_to_be16(ud_wr->mss);
                eseg->inline_hdr.sz = cpu_to_be16(left);
 
-               /*
-                * check if there is space till the end of queue, if yes,
-                * copy all in one shot, otherwise copy till the end of queue,
-                * rollback and than the copy the left
+               /* memcpy_send_wqe should get a 16B-aligned address. Hence, we
+                * first copy up to the current edge and then, if needed,
+                * continue with memcpy_send_wqe.
                 */
-               leftlen = qend - (void *)eseg->inline_hdr.start;
-               copysz = min_t(u64, leftlen, left);
-
-               memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
-
-               if (likely(copysz > size_of_inl_hdr_start)) {
-                       seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
-                       *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
-               }
-
-               if (unlikely(copysz < left)) { /* the last wqe in the queue */
-                       seg = mlx5_get_send_wqe(qp, 0);
+               copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
+                              left);
+               memcpy(eseg->inline_hdr.start, pdata, copysz);
+               stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
+                              sizeof(eseg->inline_hdr.start) + copysz, 16);
+               *size += stride / 16;
+               *seg += stride;
+
+               if (copysz < left) {
+                       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
                        left -= copysz;
                        pdata += copysz;
-                       memcpy(seg, pdata, left);
-                       seg += ALIGN(left, 16);
-                       *size += ALIGN(left, 16) / 16;
+                       memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
+                                       left);
                }
+
+               return;
        }
 
-       return seg;
+       *seg += sizeof(struct mlx5_wqe_eth_seg);
+       *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
 }
 
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
@@ -4083,24 +4175,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
        dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
 }
 
-static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
-                                  struct mlx5_ib_mr *mr, int mr_list_size)
-{
-       void *qend = qp->sq.qend;
-       void *addr = mr->descs;
-       int copy;
-
-       if (unlikely(seg + mr_list_size > qend)) {
-               copy = qend - seg;
-               memcpy(seg, addr, copy);
-               addr += copy;
-               mr_list_size -= copy;
-               seg = mlx5_get_send_wqe(qp, 0);
-       }
-       memcpy(seg, addr, mr_list_size);
-       seg += mr_list_size;
-}
-
 static __be32 send_ieth(const struct ib_send_wr *wr)
 {
        switch (wr->opcode) {
@@ -4134,40 +4208,48 @@ static u8 wq_sig(void *wqe)
 }
 
 static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
-                           void *wqe, int *sz)
+                           void **wqe, int *wqe_sz, void **cur_edge)
 {
        struct mlx5_wqe_inline_seg *seg;
-       void *qend = qp->sq.qend;
-       void *addr;
+       size_t offset;
        int inl = 0;
-       int copy;
-       int len;
        int i;
 
-       seg = wqe;
-       wqe += sizeof(*seg);
+       seg = *wqe;
+       *wqe += sizeof(*seg);
+       offset = sizeof(*seg);
+
        for (i = 0; i < wr->num_sge; i++) {
-               addr = (void *)(unsigned long)(wr->sg_list[i].addr);
-               len  = wr->sg_list[i].length;
+               size_t len  = wr->sg_list[i].length;
+               void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+
                inl += len;
 
                if (unlikely(inl > qp->max_inline_data))
                        return -ENOMEM;
 
-               if (unlikely(wqe + len > qend)) {
-                       copy = qend - wqe;
-                       memcpy(wqe, addr, copy);
-                       addr += copy;
-                       len -= copy;
-                       wqe = mlx5_get_send_wqe(qp, 0);
+               while (likely(len)) {
+                       size_t leftlen;
+                       size_t copysz;
+
+                       handle_post_send_edge(&qp->sq, wqe,
+                                             *wqe_sz + (offset >> 4),
+                                             cur_edge);
+
+                       leftlen = *cur_edge - *wqe;
+                       copysz = min_t(size_t, leftlen, len);
+
+                       memcpy(*wqe, addr, copysz);
+                       len -= copysz;
+                       addr += copysz;
+                       *wqe += copysz;
+                       offset += copysz;
                }
-               memcpy(wqe, addr, len);
-               wqe += len;
        }
 
        seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
 
-       *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+       *wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
 
        return 0;
 }
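
The loop above replaces the single end-of-queue rollback with repeated copies up to each fragment edge. A minimal user-space model of that pattern follows; the fragment size, ring layout, and helper name are assumptions and do not mirror the signatures of memcpy_send_wqe() or handle_post_send_edge().

/* Toy model: copy a buffer into a ring built of fixed-size fragments,
 * never crossing a fragment edge with a single memcpy. */
#include <stdio.h>
#include <string.h>

#define FRAG_SZ 16u
#define RING_SZ 64u  /* must be a multiple of FRAG_SZ */

static void copy_across_edges(char *ring, size_t *pos, const char *src, size_t len)
{
	while (len) {
		size_t off    = *pos % RING_SZ;
		size_t edge   = ((off / FRAG_SZ) + 1) * FRAG_SZ; /* next edge */
		size_t room   = edge - off;
		size_t copysz = len < room ? len : room;

		memcpy(ring + off, src, copysz);
		*pos += copysz;
		src  += copysz;
		len  -= copysz;
	}
}

int main(void)
{
	char ring[RING_SZ] = { 0 };
	size_t pos = 10;

	copy_across_edges(ring, &pos, "payload spanning several fragments", 34);
	printf("final pos=%zu\n", pos);
	return 0;
}
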
@@ -4280,7 +4362,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 }
 
 static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
-                               struct mlx5_ib_qp *qp, void **seg, int *size)
+                               struct mlx5_ib_qp *qp, void **seg,
+                               int *size, void **cur_edge)
 {
        struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
        struct ib_mr *sig_mr = wr->sig_mr;
@@ -4364,8 +4447,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
 
        *seg += wqe_size;
        *size += wqe_size / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
        bsf = *seg;
        ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
@@ -4374,8 +4456,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
 
        *seg += sizeof(*bsf);
        *size += sizeof(*bsf) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
        return 0;
 }
@@ -4413,7 +4494,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 
 
 static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
-                         struct mlx5_ib_qp *qp, void **seg, int *size)
+                         struct mlx5_ib_qp *qp, void **seg, int *size,
+                         void **cur_edge)
 {
        const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
        struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
@@ -4445,16 +4527,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
        set_sig_umr_segment(*seg, xlt_size);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
        set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
-       ret = set_sig_data_segment(wr, qp, seg, size);
+       ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
        if (ret)
                return ret;
 
@@ -4491,11 +4571,11 @@ static int set_psv_wr(struct ib_sig_domain *domain,
 
 static int set_reg_wr(struct mlx5_ib_qp *qp,
                      const struct ib_reg_wr *wr,
-                     void **seg, int *size)
+                     void **seg, int *size, void **cur_edge)
 {
        struct mlx5_ib_mr *mr = to_mmr(wr->mr);
        struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
-       int mr_list_size = mr->ndescs * mr->desc_size;
+       size_t mr_list_size = mr->ndescs * mr->desc_size;
        bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
 
        if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
@@ -4507,18 +4587,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
        set_reg_umr_seg(*seg, mr, umr_inline);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
        set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 
        if (umr_inline) {
-               set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
-               *size += get_xlt_octo(mr_list_size);
+               memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+                               mr_list_size);
+               *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
        } else {
                set_reg_data_seg(*seg, mr, pd);
                *seg += sizeof(struct mlx5_wqe_data_seg);
@@ -4527,32 +4606,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
        return 0;
 }
 
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
+                       void **cur_edge)
 {
        set_linv_umr_seg(*seg);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
        set_linv_mkey_seg(*seg);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
-       if (unlikely((*seg == qp->sq.qend)))
-               *seg = mlx5_get_send_wqe(qp, 0);
+       handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 }
 
-static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
 {
        __be32 *p = NULL;
-       int tidx = idx;
+       u32 tidx = idx;
        int i, j;
 
-       pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
+       pr_debug("dump WQE index %u:\n", idx);
        for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
                if ((i & 0xf) == 0) {
-                       void *buf = mlx5_get_send_wqe(qp, tidx);
                        tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
-                       p = buf;
+                       p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx);
+                       pr_debug("WQBB at %p:\n", (void *)p);
                        j = 0;
                }
                pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
@@ -4562,15 +4640,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 }
 
 static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
-                    struct mlx5_wqe_ctrl_seg **ctrl,
-                    const struct ib_send_wr *wr, unsigned *idx,
-                    int *size, int nreq, bool send_signaled, bool solicited)
+                      struct mlx5_wqe_ctrl_seg **ctrl,
+                      const struct ib_send_wr *wr, unsigned int *idx,
+                      int *size, void **cur_edge, int nreq,
+                      bool send_signaled, bool solicited)
 {
        if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
                return -ENOMEM;
 
        *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
-       *seg = mlx5_get_send_wqe(qp, *idx);
+       *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
        *ctrl = *seg;
        *(uint32_t *)(*seg + 8) = 0;
        (*ctrl)->imm = send_ieth(wr);
@@ -4580,6 +4659,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 
        *seg += sizeof(**ctrl);
        *size = sizeof(**ctrl) / 16;
+       *cur_edge = qp->sq.cur_edge;
 
        return 0;
 }
@@ -4587,17 +4667,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
                     struct mlx5_wqe_ctrl_seg **ctrl,
                     const struct ib_send_wr *wr, unsigned *idx,
-                    int *size, int nreq)
+                    int *size, void **cur_edge, int nreq)
 {
-       return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
+       return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
                           wr->send_flags & IB_SEND_SIGNALED,
                           wr->send_flags & IB_SEND_SOLICITED);
 }
 
 static void finish_wqe(struct mlx5_ib_qp *qp,
                       struct mlx5_wqe_ctrl_seg *ctrl,
-                      u8 size, unsigned idx, u64 wr_id,
-                      int nreq, u8 fence, u32 mlx5_opcode)
+                      void *seg, u8 size, void *cur_edge,
+                      unsigned int idx, u64 wr_id, int nreq, u8 fence,
+                      u32 mlx5_opcode)
 {
        u8 opmod = 0;
 
@@ -4613,6 +4694,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
        qp->sq.wqe_head[idx] = qp->sq.head + nreq;
        qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
        qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+       /* Save the edge, which may have been updated during WQE
+        * construction, into the SQ's cache.
+        */
+       seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
+       qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
+                         get_sq_edge(&qp->sq, qp->sq.cur_post &
+                                     (qp->sq.wqe_cnt - 1)) :
+                         cur_edge;
 }
 
 static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
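
The edge cache written at the end of finish_wqe() follows one rule: if the WQEBB-aligned end of the just-built WQE lands exactly on the edge used during construction, the next post starts in a new fragment and the edge must be recomputed; otherwise the edge found during construction is reused. A user-space illustration with assumed addresses:

/* Model of the cur_edge decision; WQEBB and the addresses are assumptions. */
#include <stdint.h>
#include <stdio.h>

#define WQEBB 64u  /* MLX5_SEND_WQE_BB */

static uintptr_t ptr_align(uintptr_t p, uintptr_t a)
{
	return (p + a - 1) & ~(a - 1);
}

int main(void)
{
	uintptr_t cur_edge = 0x2000;       /* edge seen while building the WQE */
	uintptr_t seg      = 0x2000 - 16;  /* where construction stopped */

	if (ptr_align(seg, WQEBB) == cur_edge)
		puts("WQE ends on the edge: fetch a new edge for cur_post");
	else
		puts("cached edge from construction is still valid");
	return 0;
}
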
@@ -4623,11 +4713,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_ib_qp *qp;
        struct mlx5_ib_mr *mr;
-       struct mlx5_wqe_data_seg *dpseg;
        struct mlx5_wqe_xrc_seg *xrc;
        struct mlx5_bf *bf;
+       void *cur_edge;
        int uninitialized_var(size);
-       void *qend;
        unsigned long flags;
        unsigned idx;
        int err = 0;
@@ -4649,7 +4738,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 
        qp = to_mqp(ibqp);
        bf = &qp->bf;
-       qend = qp->sq.qend;
 
        spin_lock_irqsave(&qp->sq.lock, flags);
 
@@ -4669,7 +4757,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        goto out;
                }
 
-               err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+               err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
+                               nreq);
                if (err) {
                        mlx5_ib_warn(dev, "\n");
                        err = -ENOMEM;
@@ -4719,14 +4808,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        case IB_WR_LOCAL_INV:
                                qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
                                ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
-                               set_linv_wr(qp, &seg, &size);
+                               set_linv_wr(qp, &seg, &size, &cur_edge);
                                num_sge = 0;
                                break;
 
                        case IB_WR_REG_MR:
                                qp->sq.wr_data[idx] = IB_WR_REG_MR;
                                ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
-                               err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
+                               err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
+                                                &cur_edge);
                                if (err) {
                                        *bad_wr = wr;
                                        goto out;
@@ -4739,21 +4829,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                                mr = to_mmr(sig_handover_wr(wr)->sig_mr);
 
                                ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
-                               err = set_sig_umr_wr(wr, qp, &seg, &size);
+                               err = set_sig_umr_wr(wr, qp, &seg, &size,
+                                                    &cur_edge);
                                if (err) {
                                        mlx5_ib_warn(dev, "\n");
                                        *bad_wr = wr;
                                        goto out;
                                }
 
-                               finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
-                                          fence, MLX5_OPCODE_UMR);
+                               finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+                                          wr->wr_id, nreq, fence,
+                                          MLX5_OPCODE_UMR);
                                /*
                                 * SET_PSV WQEs are not signaled and solicited
                                 * on error
                                 */
                                err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
-                                                 &size, nreq, false, true);
+                                                 &size, &cur_edge, nreq, false,
+                                                 true);
                                if (err) {
                                        mlx5_ib_warn(dev, "\n");
                                        err = -ENOMEM;
@@ -4770,10 +4863,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                                        goto out;
                                }
 
-                               finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
-                                          fence, MLX5_OPCODE_SET_PSV);
+                               finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+                                          wr->wr_id, nreq, fence,
+                                          MLX5_OPCODE_SET_PSV);
                                err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
-                                                 &size, nreq, false, true);
+                                                 &size, &cur_edge, nreq, false,
+                                                 true);
                                if (err) {
                                        mlx5_ib_warn(dev, "\n");
                                        err = -ENOMEM;
@@ -4790,8 +4885,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                                        goto out;
                                }
 
-                               finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
-                                          fence, MLX5_OPCODE_SET_PSV);
+                               finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+                                          wr->wr_id, nreq, fence,
+                                          MLX5_OPCODE_SET_PSV);
                                qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
                                num_sge = 0;
                                goto skip_psv;
@@ -4828,16 +4924,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                        set_datagram_seg(seg, wr);
                        seg += sizeof(struct mlx5_wqe_datagram_seg);
                        size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
-                       if (unlikely((seg == qend)))
-                               seg = mlx5_get_send_wqe(qp, 0);
+                       handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
+
                        break;
                case IB_QPT_UD:
                        set_datagram_seg(seg, wr);
                        seg += sizeof(struct mlx5_wqe_datagram_seg);
                        size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
-
-                       if (unlikely((seg == qend)))
-                               seg = mlx5_get_send_wqe(qp, 0);
+                       handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
 
                        /* handle qp that supports ud offload */
                        if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
@@ -4847,11 +4941,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                                memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
                                seg += sizeof(struct mlx5_wqe_eth_pad);
                                size += sizeof(struct mlx5_wqe_eth_pad) / 16;
-
-                               seg = set_eth_seg(seg, wr, qend, qp, &size);
-
-                               if (unlikely((seg == qend)))
-                                       seg = mlx5_get_send_wqe(qp, 0);
+                               set_eth_seg(wr, qp, &seg, &size, &cur_edge);
+                               handle_post_send_edge(&qp->sq, &seg, size,
+                                                     &cur_edge);
                        }
                        break;
                case MLX5_IB_QPT_REG_UMR:
@@ -4867,13 +4959,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                                goto out;
                        seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
                        size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
-                       if (unlikely((seg == qend)))
-                               seg = mlx5_get_send_wqe(qp, 0);
+                       handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
                        set_reg_mkey_segment(seg, wr);
                        seg += sizeof(struct mlx5_mkey_seg);
                        size += sizeof(struct mlx5_mkey_seg) / 16;
-                       if (unlikely((seg == qend)))
-                               seg = mlx5_get_send_wqe(qp, 0);
+                       handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
                        break;
 
                default:
@@ -4881,33 +4971,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                }
 
                if (wr->send_flags & IB_SEND_INLINE && num_sge) {
-                       int uninitialized_var(sz);
-
-                       err = set_data_inl_seg(qp, wr, seg, &sz);
+                       err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
                        if (unlikely(err)) {
                                mlx5_ib_warn(dev, "\n");
                                *bad_wr = wr;
                                goto out;
                        }
-                       size += sz;
                } else {
-                       dpseg = seg;
                        for (i = 0; i < num_sge; i++) {
-                               if (unlikely(dpseg == qend)) {
-                                       seg = mlx5_get_send_wqe(qp, 0);
-                                       dpseg = seg;
-                               }
+                               handle_post_send_edge(&qp->sq, &seg, size,
+                                                     &cur_edge);
                                if (likely(wr->sg_list[i].length)) {
-                                       set_data_ptr_seg(dpseg, wr->sg_list + i);
+                                       set_data_ptr_seg
+                                       ((struct mlx5_wqe_data_seg *)seg,
+                                        wr->sg_list + i);
                                        size += sizeof(struct mlx5_wqe_data_seg) / 16;
-                                       dpseg++;
+                                       seg += sizeof(struct mlx5_wqe_data_seg);
                                }
                        }
                }
 
                qp->next_fence = next_fence;
-               finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
-                          mlx5_ib_opcode[wr->opcode]);
+               finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
+                          fence, mlx5_ib_opcode[wr->opcode]);
 skip_psv:
                if (0)
                        dump_wqe(qp, idx, size);
@@ -4993,7 +5079,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                        goto out;
                }
 
-               scat = get_recv_wqe(qp, ind);
+               scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
                if (qp->wq_sig)
                        scat++;
 
@@ -5441,7 +5527,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_xrcd *xrcd;
        int err;
-       u16 uid;
 
        if (!MLX5_CAP_GEN(dev->mdev, xrc))
                return ERR_PTR(-ENOSYS);
@@ -5450,14 +5535,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
        if (!xrcd)
                return ERR_PTR(-ENOMEM);
 
-       uid = context ? to_mucontext(context)->devx_uid : 0;
-       err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
+       err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
        if (err) {
                kfree(xrcd);
                return ERR_PTR(-ENOMEM);
        }
 
-       xrcd->uid = uid;
        return &xrcd->ibxrcd;
 }
 
@@ -5465,10 +5548,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
 {
        struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
        u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
-       u16 uid =  to_mxrcd(xrcd)->uid;
        int err;
 
-       err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
+       err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
        if (err)
                mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
 
index d012e7dbcc38150053a5fc71fca8f41f3ea5421f..0413b10dea71cee43b604f1a23fc382fae5a0e87 100644 (file)
@@ -1,50 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc.  All rights reserved.
  */
 
 #include <linux/module.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/slab.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
-
 #include "mlx5_ib.h"
-
-/* not supported currently */
-static int srq_signature;
+#include "srq.h"
 
 static void *get_wqe(struct mlx5_ib_srq *srq, int n)
 {
-       return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+       return mlx5_frag_buf_get_wqe(&srq->fbc, n);
 }
 
 static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
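
get_wqe() now resolves a WQE through the fragment-buffer control block rather than a flat buffer offset. A simplified model of that lookup: shift the index into a byte offset, then split it into a fragment number and an offset inside that fragment. The fragment size and wqe_shift below are assumptions; the real helper is mlx5_frag_buf_get_wqe().

/* Toy fragmented-buffer lookup: index -> (fragment, offset). Assumed sizes. */
#include <stdio.h>

#define FRAG_SZ   4096u  /* assumed per-fragment size */
#define WQE_SHIFT 6u     /* assumed srq->msrq.wqe_shift (64-byte WQEs) */

static void *frag_buf_get_wqe(void **frags, unsigned int n)
{
	unsigned int byte_off = n << WQE_SHIFT;

	return (char *)frags[byte_off / FRAG_SZ] + byte_off % FRAG_SZ;
}

int main(void)
{
	static char frag0[FRAG_SZ], frag1[FRAG_SZ];
	void *frags[] = { frag0, frag1 };

	/* WQE 70: 70 * 64 = 4480 bytes -> fragment 1, offset 384 */
	printf("wqe 70 at %p (frag1 starts at %p)\n",
	       frag_buf_get_wqe(frags, 70), (void *)frag1);
	return 0;
}
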
@@ -144,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
        in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        in->page_offset = offset;
-       in->uid = to_mpd(pd)->uid;
+       in->uid = (in->type != IB_SRQT_XRC) ?  to_mpd(pd)->uid : 0;
        if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
            in->type != IB_SRQT_BASIC)
                in->user_index = uidx;
@@ -173,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
                return err;
        }
 
-       if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
+       if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf,
+                                    dev->mdev->priv.numa_node)) {
                mlx5_ib_dbg(dev, "buf alloc failed\n");
                err = -ENOMEM;
                goto err_db;
        }
 
+       mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max),
+                     &srq->fbc);
+
        srq->head    = 0;
        srq->tail    = srq->msrq.max - 1;
        srq->wqe_ctr = 0;
@@ -195,14 +168,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
                err = -ENOMEM;
                goto err_buf;
        }
-       mlx5_fill_page_array(&srq->buf, in->pas);
+       mlx5_fill_page_frag_array(&srq->buf, in->pas);
 
        srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
        if (!srq->wrid) {
                err = -ENOMEM;
                goto err_in;
        }
-       srq->wq_sig = !!srq_signature;
+       srq->wq_sig = 0;
 
        in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
@@ -215,7 +188,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        kvfree(in->pas);
 
 err_buf:
-       mlx5_buf_free(dev->mdev, &srq->buf);
+       mlx5_frag_buf_free(dev->mdev, &srq->buf);
 
 err_db:
        mlx5_db_free(dev->mdev, &srq->db);
@@ -232,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
 static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
 {
        kvfree(srq->wrid);
-       mlx5_buf_free(dev->mdev, &srq->buf);
+       mlx5_frag_buf_free(dev->mdev, &srq->buf);
        mlx5_db_free(dev->mdev, &srq->db);
 }
 
@@ -327,7 +300,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 
        in.pd = to_mpd(pd)->pdn;
        in.db_record = srq->db.dma;
-       err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+       err = mlx5_cmd_create_srq(dev, &srq->msrq, &in);
        kvfree(in.pas);
        if (err) {
                mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
@@ -351,7 +324,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
        return &srq->ibsrq;
 
 err_core:
-       mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
+       mlx5_cmd_destroy_srq(dev, &srq->msrq);
 
 err_usr_kern_srq:
        if (pd->uobject)
@@ -381,7 +354,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                        return -EINVAL;
 
                mutex_lock(&srq->mutex);
-               ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
+               ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1);
                mutex_unlock(&srq->mutex);
 
                if (ret)
@@ -402,7 +375,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
        if (!out)
                return -ENOMEM;
 
-       ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
+       ret = mlx5_cmd_query_srq(dev, &srq->msrq, out);
        if (ret)
                goto out_box;
 
@@ -420,7 +393,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
        struct mlx5_ib_dev *dev = to_mdev(srq->device);
        struct mlx5_ib_srq *msrq = to_msrq(srq);
 
-       mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
+       mlx5_cmd_destroy_srq(dev, &msrq->msrq);
 
        if (srq->uobject) {
                mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
new file mode 100644 (file)
index 0000000..75eb583
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef MLX5_IB_SRQ_H
+#define MLX5_IB_SRQ_H
+
+enum {
+       MLX5_SRQ_FLAG_ERR    = (1 << 0),
+       MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
+       MLX5_SRQ_FLAG_RNDV   = (1 << 2),
+};
+
+struct mlx5_srq_attr {
+       u32 type;
+       u32 flags;
+       u32 log_size;
+       u32 wqe_shift;
+       u32 log_page_size;
+       u32 wqe_cnt;
+       u32 srqn;
+       u32 xrcd;
+       u32 page_offset;
+       u32 cqn;
+       u32 pd;
+       u32 lwm;
+       u32 user_index;
+       u64 db_record;
+       __be64 *pas;
+       u32 tm_log_list_size;
+       u32 tm_next_tag;
+       u32 tm_hw_phase_cnt;
+       u32 tm_sw_phase_cnt;
+       u16 uid;
+};
+
+struct mlx5_ib_dev;
+
+struct mlx5_core_srq {
+       struct mlx5_core_rsc_common common; /* must be first */
+       u32 srqn;
+       int max;
+       size_t max_gs;
+       size_t max_avail_gather;
+       int wqe_shift;
+       void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
+
+       atomic_t refcount;
+       struct completion free;
+       u16 uid;
+};
+
+struct mlx5_srq_table {
+       struct notifier_block nb;
+       /* protect radix tree
+        */
+       spinlock_t lock;
+       struct radix_tree_root tree;
+};
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                       struct mlx5_srq_attr *in);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                      struct mlx5_srq_attr *out);
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                    u16 lwm, int is_srq);
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn);
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev);
+#endif /* MLX5_IB_SRQ_H */
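
The new header moves the SRQ bookkeeping (radix tree, lock, refcount and completion) from mlx5_core into the IB driver. The lookup-with-refcount pattern that mlx5_cmd_get_srq() and the event path depend on is sketched below in user-space form; the table here is a flat array under a mutex rather than a radix tree under a spinlock, and every name is illustrative.

/* Toy refcounted lookup: take the lock, find the object, pin it, drop the lock. */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct toy_srq {
	unsigned int srqn;
	atomic_int   refcount;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_srq *table[8];

static struct toy_srq *toy_get_srq(unsigned int srqn)
{
	struct toy_srq *srq = NULL;
	size_t i;

	pthread_mutex_lock(&table_lock);
	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		if (table[i] && table[i]->srqn == srqn) {
			srq = table[i];
			atomic_fetch_add(&srq->refcount, 1); /* pin before unlocking */
			break;
		}
	}
	pthread_mutex_unlock(&table_lock);
	return srq;
}

int main(void)
{
	struct toy_srq srq = { .srqn = 7, .refcount = 1 };

	table[0] = &srq;
	printf("lookup srqn 7 -> %p (refcount=%d)\n",
	       (void *)toy_get_srq(7), atomic_load(&srq.refcount));
	return 0;
}
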
similarity index 71%
rename from drivers/net/ethernet/mellanox/mlx5/core/srq.c
rename to drivers/infiniband/hw/mlx5/srq_cmd.c
index 6a6fc9be01e69f77c49e7d22f2f065ba57eddb53..7aaaffbd4afa099f36b229af88beb73836fce68e 100644 (file)
@@ -1,67 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc.  All rights reserved.
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
-       struct mlx5_core_srq *srq;
-
-       spin_lock(&table->lock);
-
-       srq = radix_tree_lookup(&table->tree, srqn);
-       if (srq)
-               atomic_inc(&srq->refcount);
-
-       spin_unlock(&table->lock);
-
-       if (!srq) {
-               mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
-               return;
-       }
-
-       srq->event(srq, event_type);
-
-       if (atomic_dec_and_test(&srq->refcount))
-               complete(&srq->free);
-}
+#include "mlx5_ib.h"
+#include "srq.h"
 
 static int get_pas_size(struct mlx5_srq_attr *in)
 {
@@ -132,9 +78,9 @@ static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
        in->db_record     = MLX5_GET64(srqc, srqc, dbr_addr);
 }
 
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
 {
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
+       struct mlx5_srq_table *table = &dev->srq_table;
        struct mlx5_core_srq *srq;
 
        spin_lock(&table->lock);
@@ -147,9 +93,8 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
 
        return srq;
 }
-EXPORT_SYMBOL(mlx5_core_get_srq);
 
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                          struct mlx5_srq_attr *in)
 {
        u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
@@ -176,7 +121,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(create_srq_in, create_in, opcode,
                 MLX5_CMD_OP_CREATE_SRQ);
 
-       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
                            sizeof(create_out));
        kvfree(create_in);
        if (!err) {
@@ -187,8 +132,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        return err;
 }
 
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
-                          struct mlx5_core_srq *srq)
+static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 {
        u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
        u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
@@ -198,11 +142,11 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
        MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
 
-       return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-                            srq_out, sizeof(srq_out));
+       return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+                            sizeof(srq_out));
 }
 
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                       u16 lwm, int is_srq)
 {
        u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
@@ -214,11 +158,11 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(arm_rq_in, srq_in, lwm,      lwm);
        MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
 
-       return  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-                             srq_out, sizeof(srq_out));
+       return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+                            sizeof(srq_out));
 }
 
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                         struct mlx5_srq_attr *out)
 {
        u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
@@ -233,8 +177,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(query_srq_in, srq_in, opcode,
                 MLX5_CMD_OP_QUERY_SRQ);
        MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
-       err =  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-                            srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
+       err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+                           MLX5_ST_SZ_BYTES(query_srq_out));
        if (err)
                goto out;
 
@@ -247,7 +191,7 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        return err;
 }
 
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
+static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
                              struct mlx5_core_srq *srq,
                              struct mlx5_srq_attr *in)
 {
@@ -277,7 +221,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
                 MLX5_CMD_OP_CREATE_XRC_SRQ);
 
        memset(create_out, 0, sizeof(create_out));
-       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
                            sizeof(create_out));
        if (err)
                goto out;
@@ -289,7 +233,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
        return err;
 }
 
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
+static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
                               struct mlx5_core_srq *srq)
 {
        u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
@@ -300,12 +244,12 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
        MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
 
-       return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+       return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
                             xrcsrq_out, sizeof(xrcsrq_out));
 }
 
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
-                          struct mlx5_core_srq *srq, u16 lwm)
+static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                          u16 lwm)
 {
        u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
        u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
@@ -316,11 +260,11 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
        MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
 
-       return  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+       return  mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
                              xrcsrq_out, sizeof(xrcsrq_out));
 }
 
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
+static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
                             struct mlx5_core_srq *srq,
                             struct mlx5_srq_attr *out)
 {
@@ -338,8 +282,8 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
                 MLX5_CMD_OP_QUERY_XRC_SRQ);
        MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
 
-       err =  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
-                            MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+       err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+                           xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out));
        if (err)
                goto out;
 
@@ -354,21 +298,27 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
        return err;
 }
 
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                          struct mlx5_srq_attr *in)
 {
-       void *create_in;
+       void *create_out = NULL;
+       void *create_in = NULL;
        void *rmpc;
        void *wq;
        int pas_size;
+       int outlen;
        int inlen;
        int err;
 
        pas_size = get_pas_size(in);
        inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+       outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
        create_in = kvzalloc(inlen, GFP_KERNEL);
-       if (!create_in)
-               return -ENOMEM;
+       create_out = kvzalloc(outlen, GFP_KERNEL);
+       if (!create_in || !create_out) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
        wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
@@ -378,16 +328,20 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        set_wq(wq, in);
        memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
 
-       err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
-       if (!err)
+       MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
+       if (!err) {
+               srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
                srq->uid = in->uid;
+       }
 
+out:
        kvfree(create_in);
+       kvfree(create_out);
        return err;
 }
 
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
-                          struct mlx5_core_srq *srq)
+static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {};
        u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
@@ -395,22 +349,30 @@ static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
        MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
        MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
-                      struct mlx5_core_srq *srq,
+static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                       u16 lwm)
 {
-       void *in;
+       void *out = NULL;
+       void *in = NULL;
        void *rmpc;
        void *wq;
        void *bitmask;
+       int outlen;
+       int inlen;
        int err;
 
-       in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
+       inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
+       outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
+
+       in = kvzalloc(inlen, GFP_KERNEL);
+       out = kvzalloc(outlen, GFP_KERNEL);
+       if (!in || !out) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        rmpc =    MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
        bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
@@ -422,25 +384,39 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(wq,            wq,      lwm,       lwm);
        MLX5_SET(rmp_bitmask,   bitmask, lwm,       1);
        MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+       MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
 
-       err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
+       err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
 
+out:
        kvfree(in);
+       kvfree(out);
        return err;
 }
 
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                         struct mlx5_srq_attr *out)
 {
-       u32 *rmp_out;
+       u32 *rmp_out = NULL;
+       u32 *rmp_in = NULL;
        void *rmpc;
+       int outlen;
+       int inlen;
        int err;
 
-       rmp_out =  kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
-       if (!rmp_out)
-               return -ENOMEM;
+       outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+       inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
 
-       err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
+       rmp_out = kvzalloc(outlen, GFP_KERNEL);
+       rmp_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!rmp_out || !rmp_in) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
+       MLX5_SET(query_rmp_in, rmp_in, rmpn,   srq->srqn);
+       err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen);
        if (err)
                goto out;
 
@@ -451,10 +427,11 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 
 out:
        kvfree(rmp_out);
+       kvfree(rmp_in);
        return err;
 }
 
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                          struct mlx5_srq_attr *in)
 {
        u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
@@ -489,7 +466,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(xrqc, xrqc, cqn, in->cqn);
        MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
        MLX5_SET(create_xrq_in, create_in, uid, in->uid);
-       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
                            sizeof(create_out));
        kvfree(create_in);
        if (!err) {
@@ -500,7 +477,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        return err;
 }
 
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
@@ -509,10 +486,10 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
        MLX5_SET(destroy_xrq_in, in, xrqn,   srq->srqn);
        MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
 
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
+static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
                       struct mlx5_core_srq *srq,
                       u16 lwm)
 {
@@ -525,10 +502,10 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(arm_rq_in, in, lwm,        lwm);
        MLX5_SET(arm_rq_in, in, uid, srq->uid);
 
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                         struct mlx5_srq_attr *out)
 {
        u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
@@ -544,7 +521,7 @@ static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
        MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
 
-       err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
+       err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen);
        if (err)
                goto out;
 
@@ -567,11 +544,10 @@ static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        return err;
 }
 
-static int create_srq_split(struct mlx5_core_dev *dev,
-                           struct mlx5_core_srq *srq,
+static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
                            struct mlx5_srq_attr *in)
 {
-       if (!dev->issi)
+       if (!dev->mdev->issi)
                return create_srq_cmd(dev, srq, in);
        switch (srq->common.res) {
        case MLX5_RES_XSRQ:
@@ -583,10 +559,9 @@ static int create_srq_split(struct mlx5_core_dev *dev,
        }
 }
 
-static int destroy_srq_split(struct mlx5_core_dev *dev,
-                            struct mlx5_core_srq *srq)
+static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 {
-       if (!dev->issi)
+       if (!dev->mdev->issi)
                return destroy_srq_cmd(dev, srq);
        switch (srq->common.res) {
        case MLX5_RES_XSRQ:
@@ -598,11 +573,11 @@ static int destroy_srq_split(struct mlx5_core_dev *dev,
        }
 }
 
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *in)
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                       struct mlx5_srq_attr *in)
 {
+       struct mlx5_srq_table *table = &dev->srq_table;
        int err;
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
 
        switch (in->type) {
        case IB_SRQT_XRC:
@@ -625,10 +600,8 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        spin_lock_irq(&table->lock);
        err = radix_tree_insert(&table->tree, srq->srqn, srq);
        spin_unlock_irq(&table->lock);
-       if (err) {
-               mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
+       if (err)
                goto err_destroy_srq_split;
-       }
 
        return 0;
 
@@ -637,25 +610,18 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_core_create_srq);
 
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 {
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
+       struct mlx5_srq_table *table = &dev->srq_table;
        struct mlx5_core_srq *tmp;
        int err;
 
        spin_lock_irq(&table->lock);
        tmp = radix_tree_delete(&table->tree, srq->srqn);
        spin_unlock_irq(&table->lock);
-       if (!tmp) {
-               mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
-               return -EINVAL;
-       }
-       if (tmp != srq) {
-               mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
+       if (!tmp || tmp != srq)
                return -EINVAL;
-       }
 
        err = destroy_srq_split(dev, srq);
        if (err)
@@ -667,12 +633,11 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
 
        return 0;
 }
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
 
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                       struct mlx5_srq_attr *out)
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                      struct mlx5_srq_attr *out)
 {
-       if (!dev->issi)
+       if (!dev->mdev->issi)
                return query_srq_cmd(dev, srq, out);
        switch (srq->common.res) {
        case MLX5_RES_XSRQ:
@@ -683,12 +648,11 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                return query_rmp_cmd(dev, srq, out);
        }
 }
-EXPORT_SYMBOL(mlx5_core_query_srq);
 
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                     u16 lwm, int is_srq)
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                    u16 lwm, int is_srq)
 {
-       if (!dev->issi)
+       if (!dev->mdev->issi)
                return arm_srq_cmd(dev, srq, lwm, is_srq);
        switch (srq->common.res) {
        case MLX5_RES_XSRQ:
@@ -699,18 +663,60 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                return arm_rmp_cmd(dev, srq, lwm);
        }
 }
-EXPORT_SYMBOL(mlx5_core_arm_srq);
 
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
+static int srq_event_notifier(struct notifier_block *nb,
+                             unsigned long type, void *data)
+{
+       struct mlx5_srq_table *table;
+       struct mlx5_core_srq *srq;
+       struct mlx5_eqe *eqe;
+       u32 srqn;
+
+       if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
+           type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
+               return NOTIFY_DONE;
+
+       table = container_of(nb, struct mlx5_srq_table, nb);
+
+       eqe = data;
+       srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+
+       spin_lock(&table->lock);
+
+       srq = radix_tree_lookup(&table->tree, srqn);
+       if (srq)
+               atomic_inc(&srq->refcount);
+
+       spin_unlock(&table->lock);
+
+       if (!srq)
+               return NOTIFY_OK;
+
+       srq->event(srq, eqe->type);
+
+       if (atomic_dec_and_test(&srq->refcount))
+               complete(&srq->free);
+
+       return NOTIFY_OK;
+}
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
 {
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
+       struct mlx5_srq_table *table = &dev->srq_table;
 
        memset(table, 0, sizeof(*table));
        spin_lock_init(&table->lock);
        INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+
+       table->nb.notifier_call = srq_event_notifier;
+       mlx5_notifier_register(dev->mdev, &table->nb);
+
+       return 0;
 }
 
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
 {
-       /* nothing */
+       struct mlx5_srq_table *table = &dev->srq_table;
+
+       mlx5_notifier_unregister(dev->mdev, &table->nb);
 }
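
With mlx5_srq_event() removed from mlx5_core, SRQ async events now arrive through the notifier registered in mlx5_init_srq_table() and handled by srq_event_notifier(). The sketch below models that flow in user space: a callback is registered on a tiny chain and invoked only for matching event types. The chain and event codes are assumptions, not the mlx5_notifier_register() API.

/* Toy notifier chain: register a callback, then publish events to it. */
#include <stdio.h>

#define EV_SRQ_LIMIT 1
#define EV_OTHER     2
#define NOTIFY_DONE  0
#define NOTIFY_OK    1

struct toy_nb {
	int (*notifier_call)(struct toy_nb *nb, unsigned long type, void *data);
	struct toy_nb *next;
};

static struct toy_nb *chain;

static void toy_register(struct toy_nb *nb) { nb->next = chain; chain = nb; }

static void toy_publish(unsigned long type, void *data)
{
	struct toy_nb *nb;

	for (nb = chain; nb; nb = nb->next)
		nb->notifier_call(nb, type, data);
}

static int srq_notifier(struct toy_nb *nb, unsigned long type, void *data)
{
	if (type != EV_SRQ_LIMIT)
		return NOTIFY_DONE;  /* not our event, let other notifiers look */
	printf("SRQ limit event for srqn %u\n", *(unsigned int *)data);
	return NOTIFY_OK;
}

int main(void)
{
	struct toy_nb nb = { .notifier_call = srq_notifier };
	unsigned int srqn = 0x17;

	toy_register(&nb);
	toy_publish(EV_SRQ_LIMIT, &srqn);
	toy_publish(EV_OTHER, &srqn);
	return 0;
}
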
index fb1ff59f40bd61a3a20e724d45e996be33fb6b0b..cdbf707fa2671d62574b953c25dd0725745f4ea6 100644 (file)
@@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd)
        /* we always allocate at least 2048 bytes for eager buffers */
        ret = ib_mtu_enum_to_int(qib_ibmtu);
        dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
-       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
        dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
 
        qib_6120_tidtemplate(dd);
index 163a57a88742891f88748a4ffdf2cfe15a83b38a..9fde45538f6e9b309b5e19c57210a59a5a6f29c7 100644 (file)
@@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
        /* we always allocate at least 2048 bytes for eager buffers */
        ret = ib_mtu_enum_to_int(qib_ibmtu);
        dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
-       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
        dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
 
        qib_7220_tidtemplate(dd);
@@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd)
                unsigned word = i / 64;
                unsigned bit = i & 63;
 
-               BUG_ON(word >= 3);
                senddmabufmask[word] |= 1ULL << bit;
        }
        qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]);
index bf5e222eed8e61fcc66a7d59e9aed89e603f1c17..17d6b24b34736a89a832d7df1555e3aa9a6708d8 100644 (file)
@@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs,
                                        *msg++ = ',';
                                        len--;
                                }
-                               BUG_ON(!msp->sz);
                                /* msp->sz counts the nul */
                                took = min_t(size_t, msp->sz - (size_t)1, len);
                                memcpy(msg,  msp->msg, took);
@@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
 
        /* we always allocate at least 2048 bytes for eager buffers */
        dd->rcvegrbufsize = max(mtu, 2048);
-       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
        dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
 
        qib_7322_tidtemplate(dd);
@@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd)
                unsigned word = erstbuf / BITS_PER_LONG;
                unsigned bit = erstbuf & (BITS_PER_LONG - 1);
 
-               BUG_ON(word >= 3);
                senddmabufmask[word] |= 1ULL << bit;
        }
        qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]);
index d7cdc77d630648f16edaba49d421d209d071c398..9fd69903ca57c6aa49dac064d80f6e9ef30790b1 100644 (file)
@@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
                rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
                        rcd->rcvegrbufs_perchunk - 1) /
                        rcd->rcvegrbufs_perchunk;
-               BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
                rcd->rcvegrbufs_perchunk_shift =
                        ilog2(rcd->rcvegrbufs_perchunk);
        }
index 30595b358d8f8a6eddbfaab799067e29e8fc259b..864f2af171f70278106b58bb934ad4936da502be 100644 (file)
@@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
 
 static int qib_pcie_coalesce;
 module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
-MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
+MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
 
 /*
  * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
index 757d4c9d713ddab9366b8e918b2e28d83cc6f440..0b6ca424c11d26a204a0d9c6c1fad68526d69ed6 100644 (file)
@@ -572,7 +572,6 @@ int qib_sdma_verbs_send(struct qib_pportdata *ppd,
                        len = sge->length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
-               BUG_ON(len == 0);
                dw = (len + 3) >> 2;
                addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
                                      dw << 2, DMA_TO_DEVICE);
index 4d4c31ea4e2d4ae79f9e8dbf668a0de0a7c6500d..868da0ece7ba6bd3a06684d267084d941a32eb0a 100644 (file)
@@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
                        len = length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
-               BUG_ON(len == 0);
                rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
                sge->vaddr += len;
                sge->length -= len;
index 926f3c8eba69fa6e07f86a3777fb0d89fc92d823..31c523b2a9f5722b3b7e1588a0667018c6a8182e 100644 (file)
@@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 
                ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
                                        sdma_rb_node);
-               BUG_ON(ret == 0);
        }
        pq->sdma_rb_node = sdma_rb_node;
 
index 4b0f5761a6462c7598ad4d9a24669c8ef03bf26a..8914abdd758426ca492f497bddeeef9facf3b305 100644 (file)
@@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
                        len = length;
                if (len > sge.sge_length)
                        len = sge.sge_length;
-               BUG_ON(len == 0);
                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
                    (len != length && (len & (sizeof(u32) - 1)))) {
                        ndesc = 0;
@@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
                        len = length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
-               BUG_ON(len == 0);
                memcpy(data, sge->vaddr, len);
                sge->vaddr += len;
                sge->length -= len;
@@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
                        len = length;
                if (len > ss->sge.sge_length)
                        len = ss->sge.sge_length;
-               BUG_ON(len == 0);
                /* If the source address is not aligned, try to align it. */
                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
                if (off) {
index 73bd00f8d2c83bb5092eead120ffad9a9391591b..413fa5732e2b08c532409a4b0078782e45622d2f 100644 (file)
@@ -649,7 +649,7 @@ static int __init usnic_ib_init(void)
 
        err = usnic_uiom_init(DRV_NAME);
        if (err) {
-               usnic_err("Unable to initalize umem with err %d\n", err);
+               usnic_err("Unable to initialize umem with err %d\n", err);
                return err;
        }
 
index bf5136533d4972684e2db3556fa89d355bd6ba0b..0cdb156e165e325ec847018b9df3e44f404cdda1 100644 (file)
@@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
        err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
                                                res_spec);
        if (err) {
-               usnic_err("Spec does not meet miniumum req for transport %d\n",
+               usnic_err("Spec does not meet minimum req for transport %d\n",
                                transport);
                log_spec(res_spec);
                return ERR_PTR(err);
index 1735deb1a9d4ec484a1b10272841da151b9f8869..a1bd8cfc2c2565dfc8edbaa9b07a7fc43735e522 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -1094,6 +1094,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
                qp->ibqp.qp_num = err;
                qp->port_num = init_attr->port_num;
                rvt_init_qp(rdi, qp, init_attr->qp_type);
+               if (rdi->driver_f.qp_priv_init) {
+                       err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
+                       if (err) {
+                               ret = ERR_PTR(err);
+                               goto bail_rq_wq;
+                       }
+               }
                break;
 
        default:
index d9ec2de6873869a218a06ecf57f252bbc4ff5d74..8f79bd86d0337fdf2d9e38bd16b20ec61a8ae124 100644 (file)
@@ -65,8 +65,9 @@
  */
 #define RXE_UVERBS_ABI_VERSION         2
 
-#define IB_PHYS_STATE_LINK_UP          (5)
-#define IB_PHYS_STATE_LINK_DOWN                (3)
+#define RDMA_LINK_PHYS_STATE_LINK_UP   (5)
+#define RDMA_LINK_PHYS_STATE_DISABLED  (3)
+#define RDMA_LINK_PHYS_STATE_POLLING   (2)
 
 #define RXE_ROCE_V2_SPORT              (0xc000)
 
index ea089cb091ade455029956a12dada273ac2d1092..e996da67a85183910dc7d8625badb7a435feeab3 100644 (file)
@@ -439,6 +439,7 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
  */
 static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 {
+       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_cqe cqe;
 
        if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
@@ -451,6 +452,11 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
                advance_consumer(qp->sq.queue);
        }
 
+       if (wqe->wr.opcode == IB_WR_SEND ||
+           wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
+           wqe->wr.opcode == IB_WR_SEND_WITH_INV)
+               rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND);
+
        /*
         * we completed something so let req run again
         * if it is trying to fence
index 6aeb7a165e46919c1c3651d72776fd05f03173b1..4a24895846d36cb5cdb2be549fc6b8f5dcfc459d 100644 (file)
@@ -37,15 +37,18 @@ static const char * const rxe_counter_name[] = {
        [RXE_CNT_SENT_PKTS]           =  "sent_pkts",
        [RXE_CNT_RCVD_PKTS]           =  "rcvd_pkts",
        [RXE_CNT_DUP_REQ]             =  "duplicate_request",
-       [RXE_CNT_OUT_OF_SEQ_REQ]      =  "out_of_sequence",
+       [RXE_CNT_OUT_OF_SEQ_REQ]      =  "out_of_seq_request",
        [RXE_CNT_RCV_RNR]             =  "rcvd_rnr_err",
        [RXE_CNT_SND_RNR]             =  "send_rnr_err",
        [RXE_CNT_RCV_SEQ_ERR]         =  "rcvd_seq_err",
-       [RXE_CNT_COMPLETER_SCHED]     =  "ack_deffered",
+       [RXE_CNT_COMPLETER_SCHED]     =  "ack_deferred",
        [RXE_CNT_RETRY_EXCEEDED]      =  "retry_exceeded_err",
        [RXE_CNT_RNR_RETRY_EXCEEDED]  =  "retry_rnr_exceeded_err",
        [RXE_CNT_COMP_RETRY]          =  "completer_retry_err",
        [RXE_CNT_SEND_ERR]            =  "send_err",
+       [RXE_CNT_LINK_DOWNED]         =  "link_downed",
+       [RXE_CNT_RDMA_SEND]           =  "rdma_sends",
+       [RXE_CNT_RDMA_RECV]           =  "rdma_recvs",
 };
 
 int rxe_ib_get_hw_stats(struct ib_device *ibdev,
index f44df1b76742741bf4dc5ebeecba1cb634b00838..72c0d63c79e0d04a7a5aec2c270221d4acb25111 100644 (file)
@@ -50,6 +50,9 @@ enum rxe_counters {
        RXE_CNT_RNR_RETRY_EXCEEDED,
        RXE_CNT_COMP_RETRY,
        RXE_CNT_SEND_ERR,
+       RXE_CNT_LINK_DOWNED,
+       RXE_CNT_RDMA_SEND,
+       RXE_CNT_RDMA_RECV,
        RXE_NUM_OF_COUNTERS
 };
 
index afd53f57a62be0aef0e1694bb4a0031802c54a71..a675c9f2b427832e9e146f58329d1e248a0dbee0 100644 (file)
@@ -250,11 +250,12 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
        return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
 }
 
-static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
-                                 struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+                                 struct sk_buff *skb)
 {
        int err;
        int is_request = pkt->mask & RXE_REQ_MASK;
+       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 
        if ((is_request && (qp->req.state != QP_STATE_READY)) ||
            (!is_request && (qp->resp.state != QP_STATE_READY))) {
index 40e82e0f6c2d2265e06db58f445869d6afcea73b..b26a8141f3edcb184b21f66f9d5d4002475014e7 100644 (file)
@@ -607,7 +607,6 @@ void rxe_port_up(struct rxe_dev *rxe)
 
        port = &rxe->port;
        port->attr.state = IB_PORT_ACTIVE;
-       port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
 
        rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
        dev_info(&rxe->ib_dev.dev, "set active\n");
@@ -620,9 +619,9 @@ void rxe_port_down(struct rxe_dev *rxe)
 
        port = &rxe->port;
        port->attr.state = IB_PORT_DOWN;
-       port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
 
        rxe_port_event(rxe, IB_EVENT_PORT_ERR);
+       rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
        dev_info(&rxe->ib_dev.dev, "set down\n");
 }
 
index 36b53fb94a49d0c59a0ac785c1c6f5ca956598ea..b5c91df220473f922d23ae3bdbb460e4d02c5c00 100644 (file)
@@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
        return rxe_type_info[pool->type].cache;
 }
 
+static void rxe_cache_clean(size_t cnt)
+{
+       int i;
+       struct rxe_type_info *type;
+
+       for (i = 0; i < cnt; i++) {
+               type = &rxe_type_info[i];
+               kmem_cache_destroy(type->cache);
+               type->cache = NULL;
+       }
+}
+
 int rxe_cache_init(void)
 {
        int err;
@@ -136,24 +148,14 @@ int rxe_cache_init(void)
        return 0;
 
 err1:
-       while (--i >= 0) {
-               kmem_cache_destroy(type->cache);
-               type->cache = NULL;
-       }
+       rxe_cache_clean(i);
 
        return err;
 }
 
 void rxe_cache_exit(void)
 {
-       int i;
-       struct rxe_type_info *type;
-
-       for (i = 0; i < RXE_NUM_TYPES; i++) {
-               type = &rxe_type_info[i];
-               kmem_cache_destroy(type->cache);
-               type->cache = NULL;
-       }
+       rxe_cache_clean(RXE_NUM_TYPES);
 }
 
 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
@@ -241,7 +243,7 @@ static void rxe_pool_put(struct rxe_pool *pool)
        kref_put(&pool->ref_cnt, rxe_pool_release);
 }
 
-int rxe_pool_cleanup(struct rxe_pool *pool)
+void rxe_pool_cleanup(struct rxe_pool *pool)
 {
        unsigned long flags;
 
@@ -253,8 +255,6 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
        write_unlock_irqrestore(&pool->pool_lock, flags);
 
        rxe_pool_put(pool);
-
-       return 0;
 }
 
 static u32 alloc_index(struct rxe_pool *pool)
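
The rxe_pool.c hunk above factors the unwind loop of rxe_cache_init() into rxe_cache_clean(cnt), so one helper serves both the error path (destroy only the i caches already created) and rxe_cache_exit() (destroy all of them); the removed error loop never advanced its type pointer, so it appears to have torn down the same entry repeatedly. A self-contained sketch of the idiom, with hypothetical demo_* names and sizes, not part of this diff:

#include <linux/slab.h>
#include <linux/errno.h>

#define DEMO_NUM_CACHES 3                               /* hypothetical */

static const char * const demo_names[DEMO_NUM_CACHES] = {
        "demo-a", "demo-b", "demo-c",
};
static struct kmem_cache *demo_caches[DEMO_NUM_CACHES];

/* Destroy only the first @cnt caches, the ones known to exist. */
static void demo_cache_clean(size_t cnt)
{
        size_t i;

        for (i = 0; i < cnt; i++) {
                kmem_cache_destroy(demo_caches[i]);
                demo_caches[i] = NULL;
        }
}

static int demo_cache_init(void)
{
        size_t i;

        for (i = 0; i < DEMO_NUM_CACHES; i++) {
                demo_caches[i] = kmem_cache_create(demo_names[i],
                                                   64, 0, 0, NULL);
                if (!demo_caches[i]) {
                        demo_cache_clean(i);            /* unwind what succeeded */
                        return -ENOMEM;
                }
        }
        return 0;
}

static void demo_cache_exit(void)
{
        demo_cache_clean(DEMO_NUM_CACHES);              /* full teardown */
}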
index aa4ba307097bcd8618b71eb5c62524f4052cb9fe..72968c29e01f7c6f0bfad82bf2b5f337ea88131a 100644 (file)
@@ -126,7 +126,7 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
                  enum rxe_elem_type type, u32 max_elem);
 
 /* free resources from object pool */
-int rxe_pool_cleanup(struct rxe_pool *pool);
+void rxe_pool_cleanup(struct rxe_pool *pool);
 
 /* allocate an object from pool */
 void *rxe_alloc(struct rxe_pool *pool);
index b9710907dac219fc33c8c39d03285aa47010ed6d..2ca4ffe5015fa91bbc4934dc867b35b0d823a03e 100644 (file)
@@ -97,7 +97,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
                goto err1;
 
        if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
-               if (port_num != 1) {
+               if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
                        pr_warn("invalid port = %d\n", port_num);
                        goto err1;
                }
@@ -433,7 +433,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
        }
 
        if (mask & IB_QP_PORT) {
-               if (attr->port_num != 1) {
+               if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
                        pr_warn("invalid port %d\n", attr->port_num);
                        goto err1;
                }
@@ -448,7 +448,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
        if (mask & IB_QP_ALT_PATH) {
                if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
                        goto err1;
-               if (attr->alt_port_num != 1) {
+               if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num))  {
                        pr_warn("invalid alt port %d\n", attr->alt_port_num);
                        goto err1;
                }
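
The three rxe_qp.c hunks above replace the hard-coded port_num != 1 checks with rdma_is_port_valid(), which bounds-checks the port number against the device's port range. The following is a hedged sketch of the semantics only; the real helper lives in include/rdma/ib_verbs.h, and the demo_* names are hypothetical.

#include <linux/types.h>

/* A port is valid when it falls inside the device's [first, last]
 * port range (1-based for most RDMA devices).
 */
static inline bool demo_is_port_valid(unsigned int port,
                                      unsigned int first_port,
                                      unsigned int last_port)
{
        return port >= first_port && port <= last_port;
}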
index 6c361d70d7cdd8b5fd7bfe88b6ac7df9ab354374..c5d9b558fa90a2f8e85af19a8c6a7d85e3e6fd9f 100644 (file)
@@ -643,6 +643,7 @@ int rxe_requester(void *arg)
                        rmr->access = wqe->wr.wr.reg.access;
                        rmr->lkey = wqe->wr.wr.reg.key;
                        rmr->rkey = wqe->wr.wr.reg.key;
+                       rmr->iova = wqe->wr.wr.reg.mr->iova;
                        wqe->state = wqe_state_done;
                        wqe->status = IB_WC_SUCCESS;
                } else {
@@ -728,7 +729,7 @@ int rxe_requester(void *arg)
        save_state(wqe, qp, &rollback_wqe, &rollback_psn);
        update_wqe_state(qp, wqe, &pkt);
        update_wqe_psn(qp, wqe, &pkt, payload);
-       ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
+       ret = rxe_xmit_packet(qp, &pkt, skb);
        if (ret) {
                qp->need_req_skb = 1;
 
index c962160292f492a9bea24ae98c50ec9d5fdd258d..2315281882506a1c6f563c219655ad97be5010dc 100644 (file)
@@ -124,12 +124,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
        struct sk_buff *skb;
 
        if (qp->resp.state == QP_STATE_ERROR) {
-               skb = skb_dequeue(&qp->req_pkts);
-               if (skb) {
-                       /* drain request packet queue */
+               while ((skb = skb_dequeue(&qp->req_pkts))) {
                        rxe_drop_ref(qp);
                        kfree_skb(skb);
-                       return RESPST_GET_REQ;
                }
 
                /* go drain recv wr queue */
@@ -660,7 +657,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 static enum resp_states read_reply(struct rxe_qp *qp,
                                   struct rxe_pkt_info *req_pkt)
 {
-       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_pkt_info ack_pkt;
        struct sk_buff *skb;
        int mtu = qp->mtu;
@@ -739,7 +735,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
        p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
        *p = ~icrc;
 
-       err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+       err = rxe_xmit_packet(qp, &ack_pkt, skb);
        if (err) {
                pr_err("Failed sending RDMA reply.\n");
                return RESPST_ERR_RNR;
@@ -838,18 +834,25 @@ static enum resp_states do_complete(struct rxe_qp *qp,
        struct ib_wc *wc = &cqe.ibwc;
        struct ib_uverbs_wc *uwc = &cqe.uibwc;
        struct rxe_recv_wqe *wqe = qp->resp.wqe;
+       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 
        if (unlikely(!wqe))
                return RESPST_CLEANUP;
 
        memset(&cqe, 0, sizeof(cqe));
 
-       wc->wr_id               = wqe->wr_id;
-       wc->status              = qp->resp.status;
-       wc->qp                  = &qp->ibqp;
+       if (qp->rcq->is_user) {
+               uwc->status             = qp->resp.status;
+               uwc->qp_num             = qp->ibqp.qp_num;
+               uwc->wr_id              = wqe->wr_id;
+       } else {
+               wc->status              = qp->resp.status;
+               wc->qp                  = &qp->ibqp;
+               wc->wr_id               = wqe->wr_id;
+       }
 
-       /* fields after status are not required for errors */
        if (wc->status == IB_WC_SUCCESS) {
+               rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
                wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
                                pkt->mask & RXE_WRITE_MASK) ?
                                        IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
@@ -898,7 +901,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
                        }
 
                        if (pkt->mask & RXE_IETH_MASK) {
-                               struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
                                struct rxe_mem *rmr;
 
                                wc->wc_flags |= IB_WC_WITH_INVALIDATE;
@@ -950,7 +952,6 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
        int err = 0;
        struct rxe_pkt_info ack_pkt;
        struct sk_buff *skb;
-       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 
        skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
                                 0, psn, syndrome, NULL);
@@ -959,7 +960,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
                goto err1;
        }
 
-       err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+       err = rxe_xmit_packet(qp, &ack_pkt, skb);
        if (err)
                pr_err_ratelimited("Failed sending ack\n");
 
@@ -973,7 +974,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
        int rc = 0;
        struct rxe_pkt_info ack_pkt;
        struct sk_buff *skb;
-       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct resp_res *res;
 
        skb = prepare_ack_packet(qp, pkt, &ack_pkt,
@@ -1001,7 +1001,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
        res->last_psn  = ack_pkt.psn;
        res->cur_psn   = ack_pkt.psn;
 
-       rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+       rc = rxe_xmit_packet(qp, &ack_pkt, skb);
        if (rc) {
                pr_err_ratelimited("Failed sending ack\n");
                rxe_drop_ref(qp);
@@ -1131,8 +1131,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
                if (res) {
                        skb_get(res->atomic.skb);
                        /* Resend the result. */
-                       rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
-                                            pkt, res->atomic.skb);
+                       rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
                        if (rc) {
                                pr_err("Failed resending result. This flow is not handled - skb ignored\n");
                                rc = RESPST_CLEANUP;
index 9c19f2027511b1fb2c093360b275471cc158c14c..30817c79ba962fbfe11de60d73467c6e839cd43c 100644 (file)
@@ -71,6 +71,14 @@ static int rxe_query_port(struct ib_device *dev,
        mutex_lock(&rxe->usdev_lock);
        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
                              &attr->active_width);
+
+       if (attr->state == IB_PORT_ACTIVE)
+               attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
+       else if (dev_get_flags(rxe->ndev) & IFF_UP)
+               attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
+       else
+               attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;
+
        mutex_unlock(&rxe->usdev_lock);
 
 out:
@@ -1279,11 +1287,9 @@ int rxe_register_device(struct rxe_dev *rxe)
        return err;
 }
 
-int rxe_unregister_device(struct rxe_dev *rxe)
+void rxe_unregister_device(struct rxe_dev *rxe)
 {
        struct ib_device *dev = &rxe->ib_dev;
 
        ib_unregister_device(dev);
-
-       return 0;
 }
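
The rxe_verbs.c hunk above makes rxe_query_port() derive the physical link state from the logical port state and the netdev's IFF_UP flag, using the RDMA_LINK_PHYS_STATE_* values introduced in the rxe_param.h hunk earlier. An illustrative restatement of that mapping as a small helper (demo_phys_state is hypothetical, not part of this diff):

#include <linux/types.h>

/* Mirrors the decision made inline in rxe_query_port() above. */
static u8 demo_phys_state(bool ib_port_active, bool netdev_up)
{
        if (ib_port_active)
                return RDMA_LINK_PHYS_STATE_LINK_UP;    /* 5 */
        if (netdev_up)
                return RDMA_LINK_PHYS_STATE_POLLING;    /* 2 */
        return RDMA_LINK_PHYS_STATE_DISABLED;           /* 3 */
}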
index 82e670d6eeea3e863c4e651aac04bdb15680beac..831381b7788da971ea953b2db620ea0448555dbc 100644 (file)
@@ -467,7 +467,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
 }
 
 int rxe_register_device(struct rxe_dev *rxe);
-int rxe_unregister_device(struct rxe_dev *rxe);
+void rxe_unregister_device(struct rxe_dev *rxe);
 
 void rxe_mc_cleanup(struct rxe_pool_entry *arg);
 
index 009be8889d71d05c7eeeb7d6684276ae54c76911..dbe97c02848ce08a6e361a0eadd3e6e79f4c3c14 100644 (file)
@@ -277,16 +277,13 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                        enum iser_data_dir cmd_dir)
 {
        struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
-       int ret;
 
        if (!reg->mem_h)
                return;
 
        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
 
-       ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
-       if (ret)
-               iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+       ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
 
        reg->mem_h = NULL;
 }
index 2357aa727dcf593336ee3d92c7547c2f3afbf86b..adc0e91d2bb568359eb1352d33d6c20060c23aae 100644 (file)
@@ -3617,7 +3617,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
                                             const char *name)
 {
        struct srpt_port *sport = wwn->priv;
-       static struct se_portal_group *tpg;
+       struct se_portal_group *tpg;
        int res;
 
        WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
index d324a3884462914bd91ffb94c132756e2f056282..d499b3d003480ecdbb74a5c5bf58f705cda22728 100644 (file)
@@ -12,9 +12,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 # mlx5 core basic
 #
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
-               health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+               health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
                mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-               fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o  \
+               fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
                diag/fs_tracepoint.o diag/fw_tracer.o
 
 #
index a5a0823e5ada8fa2484c5c88ab766d349731c3c3..8ab636d59edbdda6e72675901fa3cda3423df8ad 100644 (file)
 #include <linux/random.h>
 #include <linux/io-mapping.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
 #include <linux/debugfs.h>
 
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
        CMD_IF_REV = 5,
@@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_FPGA_DESTROY_QP:
        case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
        case MLX5_CMD_OP_DEALLOC_MEMIC:
+       case MLX5_CMD_OP_PAGE_FAULT_RESUME:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_CREATE_MKEY:
        case MLX5_CMD_OP_QUERY_MKEY:
        case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
-       case MLX5_CMD_OP_PAGE_FAULT_RESUME:
        case MLX5_CMD_OP_CREATE_EQ:
        case MLX5_CMD_OP_QUERY_EQ:
        case MLX5_CMD_OP_GEN_EQE:
@@ -805,6 +807,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
        return MLX5_GET(mbox_in, in->first.data, opcode);
 }
 
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
 static void cb_timeout_handler(struct work_struct *work)
 {
        struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -1412,14 +1416,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
                up(&cmd->sem);
 }
 
+static int cmd_comp_notifier(struct notifier_block *nb,
+                            unsigned long type, void *data)
+{
+       struct mlx5_core_dev *dev;
+       struct mlx5_cmd *cmd;
+       struct mlx5_eqe *eqe;
+
+       cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+       dev = container_of(cmd, struct mlx5_core_dev, cmd);
+       eqe = data;
+
+       mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+       return NOTIFY_OK;
+}
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
+       MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+       mlx5_eq_notifier_register(dev, &dev->cmd.nb);
        mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
 }
 
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
 {
        mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+       mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1435,7 +1457,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
        }
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
        struct mlx5_cmd_work_ent *ent;
@@ -1533,7 +1555,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
                }
        }
 }
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+       unsigned long flags;
+       u64 vector;
+
+       /* wait for pending handlers to complete */
+       mlx5_eq_synchronize_cmd_irq(dev);
+       spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+       vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+       if (!vector)
+               goto no_trig;
+
+       vector |= MLX5_TRIGGERED_CMD_COMP;
+       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+       mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+       mlx5_cmd_comp_handler(dev, vector, true);
+       return;
+
+no_trig:
+       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
 
 static int status_to_err(u8 status)
 {
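
In mlx5_cmd_trigger_completions() above, cmd.bitmask has a bit set for every free command slot and cmd.log_sz is the log2 of the slot count, so ~bitmask masked to the slot width yields the entries still awaiting completion, which are then completed by hand with the MLX5_TRIGGERED_CMD_COMP flag set. A small worked example of that arithmetic (the concrete values are hypothetical):

#include <stdio.h>

int main(void)
{
        unsigned long log_sz   = 3;                    /* 1 << 3 = 8 command slots */
        unsigned long nslots   = 1ul << log_sz;
        unsigned long all_mask = (1ul << nslots) - 1;  /* 0xff                     */
        unsigned long bitmask  = 0xf5;                 /* free slots 0,2,4-7       */
        unsigned long busy     = ~bitmask & all_mask;  /* slots 1 and 3 still busy */

        printf("busy vector = 0x%02lx\n", busy);       /* prints 0x0a */
        return 0;
}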
index 4b85abb5c9f7936ebc6b70b6a05328f6270c668d..713a17ee37518e8e017d4c35456c439fa5beb004 100644 (file)
@@ -38,6 +38,7 @@
 #include <rdma/ib_verbs.h>
 #include <linux/mlx5/cq.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 #define TASKLET_MAX_TIME 2
 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
-       struct mlx5_eq *eq;
+       struct mlx5_eq_comp *eq;
        int err;
 
-       eq = mlx5_eqn2eq(dev, eqn);
+       eq = mlx5_eqn2comp_eq(dev, eqn);
        if (IS_ERR(eq))
                return PTR_ERR(eq);
 
@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        INIT_LIST_HEAD(&cq->tasklet_ctx.list);
 
        /* Add to comp EQ CQ tree to recv comp events */
-       err = mlx5_eq_add_cq(eq, cq);
+       err = mlx5_eq_add_cq(&eq->core, cq);
        if (err)
                goto err_cmd;
 
        /* Add to async EQ CQ tree to recv async events */
-       err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+       err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
        if (err)
                goto err_cq_add;
 
@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        return 0;
 
 err_cq_add:
-       mlx5_eq_del_cq(eq, cq);
+       mlx5_eq_del_cq(&eq->core, cq);
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
        u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
        int err;
 
-       err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+       err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
        if (err)
                return err;
 
-       err = mlx5_eq_del_cq(cq->eq, cq);
+       err = mlx5_eq_del_cq(&cq->eq->core, cq);
        if (err)
                return err;
 
index 90fabd612b6cd84f1420afa151cc6c3b0103acfb..a11e22d0b0ccbda0674ba7873cfc57fab8ebedd7 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
        QP_PID,
@@ -349,6 +350,16 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
        return param;
 }
 
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                             u32 *out, int outlen)
+{
+       u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+       MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+       MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                         int index)
 {
index 37ba7c78859db17aa7ecfa76648ca54adb71790b..d2ed14bc37c3c218c1e29875f104d0f06773460d 100644 (file)
@@ -45,75 +45,11 @@ struct mlx5_device_context {
        unsigned long           state;
 };
 
-struct mlx5_delayed_event {
-       struct list_head        list;
-       struct mlx5_core_dev    *dev;
-       enum mlx5_dev_event     event;
-       unsigned long           param;
-};
-
 enum {
        MLX5_INTERFACE_ADDED,
        MLX5_INTERFACE_ATTACHED,
 };
 
-static void add_delayed_event(struct mlx5_priv *priv,
-                             struct mlx5_core_dev *dev,
-                             enum mlx5_dev_event event,
-                             unsigned long param)
-{
-       struct mlx5_delayed_event *delayed_event;
-
-       delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
-       if (!delayed_event) {
-               mlx5_core_err(dev, "event %d is missed\n", event);
-               return;
-       }
-
-       mlx5_core_dbg(dev, "Accumulating event %d\n", event);
-       delayed_event->dev = dev;
-       delayed_event->event = event;
-       delayed_event->param = param;
-       list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
-                                 struct mlx5_priv *priv)
-{
-       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-       struct mlx5_delayed_event *de;
-       struct mlx5_delayed_event *n;
-       struct list_head temp;
-
-       INIT_LIST_HEAD(&temp);
-
-       spin_lock_irq(&priv->ctx_lock);
-
-       priv->is_accum_events = false;
-       list_splice_init(&priv->waiting_events_list, &temp);
-       if (!dev_ctx->context)
-               goto out;
-       list_for_each_entry_safe(de, n, &temp, list)
-               dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
-       spin_unlock_irq(&priv->ctx_lock);
-
-       list_for_each_entry_safe(de, n, &temp, list) {
-               list_del(&de->list);
-               kfree(de);
-       }
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
-       spin_lock_irq(&priv->ctx_lock);
-       priv->is_accum_events = true;
-       spin_unlock_irq(&priv->ctx_lock);
-}
 
 void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 {
@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 
        dev_ctx->intf = intf;
 
-       delayed_event_start(priv);
-
        dev_ctx->context = intf->add(dev);
        if (dev_ctx->context) {
                set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 
                spin_lock_irq(&priv->ctx_lock);
                list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-               if (dev_ctx->intf->pfault) {
-                       if (priv->pfault) {
-                               mlx5_core_err(dev, "multiple page fault handlers not supported");
-                       } else {
-                               priv->pfault_ctx = dev_ctx->context;
-                               priv->pfault = dev_ctx->intf->pfault;
-                       }
-               }
-#endif
                spin_unlock_irq(&priv->ctx_lock);
        }
 
-       delayed_event_release(dev_ctx, priv);
-
        if (!dev_ctx->context)
                kfree(dev_ctx);
 }
@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
        if (!dev_ctx)
                return;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       spin_lock_irq(&priv->ctx_lock);
-       if (priv->pfault == dev_ctx->intf->pfault)
-               priv->pfault = NULL;
-       spin_unlock_irq(&priv->ctx_lock);
-
-       synchronize_srcu(&priv->pfault_srcu);
-#endif
-
        spin_lock_irq(&priv->ctx_lock);
        list_del(&dev_ctx->list);
        spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
        if (!dev_ctx)
                return;
 
-       delayed_event_start(priv);
        if (intf->attach) {
                if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
-                       goto out;
+                       return;
                if (intf->attach(dev, dev_ctx->context))
-                       goto out;
-
+                       return;
                set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
        } else {
                if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
-                       goto out;
+                       return;
                dev_ctx->context = intf->add(dev);
                if (!dev_ctx->context)
-                       goto out;
-
+                       return;
                set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
        }
-
-out:
-       delayed_event_release(dev_ctx, priv);
 }
 
 void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -422,44 +328,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
        return res;
 }
 
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-                    unsigned long param)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_device_context *dev_ctx;
-       unsigned long flags;
-
-       spin_lock_irqsave(&priv->ctx_lock, flags);
-
-       if (priv->is_accum_events)
-               add_delayed_event(priv, dev, event, param);
-
-       /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
-        * still in priv->ctx_list. In this case, only notify the dev_ctx if its
-        * ADDED or ATTACHED bit are set.
-        */
-       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-               if (dev_ctx->intf->event &&
-                   (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
-                    test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
-                       dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
-       spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
-                         struct mlx5_pagefault *pfault)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       int srcu_idx;
-
-       srcu_idx = srcu_read_lock(&priv->pfault_srcu);
-       if (priv->pfault)
-               priv->pfault(dev, priv->pfault_ctx, pfault);
-       srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
 
 void mlx5_dev_list_lock(void)
 {
index d4ec93bde4dedbaeca4bb5976c705a3ea6b83f82..6999f4486e9ec786424be5dd9f72d91e333c0086 100644 (file)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 #define CREATE_TRACE_POINTS
+#include "lib/eq.h"
 #include "fw_tracer.h"
 #include "fw_tracer_tracepoint.h"
 
@@ -846,9 +847,9 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
        return ERR_PTR(err);
 }
 
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 {
        struct mlx5_core_dev *dev;
@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
                goto err_dealloc_pd;
        }
 
+       MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+       mlx5_eq_notifier_register(dev, &tracer->nb);
+
        mlx5_fw_tracer_start(tracer);
 
        return 0;
@@ -883,9 +887,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
        return err;
 }
 
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 {
        if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 
        mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
                      tracer->owner);
-
+       mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
        cancel_work_sync(&tracer->ownership_change_work);
        cancel_work_sync(&tracer->handle_traces_work);
 
@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
        kfree(tracer);
 }
 
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
 {
-       struct mlx5_fw_tracer *tracer = dev->tracer;
-
-       if (!tracer)
-               return;
+       struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+       struct mlx5_core_dev *dev = tracer->dev;
+       struct mlx5_eqe *eqe = data;
 
        switch (eqe->sub_type) {
        case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
                mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
                              eqe->sub_type);
        }
+
+       return NOTIFY_OK;
 }
 
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
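
The fw_tracer.c hunk above follows the same consumer pattern as the cmd.c change earlier: a struct mlx5_nb embedded in the consumer is bound to one EQE type with MLX5_NB_INIT(), registered via mlx5_eq_notifier_register(), and the handler recovers its owner with mlx5_nb_cof(). The sketch below mirrors that usage as shown in the hunks of this diff; it depends on the driver-internal "lib/eq.h", and the demo_* names are hypothetical.

/* Not buildable outside mlx5_core; mirrors the registration pattern above. */
struct demo_consumer {
        struct mlx5_core_dev *dev;
        struct mlx5_nb        nb;               /* embedded, bound to one EQE type */
};

static int demo_eqe_handler(struct notifier_block *nb,
                            unsigned long action, void *data)
{
        struct demo_consumer *c = mlx5_nb_cof(nb, struct demo_consumer, nb);
        struct mlx5_eqe *eqe = data;

        mlx5_core_dbg(c->dev, "eqe sub_type %d\n", eqe->sub_type);
        return NOTIFY_OK;
}

static void demo_consumer_start(struct demo_consumer *c)
{
        MLX5_NB_INIT(&c->nb, demo_eqe_handler, DEVICE_TRACER);
        mlx5_eq_notifier_register(c->dev, &c->nb);
}

static void demo_consumer_stop(struct demo_consumer *c)
{
        mlx5_eq_notifier_unregister(c->dev, &c->nb);
}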
index 0347f2dd5cee1263617496543aa9269e0d9193cf..a8b8747f2b61142ded720a7e573f68b5be39fa14 100644 (file)
@@ -55,6 +55,7 @@
 
 struct mlx5_fw_tracer {
        struct mlx5_core_dev *dev;
+       struct mlx5_nb        nb;
        bool owner;
        u8   trc_ver;
        struct workqueue_struct *work_queue;
@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 
 #endif
index 11832480292646c9fcbd881402ed95497218c67e..7e0a0cf041d55c1d597eb5156582129dc1c4d18b 100644 (file)
@@ -178,8 +178,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 {
        return is_kdump_kernel() ?
                MLX5E_MIN_NUM_CHANNELS :
-               min_t(int, mdev->priv.eq_table.num_comp_vectors,
-                     MLX5E_MAX_NUM_CHANNELS);
+               min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
 }
 
 /* Use this function to get max num channels after netdev was created */
@@ -633,7 +632,6 @@ struct mlx5e_channel_stats {
 } ____cacheline_aligned_in_smp;
 
 enum {
-       MLX5E_STATE_ASYNC_EVENTS_ENABLED,
        MLX5E_STATE_OPENED,
        MLX5E_STATE_DESTROYING,
 };
@@ -692,6 +690,8 @@ struct mlx5e_priv {
        struct hwtstamp_config     tstamp;
        u16                        q_counter;
        u16                        drop_rq_q_counter;
+       struct notifier_block      events_nb;
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
        struct mlx5e_dcbx          dcbx;
 #endif
index 871313d6b34d1b315e6ef1a9c07cba396de14186..138c7679eebba08630f1f1df958e95fa0cf881e7 100644 (file)
@@ -49,6 +49,7 @@
 #include "lib/clock.h"
 #include "en/port.h"
 #include "en/xdp.h"
+#include "lib/eq.h"
 
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
@@ -293,33 +294,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
        queue_work(priv->wq, &priv->update_stats_work);
 }
 
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
-                             enum mlx5_dev_event event, unsigned long param)
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
 {
-       struct mlx5e_priv *priv = vpriv;
+       struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+       struct mlx5_eqe   *eqe = data;
 
-       if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
-               return;
+       if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+               return NOTIFY_DONE;
 
-       switch (event) {
-       case MLX5_DEV_EVENT_PORT_UP:
-       case MLX5_DEV_EVENT_PORT_DOWN:
+       switch (eqe->sub_type) {
+       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
                queue_work(priv->wq, &priv->update_carrier_work);
                break;
        default:
-               break;
+               return NOTIFY_DONE;
        }
+
+       return NOTIFY_OK;
 }
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-       set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+       priv->events_nb.notifier_call = async_event;
+       mlx5_notifier_register(priv->mdev, &priv->events_nb);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-       clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
-       synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+       mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
 }
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
@@ -1763,11 +1766,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
        mlx5e_free_cq(cq);
 }
 
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
-       return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
@@ -1918,9 +1916,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
                              struct mlx5e_channel_param *cparam,
                              struct mlx5e_channel **cp)
 {
+       int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
        struct net_dim_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
-       int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
        unsigned int irq;
        int err;
@@ -4134,17 +4132,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
                                        struct mlx5e_txqsq *sq)
 {
-       struct mlx5_eq *eq = sq->cq.mcq.eq;
+       struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
        u32 eqe_count;
 
        netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-                  eq->eqn, eq->cons_index, eq->irqn);
+                  eq->core.eqn, eq->core.cons_index, eq->core.irqn);
 
        eqe_count = mlx5_eq_poll_irq_disabled(eq);
        if (!eqe_count)
                return false;
 
-       netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+       netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
        sq->channel->stats->eq_rearm++;
        return true;
 }
@@ -4975,7 +4973,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
        netif_carrier_off(netdev);
 
 #ifdef CONFIG_MLX5_EN_ARFS
-       netdev->rx_cpu_rmap = mdev->rmap;
+       netdev->rx_cpu_rmap =  mlx5_eq_table_get_rmap(mdev);
 #endif
 
        return 0;
@@ -5199,7 +5197,6 @@ static struct mlx5_interface mlx5e_interface = {
        .remove    = mlx5e_remove,
        .attach    = mlx5e_attach,
        .detach    = mlx5e_detach,
-       .event     = mlx5e_async_event,
        .protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
        .get_dev   = mlx5e_get_netdev,
 };
index 3e99d0728b2f2c5366a13f01400d4354d3c80b2c..0b7f3ebb7b9db99fe03eda7a00588ad42023ce4b 100644 (file)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include "lib/mlx5.h"
 #include "en.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/tls.h"
@@ -1122,15 +1123,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
 static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
                                    int idx)
 {
-       struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+       struct mlx5_pme_stats pme_stats;
        int i;
 
+       mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
        for (i = 0; i < NUM_PME_STATUS_STATS; i++)
-               data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+               data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
                                                   mlx5e_pme_status_desc, i);
 
        for (i = 0; i < NUM_PME_ERR_STATS; i++)
-               data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+               data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
                                                   mlx5e_pme_error_desc, i);
 
        return idx;
index c1e1a16a9b07d4335bb4cdc3b29bdea3673b8fa2..4aa39a1fe23f0e224981e477074f7bf52bc74d59 100644 (file)
  */
 
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
 #include <linux/mlx5/cmd.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "eswitch.h"
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 
 enum {
-       MLX5_EQE_SIZE           = sizeof(struct mlx5_eqe),
        MLX5_EQE_OWNER_INIT_VAL = 0x1,
 };
 
@@ -55,14 +57,32 @@ enum {
 };
 
 enum {
-       MLX5_NUM_SPARE_EQE      = 0x80,
-       MLX5_NUM_ASYNC_EQE      = 0x1000,
-       MLX5_NUM_CMD_EQE        = 32,
-       MLX5_NUM_PF_DRAIN       = 64,
+       MLX5_EQ_DOORBEL_OFFSET  = 0x40,
 };
 
-enum {
-       MLX5_EQ_DOORBEL_OFFSET  = 0x40,
+struct mlx5_irq_info {
+       cpumask_var_t mask;
+       char name[MLX5_MAX_IRQ_NAME];
+       void *context; /* dev_id provided to request_irq */
+};
+
+struct mlx5_eq_table {
+       struct list_head        comp_eqs_list;
+       struct mlx5_eq          pages_eq;
+       struct mlx5_eq          cmd_eq;
+       struct mlx5_eq          async_eq;
+
+       struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+       /* Since CQ DB is stored in async_eq */
+       struct mlx5_nb          cq_err_nb;
+
+       struct mutex            lock; /* sync async eqs creations */
+       int                     num_comp_vectors;
+       struct mlx5_irq_info    *irq_info;
+#ifdef CONFIG_RFS_ACCEL
+       struct cpu_rmap         *rmap;
+#endif
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)          | \
@@ -78,17 +98,6 @@ enum {
                               (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)       | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
 
-struct map_eq_in {
-       u64     mask;
-       u32     reserved;
-       u32     unmap_eqn;
-};
-
-struct cre_des_eq {
-       u8      reserved[15];
-       u8      eqn;
-};
-
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
        u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,213 +108,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
-       return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
-       struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
-       return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
-       switch (type) {
-       case MLX5_EVENT_TYPE_COMP:
-               return "MLX5_EVENT_TYPE_COMP";
-       case MLX5_EVENT_TYPE_PATH_MIG:
-               return "MLX5_EVENT_TYPE_PATH_MIG";
-       case MLX5_EVENT_TYPE_COMM_EST:
-               return "MLX5_EVENT_TYPE_COMM_EST";
-       case MLX5_EVENT_TYPE_SQ_DRAINED:
-               return "MLX5_EVENT_TYPE_SQ_DRAINED";
-       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-               return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
-       case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-               return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
-       case MLX5_EVENT_TYPE_CQ_ERROR:
-               return "MLX5_EVENT_TYPE_CQ_ERROR";
-       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-               return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
-       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-               return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
-       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-               return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
-       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-               return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
-       case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-               return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
-       case MLX5_EVENT_TYPE_INTERNAL_ERROR:
-               return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
-       case MLX5_EVENT_TYPE_PORT_CHANGE:
-               return "MLX5_EVENT_TYPE_PORT_CHANGE";
-       case MLX5_EVENT_TYPE_GPIO_EVENT:
-               return "MLX5_EVENT_TYPE_GPIO_EVENT";
-       case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-               return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
-       case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-               return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
-       case MLX5_EVENT_TYPE_REMOTE_CONFIG:
-               return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
-       case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
-               return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
-       case MLX5_EVENT_TYPE_STALL_EVENT:
-               return "MLX5_EVENT_TYPE_STALL_EVENT";
-       case MLX5_EVENT_TYPE_CMD:
-               return "MLX5_EVENT_TYPE_CMD";
-       case MLX5_EVENT_TYPE_PAGE_REQUEST:
-               return "MLX5_EVENT_TYPE_PAGE_REQUEST";
-       case MLX5_EVENT_TYPE_PAGE_FAULT:
-               return "MLX5_EVENT_TYPE_PAGE_FAULT";
-       case MLX5_EVENT_TYPE_PPS_EVENT:
-               return "MLX5_EVENT_TYPE_PPS_EVENT";
-       case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-               return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
-       case MLX5_EVENT_TYPE_FPGA_ERROR:
-               return "MLX5_EVENT_TYPE_FPGA_ERROR";
-       case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-               return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
-       case MLX5_EVENT_TYPE_GENERAL_EVENT:
-               return "MLX5_EVENT_TYPE_GENERAL_EVENT";
-       case MLX5_EVENT_TYPE_DEVICE_TRACER:
-               return "MLX5_EVENT_TYPE_DEVICE_TRACER";
-       default:
-               return "Unrecognized event";
-       }
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
-       switch (subtype) {
-       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-               return MLX5_DEV_EVENT_PORT_DOWN;
-       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-               return MLX5_DEV_EVENT_PORT_UP;
-       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-               return MLX5_DEV_EVENT_PORT_INITIALIZED;
-       case MLX5_PORT_CHANGE_SUBTYPE_LID:
-               return MLX5_DEV_EVENT_LID_CHANGE;
-       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-               return MLX5_DEV_EVENT_PKEY_CHANGE;
-       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-               return MLX5_DEV_EVENT_GUID_CHANGE;
-       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-               return MLX5_DEV_EVENT_CLIENT_REREG;
-       }
-       return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 {
-       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
-       u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
-       __raw_writel((__force u32)cpu_to_be32(val), addr);
-       /* We still want ordering, just not swabbing, so add a barrier */
-       mb();
-}
+       struct mlx5_cq_table *table = &eq->cq_table;
+       struct mlx5_core_cq *cq = NULL;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
-       struct mlx5_pagefault *pfault = container_of(work,
-                                                    struct mlx5_pagefault,
-                                                    work);
-       struct mlx5_eq *eq = pfault->eq;
+       spin_lock(&table->lock);
+       cq = radix_tree_lookup(&table->tree, cqn);
+       if (likely(cq))
+               mlx5_cq_hold(cq);
+       spin_unlock(&table->lock);
 
-       mlx5_core_page_fault(eq->dev, pfault);
-       mempool_free(pfault, eq->pf_ctx.pool);
+       return cq;
 }
 
-static void eq_pf_process(struct mlx5_eq *eq)
+static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
 {
-       struct mlx5_core_dev *dev = eq->dev;
-       struct mlx5_eqe_page_fault *pf_eqe;
-       struct mlx5_pagefault *pfault;
+       struct mlx5_eq_comp *eq_comp = eq_ptr;
+       struct mlx5_eq *eq = eq_ptr;
        struct mlx5_eqe *eqe;
        int set_ci = 0;
+       u32 cqn = -1;
 
        while ((eqe = next_eqe_sw(eq))) {
-               pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
-               if (!pfault) {
-                       schedule_work(&eq->pf_ctx.work);
-                       break;
-               }
-
+               struct mlx5_core_cq *cq;
+               /* Make sure we read EQ entry contents after we've
+                * checked the ownership bit.
+                */
                dma_rmb();
-               pf_eqe = &eqe->data.page_fault;
-               pfault->event_subtype = eqe->sub_type;
-               pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
-               mlx5_core_dbg(dev,
-                             "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
-                             eqe->sub_type, pfault->bytes_committed);
-
-               switch (eqe->sub_type) {
-               case MLX5_PFAULT_SUBTYPE_RDMA:
-                       /* RDMA based event */
-                       pfault->type =
-                               be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
-                       pfault->token =
-                               be32_to_cpu(pf_eqe->rdma.pftype_token) &
-                               MLX5_24BIT_MASK;
-                       pfault->rdma.r_key =
-                               be32_to_cpu(pf_eqe->rdma.r_key);
-                       pfault->rdma.packet_size =
-                               be16_to_cpu(pf_eqe->rdma.packet_length);
-                       pfault->rdma.rdma_op_len =
-                               be32_to_cpu(pf_eqe->rdma.rdma_op_len);
-                       pfault->rdma.rdma_va =
-                               be64_to_cpu(pf_eqe->rdma.rdma_va);
-                       mlx5_core_dbg(dev,
-                                     "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
-                                     pfault->type, pfault->token,
-                                     pfault->rdma.r_key);
-                       mlx5_core_dbg(dev,
-                                     "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
-                                     pfault->rdma.rdma_op_len,
-                                     pfault->rdma.rdma_va);
-                       break;
-
-               case MLX5_PFAULT_SUBTYPE_WQE:
-                       /* WQE based event */
-                       pfault->type =
-                               (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
-                       pfault->token =
-                               be32_to_cpu(pf_eqe->wqe.token);
-                       pfault->wqe.wq_num =
-                               be32_to_cpu(pf_eqe->wqe.pftype_wq) &
-                               MLX5_24BIT_MASK;
-                       pfault->wqe.wqe_index =
-                               be16_to_cpu(pf_eqe->wqe.wqe_index);
-                       pfault->wqe.packet_size =
-                               be16_to_cpu(pf_eqe->wqe.packet_length);
-                       mlx5_core_dbg(dev,
-                                     "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
-                                     pfault->type, pfault->token,
-                                     pfault->wqe.wq_num,
-                                     pfault->wqe.wqe_index);
-                       break;
-
-               default:
-                       mlx5_core_warn(dev,
-                                      "Unsupported page fault event sub-type: 0x%02hhx\n",
-                                      eqe->sub_type);
-                       /* Unsupported page faults should still be
-                        * resolved by the page fault handler
-                        */
+               /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+               cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+               cq = mlx5_eq_cq_get(eq, cqn);
+               if (likely(cq)) {
+                       ++cq->arm_sn;
+                       cq->comp(cq);
+                       mlx5_cq_put(cq);
+               } else {
+                       mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
                }
 
-               pfault->eq = eq;
-               INIT_WORK(&pfault->work, eqe_pf_action);
-               queue_work(eq->pf_ctx.wq, &pfault->work);
-
                ++eq->cons_index;
                ++set_ci;
 
+               /* The HCA will think the queue has overflowed if we
+                * don't tell it we've been processing events.  We
+                * create our EQs with MLX5_NUM_SPARE_EQE extra
+                * entries, so we must update our consumer index at
+                * least that often.
+                */
                if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
                        eq_update_ci(eq, 0);
                        set_ci = 0;
@@ -313,165 +165,41 @@ static void eq_pf_process(struct mlx5_eq *eq)
        }
 
        eq_update_ci(eq, 1);
-}
 
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
-       struct mlx5_eq *eq = eq_ptr;
-       unsigned long flags;
-
-       if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
-               eq_pf_process(eq);
-               spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
-       } else {
-               schedule_work(&eq->pf_ctx.work);
-       }
+       if (cqn != -1)
+               tasklet_schedule(&eq_comp->tasklet_ctx.task);
 
        return IRQ_HANDLED;
 }
 
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them.  It is not recommended to use it, unless this is the last
+ * resort.
  */
-static void mempool_refill(mempool_t *pool)
-{
-       while (pool->curr_nr < pool->min_nr)
-               mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
-       struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
-       mempool_refill(eq->pf_ctx.pool);
-
-       spin_lock_irq(&eq->pf_ctx.lock);
-       eq_pf_process(eq);
-       spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
-       spin_lock_init(&pf_ctx->lock);
-       INIT_WORK(&pf_ctx->work, eq_pf_action);
-
-       pf_ctx->wq = alloc_ordered_workqueue(name,
-                                            WQ_MEM_RECLAIM);
-       if (!pf_ctx->wq)
-               return -ENOMEM;
-
-       pf_ctx->pool = mempool_create_kmalloc_pool
-               (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
-       if (!pf_ctx->pool)
-               goto err_wq;
-
-       return 0;
-err_wq:
-       destroy_workqueue(pf_ctx->wq);
-       return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
-                               u32 wq_num, u8 type, int error)
-{
-       u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
-       u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = {0};
-
-       MLX5_SET(page_fault_resume_in, in, opcode,
-                MLX5_CMD_OP_PAGE_FAULT_RESUME);
-       MLX5_SET(page_fault_resume_in, in, error, !!error);
-       MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
-       MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
-       MLX5_SET(page_fault_resume_in, in, token, token);
-
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
-                                 struct mlx5_eqe *eqe)
-{
-       switch (eqe->sub_type) {
-       case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
-               if (dev->event)
-                       dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
-               break;
-       default:
-               mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
-                             eqe->sub_type);
-       }
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
-                                   struct mlx5_eqe *eqe)
-{
-       u64 value_lsb;
-       u64 value_msb;
-
-       value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
-       value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
-       mlx5_core_warn(dev,
-                      "High temperature on sensors with bit set %llx %llx",
-                      value_msb, value_lsb);
-}
-
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
-       struct mlx5_cq_table *table = &eq->cq_table;
-       struct mlx5_core_cq *cq = NULL;
-
-       spin_lock(&table->lock);
-       cq = radix_tree_lookup(&table->tree, cqn);
-       if (likely(cq))
-               mlx5_cq_hold(cq);
-       spin_unlock(&table->lock);
-
-       return cq;
-}
-
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 {
-       struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-       if (unlikely(!cq)) {
-               mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
-               return;
-       }
-
-       ++cq->arm_sn;
-
-       cq->comp(cq);
-
-       mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
-{
-       struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-       if (unlikely(!cq)) {
-               mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
-               return;
-       }
+       u32 count_eqe;
 
-       cq->event(cq, event_type);
+       disable_irq(eq->core.irqn);
+       count_eqe = eq->core.cons_index;
+       mlx5_eq_comp_int(eq->core.irqn, eq);
+       count_eqe = eq->core.cons_index - count_eqe;
+       enable_irq(eq->core.irqn);
 
-       mlx5_cq_put(cq);
+       return count_eqe;
 }
 
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
 {
        struct mlx5_eq *eq = eq_ptr;
-       struct mlx5_core_dev *dev = eq->dev;
+       struct mlx5_eq_table *eqt;
+       struct mlx5_core_dev *dev;
        struct mlx5_eqe *eqe;
        int set_ci = 0;
-       u32 cqn = -1;
-       u32 rsn;
-       u8 port;
+
+       dev = eq->dev;
+       eqt = dev->priv.eq_table;
 
        while ((eqe = next_eqe_sw(eq))) {
                /*
@@ -480,116 +208,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                 */
                dma_rmb();
 
-               mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
-                             eq->eqn, eqe_type_str(eqe->type));
-               switch (eqe->type) {
-               case MLX5_EVENT_TYPE_COMP:
-                       cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-                       mlx5_eq_cq_completion(eq, cqn);
-                       break;
-               case MLX5_EVENT_TYPE_DCT_DRAINED:
-                       rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
-                       rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
-                       mlx5_rsc_event(dev, rsn, eqe->type);
-                       break;
-               case MLX5_EVENT_TYPE_PATH_MIG:
-               case MLX5_EVENT_TYPE_COMM_EST:
-               case MLX5_EVENT_TYPE_SQ_DRAINED:
-               case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-               case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-               case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-               case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-               case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-                       rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-                       rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
-                       mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
-                                     eqe_type_str(eqe->type), eqe->type, rsn);
-                       mlx5_rsc_event(dev, rsn, eqe->type);
-                       break;
-
-               case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-               case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-                       rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-                       mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
-                                     eqe_type_str(eqe->type), eqe->type, rsn);
-                       mlx5_srq_event(dev, rsn, eqe->type);
-                       break;
-
-               case MLX5_EVENT_TYPE_CMD:
-                       mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
-                       break;
-
-               case MLX5_EVENT_TYPE_PORT_CHANGE:
-                       port = (eqe->data.port.port >> 4) & 0xf;
-                       switch (eqe->sub_type) {
-                       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-                       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-                       case MLX5_PORT_CHANGE_SUBTYPE_LID:
-                       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-                       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-                       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-                       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-                               if (dev->event)
-                                       dev->event(dev, port_subtype_event(eqe->sub_type),
-                                                  (unsigned long)port);
-                               break;
-                       default:
-                               mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
-                                              port, eqe->sub_type);
-                       }
-                       break;
-               case MLX5_EVENT_TYPE_CQ_ERROR:
-                       cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
-                                      cqn, eqe->data.cq_err.syndrome);
-                       mlx5_eq_cq_event(eq, cqn, eqe->type);
-                       break;
-
-               case MLX5_EVENT_TYPE_PAGE_REQUEST:
-                       {
-                               u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-                               s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+               if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
+                       atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+               else
+                       mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type);
 
-                               mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
-                                             func_id, npages);
-                               mlx5_core_req_pages_handler(dev, func_id, npages);
-                       }
-                       break;
-
-               case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-                       mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-                       mlx5_port_module_event(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_PPS_EVENT:
-                       mlx5_pps_event(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_FPGA_ERROR:
-               case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-                       mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
-                       break;
-
-               case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-                       mlx5_temp_warning_event(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_GENERAL_EVENT:
-                       general_event_handler(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_DEVICE_TRACER:
-                       mlx5_fw_tracer_event(dev, eqe);
-                       break;
-
-               default:
-                       mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
-                                      eqe->type, eq->eqn);
-                       break;
-               }
+               atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
 
                ++eq->cons_index;
                ++set_ci;
@@ -608,30 +232,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 
        eq_update_ci(eq, 1);
 
-       if (cqn != -1)
-               tasklet_schedule(&eq->tasklet_ctx.task);
-
        return IRQ_HANDLED;
 }
 
-/* Some architectures don't latch interrupts when they are disabled, so using
- * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
- * avoid losing them.  It is not recommended to use it, unless this is the last
- * resort.
- */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
-{
-       u32 count_eqe;
-
-       disable_irq(eq->irqn);
-       count_eqe = eq->cons_index;
-       mlx5_eq_int(eq->irqn, eq);
-       count_eqe = eq->cons_index - count_eqe;
-       enable_irq(eq->irqn);
-
-       return count_eqe;
-}
-
 static void init_eq_buf(struct mlx5_eq *eq)
 {
        struct mlx5_eqe *eqe;
@@ -643,39 +246,35 @@ static void init_eq_buf(struct mlx5_eq *eq)
        }
 }
 
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-                      int nent, u64 mask, const char *name,
-                      enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+             struct mlx5_eq_param *param)
 {
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        struct mlx5_cq_table *cq_table = &eq->cq_table;
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
-       irq_handler_t handler;
+       u8 vecidx = param->index;
        __be64 *pas;
        void *eqc;
        int inlen;
        u32 *in;
        int err;
 
+       if (eq_table->irq_info[vecidx].context)
+               return -EEXIST;
+
        /* Init CQ table */
        memset(cq_table, 0, sizeof(*cq_table));
        spin_lock_init(&cq_table->lock);
        INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
 
-       eq->type = type;
-       eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+       eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
        eq->cons_index = 0;
        err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
        if (err)
                return err;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (type == MLX5_EQ_TYPE_PF)
-               handler = mlx5_eq_pf_int;
-       else
-#endif
-               handler = mlx5_eq_int;
-
        init_eq_buf(eq);
 
        inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +290,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        mlx5_fill_page_array(&eq->buf, pas);
 
        MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
-       MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+       MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
 
        eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
        MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,15 +303,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        if (err)
                goto err_in;
 
-       snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+       snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
                 name, pci_name(dev->pdev));
+       eq_table->irq_info[vecidx].context = param->context;
 
+       eq->vecidx = vecidx;
        eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = pci_irq_vector(dev->pdev, vecidx);
        eq->dev = dev;
        eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
-       err = request_irq(eq->irqn, handler, 0,
-                         priv->irq_info[vecidx].name, eq);
+       err = request_irq(eq->irqn, param->handler, 0,
+                         eq_table->irq_info[vecidx].name, param->context);
        if (err)
                goto err_eq;
 
@@ -720,21 +321,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        if (err)
                goto err_irq;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (type == MLX5_EQ_TYPE_PF) {
-               err = init_pf_ctx(&eq->pf_ctx, name);
-               if (err)
-                       goto err_irq;
-       } else
-#endif
-       {
-               INIT_LIST_HEAD(&eq->tasklet_ctx.list);
-               INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
-               spin_lock_init(&eq->tasklet_ctx.lock);
-               tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
-                            (unsigned long)&eq->tasklet_ctx);
-       }
-
        /* EQs are created in ARMED state
         */
        eq_update_ci(eq, 1);
@@ -756,27 +342,25 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        return err;
 }
 
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+       struct mlx5_irq_info *irq_info;
        int err;
 
+       irq_info = &eq_table->irq_info[eq->vecidx];
+
        mlx5_debug_eq_remove(dev, eq);
-       free_irq(eq->irqn, eq);
+
+       free_irq(eq->irqn, irq_info->context);
+       irq_info->context = NULL;
+
        err = mlx5_cmd_destroy_eq(dev, eq->eqn);
        if (err)
                mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
                               eq->eqn);
        synchronize_irq(eq->irqn);
 
-       if (eq->type == MLX5_EQ_TYPE_COMP) {
-               tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       } else if (eq->type == MLX5_EQ_TYPE_PF) {
-               cancel_work_sync(&eq->pf_ctx.work);
-               destroy_workqueue(eq->pf_ctx.wq);
-               mempool_destroy(eq->pf_ctx.pool);
-#endif
-       }
        mlx5_buf_free(dev, &eq->buf);
 
        return err;
@@ -816,28 +400,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
        return 0;
 }
 
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 {
-       int err;
+       struct mlx5_eq_table *eq_table;
+       int i, err;
 
-       spin_lock_init(&dev->priv.eq_table.lock);
+       eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+       if (!eq_table)
+               return -ENOMEM;
+
+       dev->priv.eq_table = eq_table;
 
        err = mlx5_eq_debugfs_init(dev);
+       if (err)
+               goto kvfree_eq_table;
 
+       mutex_init(&eq_table->lock);
+       for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+               ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+
+       return 0;
+
+kvfree_eq_table:
+       kvfree(eq_table);
+       dev->priv.eq_table = NULL;
        return err;
 }
 
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
 {
        mlx5_eq_debugfs_cleanup(dev);
+       kvfree(dev->priv.eq_table);
 }
 
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+                          struct mlx5_eq *eq, struct mlx5_eq_param *param)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+       int err;
+
+       mutex_lock(&eq_table->lock);
+       if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
+               err = -ENOSPC;
+               goto unlock;
+       }
+
+       err = create_map_eq(dev, eq, name, param);
+unlock:
+       mutex_unlock(&eq_table->lock);
+       return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int err;
 
+       mutex_lock(&eq_table->lock);
+       err = destroy_unmap_eq(dev, eq);
+       mutex_unlock(&eq_table->lock);
+       return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+                                unsigned long type, void *data)
+{
+       struct mlx5_eq_table *eqt;
+       struct mlx5_core_cq *cq;
+       struct mlx5_eqe *eqe;
+       struct mlx5_eq *eq;
+       u32 cqn;
+
+       /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+       eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+       eq  = &eqt->async_eq;
+       eqe = data;
+
+       cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+       mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+                      cqn, eqe->data.cq_err.syndrome);
+
+       cq = mlx5_eq_cq_get(eq, cqn);
+       if (unlikely(!cq)) {
+               mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+               return NOTIFY_OK;
+       }
+
+       cq->event(cq, type);
+
+       mlx5_cq_put(cq);
+
+       return NOTIFY_OK;
+}
+
+static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+{
+       u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
        if (MLX5_VPORT_MANAGER(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
@@ -865,127 +527,518 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
 
-       err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
-                                MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
-                                "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+       return async_event_mask;
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_param param = {};
+       int err;
+
+       MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+       mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_CMD_IDX,
+               .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+               .nent = MLX5_NUM_CMD_EQE,
+               .context = &table->cmd_eq,
+               .handler = mlx5_eq_async_int,
+       };
+       err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
-               return err;
+               goto err0;
        }
 
        mlx5_cmd_use_events(dev);
 
-       err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-                                MLX5_NUM_ASYNC_EQE, async_event_mask,
-                                "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_ASYNC_IDX,
+               .mask = gather_async_events_mask(dev),
+               .nent = MLX5_NUM_ASYNC_EQE,
+               .context = &table->async_eq,
+               .handler = mlx5_eq_async_int,
+       };
+       err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
                goto err1;
        }
 
-       err = mlx5_create_map_eq(dev, &table->pages_eq,
-                                MLX5_EQ_VEC_PAGES,
-                                /* TODO: sriov max_vf + */ 1,
-                                1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
-                                MLX5_EQ_TYPE_ASYNC);
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_PAGEREQ_IDX,
+               .mask =  1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+               .nent = /* TODO: sriov max_vf + */ 1,
+               .context = &table->pages_eq,
+               .handler = mlx5_eq_async_int,
+       };
+       err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
                goto err2;
        }
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (MLX5_CAP_GEN(dev, pg)) {
-               err = mlx5_create_map_eq(dev, &table->pfault_eq,
-                                        MLX5_EQ_VEC_PFAULT,
-                                        MLX5_NUM_ASYNC_EQE,
-                                        1 << MLX5_EVENT_TYPE_PAGE_FAULT,
-                                        "mlx5_page_fault_eq",
-                                        MLX5_EQ_TYPE_PF);
-               if (err) {
-                       mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
-                                      err);
-                       goto err3;
-               }
-       }
-
-       return err;
-err3:
-       mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
        return err;
-#endif
 
 err2:
-       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       destroy_async_eq(dev, &table->async_eq);
 
 err1:
        mlx5_cmd_use_polling(dev);
-       mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+       destroy_async_eq(dev, &table->cmd_eq);
+err0:
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
        return err;
 }
 
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       struct mlx5_eq_table *table = dev->priv.eq_table;
        int err;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (MLX5_CAP_GEN(dev, pg)) {
-               err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
-               if (err)
-                       mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
-                                     err);
-       }
-#endif
-
-       err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+       err = destroy_async_eq(dev, &table->pages_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
                              err);
 
-       err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       err = destroy_async_eq(dev, &table->async_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
                              err);
+
        mlx5_cmd_use_polling(dev);
 
-       err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+       err = destroy_async_eq(dev, &table->cmd_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
                              err);
+
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 }
 
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      u32 *out, int outlen)
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
 {
-       u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+       return &dev->priv.eq_table->async_eq;
+}
 
-       MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
-       MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+       synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+       synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+                      struct mlx5_eq_param *param)
+{
+       struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+       int err;
+
+       if (!eq)
+               return ERR_PTR(-ENOMEM);
+
+       err = create_async_eq(dev, name, eq, param);
+       if (err) {
+               kvfree(eq);
+               eq = ERR_PTR(err);
+       }
+
+       return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       int err;
+
+       if (IS_ERR(eq))
+               return -EINVAL;
+
+       err = destroy_async_eq(dev, eq);
+       if (err)
+               goto out;
+
+       kvfree(eq);
+out:
+       return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+       u32 ci = eq->cons_index + cc;
+       struct mlx5_eqe *eqe;
+
+       eqe = get_eqe(eq, ci & (eq->nent - 1));
+       eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+       /* Make sure we read EQ entry contents after we've
+        * checked the ownership bit.
+        */
+       if (eqe)
+               dma_rmb();
+
+       return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+       u32 val;
+
+       eq->cons_index += cc;
+       val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+       __raw_writel((__force u32)cpu_to_be32(val), addr);
+       /* We still want ordering, just not swabbing, so add a barrier */
+       mb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
+
+/* Completion EQs */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+       int irq = pci_irq_vector(mdev->pdev, vecidx);
+       struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+       if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       irq_info->mask);
+
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irq, irq_info->mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+       return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, vecidx);
+       struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+       irq_set_affinity_hint(irq, NULL);
+       free_cpumask_var(irq_info->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
+               err = set_comp_irq_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               clear_comp_irq_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
+               clear_comp_irq_affinity_hint(mdev, i);
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_comp *eq, *n;
+
+       clear_comp_irqs_affinity_hints(dev);
+
+#ifdef CONFIG_RFS_ACCEL
+       if (table->rmap) {
+               free_irq_cpu_rmap(table->rmap);
+               table->rmap = NULL;
+       }
+#endif
+       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+               list_del(&eq->list);
+               if (destroy_unmap_eq(dev, &eq->core))
+                       mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+                                      eq->core.eqn);
+               tasklet_disable(&eq->tasklet_ctx.task);
+               kfree(eq);
+       }
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       char name[MLX5_MAX_IRQ_NAME];
+       struct mlx5_eq_comp *eq;
+       int ncomp_vec;
+       int nent;
+       int err;
+       int i;
+
+       INIT_LIST_HEAD(&table->comp_eqs_list);
+       ncomp_vec = table->num_comp_vectors;
+       nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+       table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+       if (!table->rmap)
+               return -ENOMEM;
+#endif
+       for (i = 0; i < ncomp_vec; i++) {
+               int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+               struct mlx5_eq_param param = {};
+
+               eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+               if (!eq) {
+                       err = -ENOMEM;
+                       goto clean;
+               }
+
+               INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+               INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+               spin_lock_init(&eq->tasklet_ctx.lock);
+               tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+                            (unsigned long)&eq->tasklet_ctx);
+
+#ifdef CONFIG_RFS_ACCEL
+               irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
+#endif
+               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+               param = (struct mlx5_eq_param) {
+                       .index = vecidx,
+                       .mask = 0,
+                       .nent = nent,
+                       .context = &eq->core,
+                       .handler = mlx5_eq_comp_int
+               };
+               err = create_map_eq(dev, &eq->core, name, &param);
+               if (err) {
+                       kfree(eq);
+                       goto clean;
+               }
+               mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+               /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+               list_add_tail(&eq->list, &table->comp_eqs_list);
+       }
+
+       err = set_comp_irq_affinity_hints(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+               goto clean;
+       }
+
+       return 0;
+
+clean:
+       destroy_comp_eqs(dev);
+       return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+                   unsigned int *irqn)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_comp *eq, *n;
+       int err = -ENOENT;
+       int i = 0;
+
+       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+               if (i++ == vector) {
+                       *eqn = eq->core.eqn;
+                       *irqn = eq->core.irqn;
+                       err = 0;
+                       break;
+               }
+       }
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+       return dev->priv.eq_table->num_comp_vectors;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+       /* TODO: consider irq_get_affinity_mask(irq) */
+       return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+       return dev->priv.eq_table->rmap;
+#else
+       return NULL;
+#endif
+}
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_comp *eq;
+
+       list_for_each_entry(eq, &table->comp_eqs_list, list) {
+               if (eq->core.eqn == eqn)
+                       return eq;
+       }
+
+       return ERR_PTR(-ENOENT);
 }
 
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq;
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       int i, max_eqs;
+
+       clear_comp_irqs_affinity_hints(dev);
 
 #ifdef CONFIG_RFS_ACCEL
-       if (dev->rmap) {
-               free_irq_cpu_rmap(dev->rmap);
-               dev->rmap = NULL;
+       if (table->rmap) {
+               free_irq_cpu_rmap(table->rmap);
+               table->rmap = NULL;
        }
 #endif
-       list_for_each_entry(eq, &table->comp_eqs_list, list)
-               free_irq(eq->irqn, eq);
-
-       free_irq(table->pages_eq.irqn, &table->pages_eq);
-       free_irq(table->async_eq.irqn, &table->async_eq);
-       free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (MLX5_CAP_GEN(dev, pg))
-               free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
+
+       mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+       max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
+       for (i = max_eqs - 1; i >= 0; i--) {
+               if (!table->irq_info[i].context)
+                       continue;
+               free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
+               table->irq_info[i].context = NULL;
+       }
+       mutex_unlock(&table->lock);
+       pci_free_irq_vectors(dev->pdev);
+}
+
+static int alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_eq_table *table = priv->eq_table;
+       int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+                     MLX5_CAP_GEN(dev, max_num_eqs) :
+                     1 << MLX5_CAP_GEN(dev, log_max_eq);
+       int nvec;
+       int err;
+
+       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+              MLX5_EQ_VEC_COMP_BASE;
+       nvec = min_t(int, nvec, num_eqs);
+       if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+               return -ENOMEM;
+
+       table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
+       if (!table->irq_info)
+               return -ENOMEM;
+
+       nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
+                                    nvec, PCI_IRQ_MSIX);
+       if (nvec < 0) {
+               err = nvec;
+               goto err_free_irq_info;
+       }
+
+       table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+       return 0;
+
+err_free_irq_info:
+       kfree(table->irq_info);
+       return err;
+}
+
+static void free_irq_vectors(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+
        pci_free_irq_vectors(dev->pdev);
+       kfree(priv->eq_table->irq_info);
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+       int err;
+
+       err = alloc_irq_vectors(dev);
+       if (err) {
+               mlx5_core_err(dev, "alloc irq vectors failed\n");
+               return err;
+       }
+
+       err = create_async_eqs(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to create async EQs\n");
+               goto err_async_eqs;
+       }
+
+       err = create_comp_eqs(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to create completion EQs\n");
+               goto err_comp_eqs;
+       }
+
+       return 0;
+err_comp_eqs:
+       destroy_async_eqs(dev);
+err_async_eqs:
+       free_irq_vectors(dev);
+       return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+       destroy_comp_eqs(dev);
+       destroy_async_eqs(dev);
+       free_irq_vectors(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+       struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+       if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+               return -EINVAL;
+
+       return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+       struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+       if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+               return -EINVAL;
+
+       return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
 }
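
A minimal usage sketch, not part of this commit: the per-event notifier chain added above (mlx5_eq_notifier_register/unregister plus the mlx5_nb helpers) is what the eswitch and events.c hunks further down build on. The names my_feature, my_port_change_event and my_feature_start/stop below are invented for illustration, and the sketch assumes it is compiled inside mlx5_core, where "lib/eq.h" (which this series uses for MLX5_NB_INIT and mlx5_nb_cof) is reachable.

/* Hypothetical consumer of the new per-event notifier chain; mirrors the
 * MLX5_NB_INIT + mlx5_eq_notifier_register pattern used by cq_err_nb above
 * and by the eswitch change below.  Not part of the commit.
 */
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "lib/eq.h"

struct my_feature {
	struct mlx5_core_dev *dev;
	struct mlx5_nb        nb;	/* notifier_block plus the event type it wants */
};

static int my_port_change_event(struct notifier_block *nb,
				unsigned long type, void *data)
{
	/* mlx5_nb_cof() is container_of() for the embedded mlx5_nb */
	struct my_feature *feat = mlx5_nb_cof(nb, struct my_feature, nb);
	struct mlx5_eqe *eqe = data;	/* raw FW EQE; type == MLX5_EVENT_TYPE_PORT_CHANGE */

	/* Called from mlx5_eq_async_int() via atomic_notifier_call_chain(),
	 * i.e. in IRQ context: do not sleep here, defer real work instead.
	 */
	mlx5_core_dbg(feat->dev, "port change, sub_type %d\n", eqe->sub_type);
	return NOTIFY_OK;
}

static void my_feature_start(struct my_feature *feat)
{
	/* PORT_CHANGE expands to MLX5_EVENT_TYPE_PORT_CHANGE */
	MLX5_NB_INIT(&feat->nb, my_port_change_event, PORT_CHANGE);
	mlx5_eq_notifier_register(feat->dev, &feat->nb);
}

static void my_feature_stop(struct my_feature *feat)
{
	mlx5_eq_notifier_unregister(feat->dev, &feat->nb);
}
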
index d004957328f9ca9daa6b46c375310b1f20d1af31..e6a9b19d86262a5b00129da748f6a25005e65549 100644
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "eswitch.h"
 #include "fs_core.h"
+#include "lib/eq.h"
 
 #define UPLINK_VPORT 0xFFFF
 
@@ -1567,7 +1569,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
        /* Mark this vport as disabled to discard new events */
        vport->enabled = false;
 
-       synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
        /* Wait for current already scheduled events to complete */
        flush_workqueue(esw->work_queue);
        /* Disable events from this vport */
@@ -1593,10 +1594,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
        mutex_unlock(&esw->state_lock);
 }
 
+static int eswitch_vport_event(struct notifier_block *nb,
+                              unsigned long type, void *data)
+{
+       struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+       struct mlx5_eqe *eqe = data;
+       struct mlx5_vport *vport;
+       u16 vport_num;
+
+       vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+       vport = &esw->vports[vport_num];
+       if (vport->enabled)
+               queue_work(esw->work_queue, &vport->vport_change_handler);
+
+       return NOTIFY_OK;
+}
+
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
 
-
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
        int err;
@@ -1640,6 +1656,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        for (i = 0; i <= nvfs; i++)
                esw_enable_vport(esw, i, enabled_events);
 
+       if (mode == SRIOV_LEGACY) {
+               MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+               mlx5_eq_notifier_register(esw->dev, &esw->nb);
+       }
+
        esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
                 esw->enabled_vports);
        return 0;
@@ -1669,6 +1690,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        mc_promisc = &esw->mc_promisc;
        nvports = esw->enabled_vports;
 
+       if (esw->mode == SRIOV_LEGACY)
+               mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
        for (i = 0; i < esw->total_vports; i++)
                esw_disable_vport(esw, i);
 
@@ -1777,23 +1801,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
        kfree(esw);
 }
 
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
-       struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
-       u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
-       struct mlx5_vport *vport;
-
-       if (!esw) {
-               pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
-                       vport_num);
-               return;
-       }
-
-       vport = &esw->vports[vport_num];
-       if (vport->enabled)
-               queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
 /* Vport Administration */
 #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
 
index aaafc9f171151db2f273f7eb12b3d2a5d12d93c6..480ffa294867db7c70b5d2f93d3fe7f1abf03dda 100644
@@ -181,6 +181,7 @@ struct esw_mc_addr { /* SRIOV only */
 
 struct mlx5_eswitch {
        struct mlx5_core_dev    *dev;
+       struct mlx5_nb          nb;
        struct mlx5_eswitch_fdb fdb_table;
        struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
        struct workqueue_struct *work_queue;
@@ -211,7 +212,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw);
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@@ -352,7 +352,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
 static inline int  mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
 static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
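
The new events.c below layers a second, driver-level chain on top of the EQ notifiers: forward_event() re-posts selected FW events onto events->nh, and mlx5 interface users (mlx5e/mlx5_ib) subscribe to that chain with mlx5_notifier_register(). A minimal, hypothetical consumer sketch follows; my_if_event and my_if_nb are invented names, and the matching mlx5_notifier_unregister() call is assumed rather than shown in this hunk.

/* Hypothetical consumer of the driver-level chain exposed by events.c below.
 * forward_event() invokes these callbacks in atomic context, passing the EQE
 * type as 'event' and the raw EQE as 'data'.  Not part of the commit.
 */
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>

static int my_if_event(struct notifier_block *nb, unsigned long event, void *data)
{
	struct mlx5_eqe *eqe = data;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE)
		pr_debug("mlx5 port change, sub_type %d\n", eqe->sub_type);

	return NOTIFY_OK;
}

static struct notifier_block my_if_nb = {
	.notifier_call = my_if_event,
};

/* e.g. from an interface's add() callback:
 *	mlx5_notifier_register(mdev, &my_if_nb);
 * and symmetrically on remove:
 *	mlx5_notifier_unregister(mdev, &my_if_nb);	(assumed counterpart, not shown here)
 */
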
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 0000000..e92df70
--- /dev/null
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+       struct mlx5_nb  nb;
+       void           *ctx;
+};
+
+/* General event handlers for the low-level mlx5_core driver
+ *
+ * Other major feature-specific events, such as
+ * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
+ * separate notifier callbacks, specifically by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+       /* Events to be processed by mlx5_core */
+       {.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+       {.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+       {.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+
+       /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+       /* QP/WQ resource events to forward */
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+       /* SRQ events */
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
+
+struct mlx5_events {
+       struct mlx5_core_dev *dev;
+       struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
+       /* driver notifier chain */
+       struct atomic_notifier_head nh;
+       /* port module events stats */
+       struct mlx5_pme_stats pme_stats;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+       switch (type) {
+       case MLX5_EVENT_TYPE_COMP:
+               return "MLX5_EVENT_TYPE_COMP";
+       case MLX5_EVENT_TYPE_PATH_MIG:
+               return "MLX5_EVENT_TYPE_PATH_MIG";
+       case MLX5_EVENT_TYPE_COMM_EST:
+               return "MLX5_EVENT_TYPE_COMM_EST";
+       case MLX5_EVENT_TYPE_SQ_DRAINED:
+               return "MLX5_EVENT_TYPE_SQ_DRAINED";
+       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+               return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+       case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+               return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+       case MLX5_EVENT_TYPE_CQ_ERROR:
+               return "MLX5_EVENT_TYPE_CQ_ERROR";
+       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+               return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+               return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+               return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+               return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+       case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+               return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+       case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+               return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+       case MLX5_EVENT_TYPE_PORT_CHANGE:
+               return "MLX5_EVENT_TYPE_PORT_CHANGE";
+       case MLX5_EVENT_TYPE_GPIO_EVENT:
+               return "MLX5_EVENT_TYPE_GPIO_EVENT";
+       case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+               return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+       case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+               return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+       case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+               return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+       case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+               return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+       case MLX5_EVENT_TYPE_STALL_EVENT:
+               return "MLX5_EVENT_TYPE_STALL_EVENT";
+       case MLX5_EVENT_TYPE_CMD:
+               return "MLX5_EVENT_TYPE_CMD";
+       case MLX5_EVENT_TYPE_PAGE_REQUEST:
+               return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+       case MLX5_EVENT_TYPE_PAGE_FAULT:
+               return "MLX5_EVENT_TYPE_PAGE_FAULT";
+       case MLX5_EVENT_TYPE_PPS_EVENT:
+               return "MLX5_EVENT_TYPE_PPS_EVENT";
+       case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+               return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+       case MLX5_EVENT_TYPE_FPGA_ERROR:
+               return "MLX5_EVENT_TYPE_FPGA_ERROR";
+       case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+               return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+       case MLX5_EVENT_TYPE_GENERAL_EVENT:
+               return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+       case MLX5_EVENT_TYPE_DEVICE_TRACER:
+               return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+       default:
+               return "Unrecognized event";
+       }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+                       unsigned long type, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+
+       mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+                     eqe_type_str(eqe->type), eqe->sub_type);
+       return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+       u64 value_lsb;
+       u64 value_msb;
+
+       value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+       value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+       mlx5_core_warn(events->dev,
+                      "High temperature on sensors with bit set %llx %llx",
+                      value_msb, value_lsb);
+
+       return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+       "Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
+       "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
+       "Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
+};
+
+static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+       "Power budget exceeded",
+       "Long Range for non MLNX cable",
+       "Bus stuck(I2C or data shorted)",
+       "No EEPROM/retry timeout",
+       "Enforce part number list",
+       "Unknown identifier",
+       "High Temperature",
+       "Bad or shorted cable/module",
+       "Unknown status",
+};
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+
+       enum port_module_event_status_type module_status;
+       enum port_module_event_error_type error_type;
+       struct mlx5_eqe_port_module *module_event_eqe;
+       u8 module_num;
+
+       module_event_eqe = &eqe->data.port_module;
+       module_num = module_event_eqe->module;
+       module_status = module_event_eqe->module_status &
+                       PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+       error_type = module_event_eqe->error_type &
+                    PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+       if (module_status < MLX5_MODULE_STATUS_ERROR) {
+               events->pme_stats.status_counters[module_status - 1]++;
+       } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+               if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+                       /* Unknown error type */
+                       error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+               events->pme_stats.error_counters[error_type]++;
+       }
+
+       if (!printk_ratelimit())
+               return NOTIFY_OK;
+
+       if (module_status < MLX5_MODULE_STATUS_ERROR)
+               mlx5_core_info(events->dev,
+                              "Port module event: module %u, %s\n",
+                              module_num, mlx5_pme_status[module_status - 1]);
+
+       else if (module_status == MLX5_MODULE_STATUS_ERROR)
+               mlx5_core_info(events->dev,
+                              "Port module event[error]: module %u, %s, %s\n",
+                              module_num, mlx5_pme_status[module_status - 1],
+                              mlx5_pme_error[error_type]);
+
+       return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+       *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+
+       mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+                     eqe_type_str(eqe->type), eqe->sub_type);
+       atomic_notifier_call_chain(&events->nh, event, data);
+       return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+       if (!events)
+               return -ENOMEM;
+
+       ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+       events->dev = dev;
+       dev->priv.events = events;
+       return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+       kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+       struct mlx5_events *events = dev->priv.events;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+               events->notifiers[i].nb  = events_nbs_ref[i];
+               events->notifiers[i].ctx = events;
+               mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+       }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+       struct mlx5_events *events = dev->priv.events;
+       int i;
+
+       for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
+               mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+       struct mlx5_events *events = dev->priv.events;
+
+       return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+       struct mlx5_events *events = dev->priv.events;
+
+       return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+       return atomic_notifier_call_chain(&events->nh, event, data);
+}
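
For illustration, a minimal sketch (not part of this patch) of how an upper
interface such as mlx5e or mlx5_ib could consume the notifier chain exported
above; the handler name and the choice to watch port-module events are
assumptions, while the registration calls are the API as defined in this file.

static int example_event_handler(struct notifier_block *nb,
                                 unsigned long event, void *data)
{
        struct mlx5_eqe *eqe = data;

        if (event != MLX5_EVENT_TYPE_PORT_MODULE_EVENT)
                return NOTIFY_DONE;

        pr_debug("module %u changed state\n", eqe->data.port_module.module);
        return NOTIFY_OK;
}

static struct notifier_block example_nb = {
        .notifier_call = example_event_handler,
};

/* at attach time:   mlx5_notifier_register(mdev, &example_nb);   */
/* at detach time:   mlx5_notifier_unregister(mdev, &example_nb); */
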
index 436a8136f26ff5f8b879eb02313beabb717f779e..27c5f6c7d36a7c4c58c38e2a7bd3702a0ae9eed5 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "mlx5_core.h"
 #include "lib/mlx5.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "fpga/conn.h"
 
@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
        return 0;
 }
 
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+       struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+       return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+       struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+       return mlx5_fpga_event(fdev, event, eqe);
+}
+
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
        struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
        if (err)
                goto out;
 
+       MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+       MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+       mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+       mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
        err = mlx5_fpga_conn_device_init(fdev);
        if (err)
                goto err_rsvd_gid;
@@ -201,6 +223,8 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
        mlx5_fpga_conn_device_cleanup(fdev);
 
 err_rsvd_gid:
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
        mlx5_core_unreserve_gids(mdev, max_num_qps);
 out:
        spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
        }
 
        mlx5_fpga_conn_device_cleanup(fdev);
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
        max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
        mlx5_core_unreserve_gids(mdev, max_num_qps);
 }
@@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
        return "Unknown";
 }
 
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+                          unsigned long event, void *eqe)
 {
-       struct mlx5_fpga_device *fdev = mdev->fpga;
+       void *data = ((struct mlx5_eqe *)eqe)->data.raw;
        const char *event_name;
        bool teardown = false;
        unsigned long flags;
@@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
                fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
                break;
        default:
-               mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
-                                          event);
-               return;
+               return NOTIFY_DONE;
        }
 
        spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
         */
        if (teardown)
                mlx5_trigger_health_work(fdev->mdev);
+
+       return NOTIFY_OK;
 }
index 3e2355c8df3ffd3e6b2b5da4cb734d976f011528..7e2e871dbf833b059d790bf161feb7264f2df093 100644 (file)
 
 #ifdef CONFIG_MLX5_FPGA
 
+#include <linux/mlx5/eq.h>
+
+#include "lib/eq.h"
 #include "fpga/cmd.h"
 
 /* Represents an Innova device */
 struct mlx5_fpga_device {
        struct mlx5_core_dev *mdev;
+       struct mlx5_nb fpga_err_nb;
+       struct mlx5_nb fpga_qp_err_nb;
        spinlock_t state_lock; /* Protects state transitions */
        enum mlx5_fpga_status state;
        enum mlx5_fpga_image last_admin_image;
@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
 
 #else
 
@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 {
 }
 
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
-                                  void *data)
-{
-}
-
 #endif
 
 #endif /* __MLX5_FPGA_CORE_H__ */
index 43118de8ee99a19b29ed687c84b45b043f1cab7f..196c07383082f9fe479e930ea4f4a3229c561c43 100644 (file)
@@ -38,6 +38,8 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
 
 enum {
        MLX5_HEALTH_POLL_INTERVAL       = 2 * HZ,
@@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
                    &dev->iseg->cmdq_addr_l_sz);
 }
 
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
-{
-       unsigned long flags;
-       u64 vector;
-
-       /* wait for pending handlers to complete */
-       synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
-       spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
-       vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
-       if (!vector)
-               goto no_trig;
-
-       vector |= MLX5_TRIGGERED_CMD_COMP;
-       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
-       mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-       mlx5_cmd_comp_handler(dev, vector, true);
-       return;
-
-no_trig:
-       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-}
-
 static int in_fatal(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_health *health = &dev->priv.health;
@@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
        mlx5_core_err(dev, "start\n");
        if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-               trigger_cmd_completions(dev);
+               mlx5_cmd_trigger_completions(dev);
        }
 
-       mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+       mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
        mlx5_core_err(dev, "end\n");
 
 unlock:
index 0d90b1b4a3d388c2793de0a8f688c605f3c3abfd..d27c239e7d6cc3402fca8f23757b55a0aaccdf4d 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/clocksource.h>
 #include <linux/highmem.h>
 #include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
 #include "en.h"
 #include "clock.h"
 
@@ -439,16 +440,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
        clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
 }
 
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
-                   struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+                         unsigned long type, void *data)
 {
-       struct mlx5_clock *clock = &mdev->clock;
+       struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+       struct mlx5_core_dev *mdev = clock->mdev;
        struct ptp_clock_event ptp_event;
-       struct timespec64 ts;
-       u64 nsec_now, nsec_delta;
        u64 cycles_now, cycles_delta;
+       u64 nsec_now, nsec_delta, ns;
+       struct mlx5_eqe *eqe = data;
        int pin = eqe->data.pps.pin;
-       s64 ns;
+       struct timespec64 ts;
        unsigned long flags;
 
        switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,6 +465,7 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
                } else {
                        ptp_event.type = PTP_CLOCK_EXTTS;
                }
+               /* TODO: clock->ptp can be NULL if ptp_clock_register fails */
                ptp_clock_event(clock->ptp, &ptp_event);
                break;
        case PTP_PF_PEROUT:
@@ -481,8 +484,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
                write_sequnlock_irqrestore(&clock->lock, flags);
                break;
        default:
-               mlx5_core_err(mdev, " Unhandled event\n");
+               mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+                             clock->ptp_info.pin_config[pin].func);
        }
+
+       return NOTIFY_OK;
 }
 
 void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -567,6 +573,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
                               PTR_ERR(clock->ptp));
                clock->ptp = NULL;
        }
+
+       MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+       mlx5_eq_notifier_register(mdev, &clock->pps_nb);
 }
 
 void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +585,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
        if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
                return;
 
+       mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
        if (clock->ptp) {
                ptp_clock_unregister(clock->ptp);
                clock->ptp = NULL;
index 263cb6e2aeee52e5bbdbd698ab3556531529a14a..31600924bdc367824b2ed9ae199dbc05d17177d7 100644 (file)
@@ -36,7 +36,6 @@
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 void mlx5_init_clock(struct mlx5_core_dev *mdev);
 void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 
 static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 {
@@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
 #else
 static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
 static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
 static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 {
        return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644 (file)
index 0000000..c0fb6d7
--- /dev/null
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_MAX_IRQ_NAME   (32)
+#define MLX5_EQE_SIZE       (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+       struct list_head      list;
+       struct list_head      process_list;
+       struct tasklet_struct task;
+       spinlock_t            lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+       spinlock_t              lock;   /* protect radix tree */
+       struct radix_tree_root  tree;
+};
+
+struct mlx5_eq {
+       struct mlx5_core_dev    *dev;
+       struct mlx5_cq_table    cq_table;
+       __be32 __iomem          *doorbell;
+       u32                     cons_index;
+       struct mlx5_frag_buf    buf;
+       int                     size;
+       unsigned int            vecidx;
+       unsigned int            irqn;
+       u8                      eqn;
+       int                     nent;
+       struct mlx5_rsc_debug   *dbg;
+};
+
+struct mlx5_eq_comp {
+       struct mlx5_eq          core; /* Must be first */
+       struct mlx5_eq_tasklet  tasklet_ctx;
+       struct list_head        list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+       return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+       struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+       return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+       u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+       __raw_writel((__force u32)cpu_to_be32(val), addr);
+       /* We still want ordering, just not swabbing, so add a barrier */
+       mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+
+#endif
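
For orientation, a minimal sketch (an editorial illustration, not part of the
new header) of how the inline helpers above combine when draining an EQ; the
real handlers in eq.c also dispatch each EQE, and this caller is hypothetical.

static void example_drain_eq(struct mlx5_eq *eq)
{
        struct mlx5_eqe *eqe;

        while ((eqe = next_eqe_sw(eq))) {
                /* read the EQE contents only after the ownership check */
                dma_rmb();

                /* ... dispatch on eqe->type here ... */

                ++eq->cons_index;
        }

        /* publish the new consumer index and re-arm the EQ */
        eq_update_ci(eq, 1);
}
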
index 7550b1cc8c6aed8bcaf73f3b8ea8d332e093fe3f..4d78a459676e73c73a99cf155f2f29d34b0075b4 100644 (file)
@@ -33,6 +33,8 @@
 #ifndef __LIB_MLX5_H__
 #define __LIB_MLX5_H__
 
+#include "mlx5_core.h"
+
 void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
 void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
 int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
@@ -40,4 +42,37 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
 int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
 void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
 
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK    0xF
+
+enum port_module_event_status_type {
+       MLX5_MODULE_STATUS_PLUGGED   = 0x1,
+       MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+       MLX5_MODULE_STATUS_ERROR     = 0x3,
+       MLX5_MODULE_STATUS_NUM       = 0x3,
+};
+
+enum  port_module_event_error_type {
+       MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
+       MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
+       MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
+       MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
+       MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
+       MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
+       MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
+       MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
+       MLX5_MODULE_EVENT_ERROR_UNKNOWN,
+       MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+       u64 status_counters[MLX5_MODULE_STATUS_NUM];
+       u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
 #endif
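
A hedged usage sketch for the port-module-event counters declared above; the
printing helper is an assumption, while mlx5_get_pme_stats() and struct
mlx5_pme_stats are as declared in this header.

static void example_dump_pme_stats(struct mlx5_core_dev *dev)
{
        struct mlx5_pme_stats stats;
        int i;

        mlx5_get_pme_stats(dev, &stats);

        for (i = 0; i < MLX5_MODULE_STATUS_NUM; i++)
                pr_debug("pme status %d: %llu events\n",
                         i + 1, stats.status_counters[i]);

        for (i = 0; i < MLX5_MODULE_EVENT_ERROR_NUM; i++)
                pr_debug("pme error %d: %llu events\n",
                         i, stats.error_counters[i]);
}
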
index 28132c7dc05f252c6287a3fa6a8a37415de4872c..7789955738127c3d3382dbe64fc540ea14996f8f 100644 (file)
@@ -43,7 +43,6 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
 #include <linux/mlx5/mlx5_ifc.h>
@@ -53,6 +52,7 @@
 #endif
 #include <net/devlink.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fs_core.h"
 #include "lib/mpfs.h"
 #include "eswitch.h"
@@ -319,51 +319,6 @@ static void release_bar(struct pci_dev *pdev)
        pci_release_regions(pdev);
 }
 
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_eq_table *table = &priv->eq_table;
-       int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
-                     MLX5_CAP_GEN(dev, max_num_eqs) :
-                     1 << MLX5_CAP_GEN(dev, log_max_eq);
-       int nvec;
-       int err;
-
-       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-              MLX5_EQ_VEC_COMP_BASE;
-       nvec = min_t(int, nvec, num_eqs);
-       if (nvec <= MLX5_EQ_VEC_COMP_BASE)
-               return -ENOMEM;
-
-       priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
-       if (!priv->irq_info)
-               return -ENOMEM;
-
-       nvec = pci_alloc_irq_vectors(dev->pdev,
-                       MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-                       PCI_IRQ_MSIX);
-       if (nvec < 0) {
-               err = nvec;
-               goto err_free_irq_info;
-       }
-
-       table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
-       return 0;
-
-err_free_irq_info:
-       kfree(priv->irq_info);
-       return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-
-       pci_free_irq_vectors(dev->pdev);
-       kfree(priv->irq_info);
-}
-
 struct mlx5_reg_host_endianness {
        u8      he;
        u8      rsvd[15];
@@ -637,177 +592,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
        return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-       struct mlx5_priv *priv  = &mdev->priv;
-       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
-       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
-               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
-               return -ENOMEM;
-       }
-
-       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
-                       priv->irq_info[i].mask);
-
-       if (IS_ENABLED(CONFIG_SMP) &&
-           irq_set_affinity_hint(irq, priv->irq_info[i].mask))
-               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
-       return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-       struct mlx5_priv *priv  = &mdev->priv;
-       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
-       irq_set_affinity_hint(irq, NULL);
-       free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
-       int err;
-       int i;
-
-       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
-               err = mlx5_irq_set_affinity_hint(mdev, i);
-               if (err)
-                       goto err_out;
-       }
-
-       return 0;
-
-err_out:
-       for (i--; i >= 0; i--)
-               mlx5_irq_clear_affinity_hint(mdev, i);
-
-       return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
-       int i;
-
-       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
-               mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
-                   unsigned int *irqn)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq, *n;
-       int err = -ENOENT;
-
-       spin_lock(&table->lock);
-       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
-               if (eq->index == vector) {
-                       *eqn = eq->eqn;
-                       *irqn = eq->irqn;
-                       err = 0;
-                       break;
-               }
-       }
-       spin_unlock(&table->lock);
-
-       return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq;
-
-       spin_lock(&table->lock);
-       list_for_each_entry(eq, &table->comp_eqs_list, list)
-               if (eq->eqn == eqn) {
-                       spin_unlock(&table->lock);
-                       return eq;
-               }
-
-       spin_unlock(&table->lock);
-
-       return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
-       if (dev->rmap) {
-               free_irq_cpu_rmap(dev->rmap);
-               dev->rmap = NULL;
-       }
-#endif
-       spin_lock(&table->lock);
-       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
-               list_del(&eq->list);
-               spin_unlock(&table->lock);
-               if (mlx5_destroy_unmap_eq(dev, eq))
-                       mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
-                                      eq->eqn);
-               kfree(eq);
-               spin_lock(&table->lock);
-       }
-       spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       char name[MLX5_MAX_IRQ_NAME];
-       struct mlx5_eq *eq;
-       int ncomp_vec;
-       int nent;
-       int err;
-       int i;
-
-       INIT_LIST_HEAD(&table->comp_eqs_list);
-       ncomp_vec = table->num_comp_vectors;
-       nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
-       dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
-       if (!dev->rmap)
-               return -ENOMEM;
-#endif
-       for (i = 0; i < ncomp_vec; i++) {
-               eq = kzalloc(sizeof(*eq), GFP_KERNEL);
-               if (!eq) {
-                       err = -ENOMEM;
-                       goto clean;
-               }
-
-#ifdef CONFIG_RFS_ACCEL
-               irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
-                                MLX5_EQ_VEC_COMP_BASE + i));
-#endif
-               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
-               err = mlx5_create_map_eq(dev, eq,
-                                        i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-                                        name, MLX5_EQ_TYPE_COMP);
-               if (err) {
-                       kfree(eq);
-                       goto clean;
-               }
-               mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
-               eq->index = i;
-               spin_lock(&table->lock);
-               list_add_tail(&eq->list, &table->comp_eqs_list);
-               spin_unlock(&table->lock);
-       }
-
-       return 0;
-
-clean:
-       free_comp_eqs(dev);
-       return err;
-}
-
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
@@ -944,22 +728,26 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto out;
        }
 
-       err = mlx5_eq_init(dev);
+       err = mlx5_eq_table_init(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize eq\n");
                goto out;
        }
 
+       err = mlx5_events_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to initialize events\n");
+               goto err_eq_cleanup;
+       }
+
        err = mlx5_cq_debugfs_init(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
-               goto err_eq_cleanup;
+               goto err_events_cleanup;
        }
 
        mlx5_init_qp_table(dev);
 
-       mlx5_init_srq_table(dev);
-
        mlx5_init_mkey_table(dev);
 
        mlx5_init_reserved_gids(dev);
@@ -1013,12 +801,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 err_tables_cleanup:
        mlx5_vxlan_destroy(dev->vxlan);
        mlx5_cleanup_mkey_table(dev);
-       mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cq_debugfs_cleanup(dev);
-
+err_events_cleanup:
+       mlx5_events_cleanup(dev);
 err_eq_cleanup:
-       mlx5_eq_cleanup(dev);
+       mlx5_eq_table_cleanup(dev);
 
 out:
        return err;
@@ -1036,10 +824,10 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
        mlx5_cleanup_clock(dev);
        mlx5_cleanup_reserved_gids(dev);
        mlx5_cleanup_mkey_table(dev);
-       mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cq_debugfs_cleanup(dev);
-       mlx5_eq_cleanup(dev);
+       mlx5_events_cleanup(dev);
+       mlx5_eq_table_cleanup(dev);
 }
 
 static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
@@ -1131,16 +919,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto reclaim_boot_pages;
        }
 
-       err = mlx5_pagealloc_start(dev);
-       if (err) {
-               dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
-               goto reclaim_boot_pages;
-       }
-
        err = mlx5_cmd_init_hca(dev, sw_owner_id);
        if (err) {
                dev_err(&pdev->dev, "init hca failed\n");
-               goto err_pagealloc_stop;
+               goto reclaim_boot_pages;
        }
 
        mlx5_set_driver_version(dev);
@@ -1161,23 +943,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                }
        }
 
-       err = mlx5_alloc_irq_vectors(dev);
-       if (err) {
-               dev_err(&pdev->dev, "alloc irq vectors failed\n");
-               goto err_cleanup_once;
-       }
-
        dev->priv.uar = mlx5_get_uars_page(dev);
        if (IS_ERR(dev->priv.uar)) {
                dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
                err = PTR_ERR(dev->priv.uar);
-               goto err_disable_msix;
+               goto err_get_uars;
        }
 
-       err = mlx5_start_eqs(dev);
+       mlx5_events_start(dev);
+       mlx5_pagealloc_start(dev);
+
+       err = mlx5_eq_table_create(dev);
        if (err) {
-               dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-               goto err_put_uars;
+               dev_err(&pdev->dev, "Failed to create EQs\n");
+               goto err_eq_table;
        }
 
        err = mlx5_fw_tracer_init(dev->tracer);
@@ -1186,18 +965,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_fw_tracer;
        }
 
-       err = alloc_comp_eqs(dev);
-       if (err) {
-               dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
-               goto err_comp_eqs;
-       }
-
-       err = mlx5_irq_set_affinity_hints(dev);
-       if (err) {
-               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
-               goto err_affinity_hints;
-       }
-
        err = mlx5_fpga_device_start(dev);
        if (err) {
                dev_err(&pdev->dev, "fpga device start failed %d\n", err);
@@ -1266,24 +1033,17 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_fpga_device_stop(dev);
 
 err_fpga_start:
-       mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
-       free_comp_eqs(dev);
-
-err_comp_eqs:
        mlx5_fw_tracer_cleanup(dev->tracer);
 
 err_fw_tracer:
-       mlx5_stop_eqs(dev);
+       mlx5_eq_table_destroy(dev);
 
-err_put_uars:
+err_eq_table:
+       mlx5_pagealloc_stop(dev);
+       mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, priv->uar);
 
-err_disable_msix:
-       mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+err_get_uars:
        if (boot)
                mlx5_cleanup_once(dev);
 
@@ -1294,9 +1054,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto out_err;
        }
 
-err_pagealloc_stop:
-       mlx5_pagealloc_stop(dev);
-
 reclaim_boot_pages:
        mlx5_reclaim_startup_pages(dev);
 
@@ -1340,21 +1097,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_accel_ipsec_cleanup(dev);
        mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
-       mlx5_irq_clear_affinity_hints(dev);
-       free_comp_eqs(dev);
        mlx5_fw_tracer_cleanup(dev->tracer);
-       mlx5_stop_eqs(dev);
+       mlx5_eq_table_destroy(dev);
+       mlx5_pagealloc_stop(dev);
+       mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, priv->uar);
-       mlx5_free_irq_vectors(dev);
        if (cleanup)
                mlx5_cleanup_once(dev);
        mlx5_stop_health_poll(dev, cleanup);
+
        err = mlx5_cmd_teardown_hca(dev);
        if (err) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
                goto out;
        }
-       mlx5_pagealloc_stop(dev);
        mlx5_reclaim_startup_pages(dev);
        mlx5_core_disable_hca(dev, 0);
        mlx5_cmd_cleanup(dev);
@@ -1364,12 +1120,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        return err;
 }
 
-struct mlx5_core_event_handler {
-       void (*event)(struct mlx5_core_dev *dev,
-                     enum mlx5_dev_event event,
-                     void *data);
-};
-
 static const struct devlink_ops mlx5_devlink_ops = {
 #ifdef CONFIG_MLX5_ESWITCH
        .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -1403,7 +1153,6 @@ static int init_one(struct pci_dev *pdev,
        pci_set_drvdata(pdev, dev);
 
        dev->pdev = pdev;
-       dev->event = mlx5_core_event;
        dev->profile = &profile[prof_sel];
 
        INIT_LIST_HEAD(&priv->ctx_list);
@@ -1411,17 +1160,6 @@ static int init_one(struct pci_dev *pdev,
        mutex_init(&dev->pci_status_mutex);
        mutex_init(&dev->intf_state_mutex);
 
-       INIT_LIST_HEAD(&priv->waiting_events_list);
-       priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       err = init_srcu_struct(&priv->pfault_srcu);
-       if (err) {
-               dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
-                       err);
-               goto clean_dev;
-       }
-#endif
        mutex_init(&priv->bfregs.reg_head.lock);
        mutex_init(&priv->bfregs.wc_head.lock);
        INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
@@ -1430,7 +1168,7 @@ static int init_one(struct pci_dev *pdev,
        err = mlx5_pci_init(dev, priv);
        if (err) {
                dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
-               goto clean_srcu;
+               goto clean_dev;
        }
 
        err = mlx5_health_init(dev);
@@ -1439,12 +1177,14 @@ static int init_one(struct pci_dev *pdev,
                goto close_pci;
        }
 
-       mlx5_pagealloc_init(dev);
+       err = mlx5_pagealloc_init(dev);
+       if (err)
+               goto err_pagealloc_init;
 
        err = mlx5_load_one(dev, priv, true);
        if (err) {
                dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
-               goto clean_health;
+               goto err_load_one;
        }
 
        request_module_nowait(MLX5_IB_MOD);
@@ -1458,16 +1198,13 @@ static int init_one(struct pci_dev *pdev,
 
 clean_load:
        mlx5_unload_one(dev, priv, true);
-clean_health:
+err_load_one:
        mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
        mlx5_health_cleanup(dev);
 close_pci:
        mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       cleanup_srcu_struct(&priv->pfault_srcu);
 clean_dev:
-#endif
        devlink_free(devlink);
 
        return err;
@@ -1491,9 +1228,6 @@ static void remove_one(struct pci_dev *pdev)
        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
        mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
        devlink_free(devlink);
 }
 
@@ -1637,7 +1371,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
         * kexec. There is no need to cleanup the mlx5_core software
         * contexts.
         */
-       mlx5_irq_clear_affinity_hints(dev);
        mlx5_core_eq_free_irqs(dev);
 
        return 0;
index 0594d0961cb3fa6ba03b00e68ecc0fc3300a1ec7..fd3141a4b3f1a47835fb0f4b46b9ba1c9b7a3502 100644 (file)
@@ -78,6 +78,11 @@ do {                                                                 \
                 __func__, __LINE__, current->pid,                      \
                ##__VA_ARGS__)
 
+#define mlx5_core_warn_once(__dev, format, ...)                                \
+       dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,   \
+                     __func__, __LINE__, current->pid,                 \
+                     ##__VA_ARGS__)
+
 #define mlx5_core_info(__dev, format, ...)                             \
        dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
 
@@ -97,12 +102,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
-
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-                    unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
-                         struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -124,28 +123,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-                      int nent, u64 mask, const char *name,
-                      enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
 int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
@@ -159,6 +137,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove(struct mlx5_core_dev *dev);
 
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
 void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
 void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
 void mlx5_attach_device(struct mlx5_core_dev *dev);
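
The four mlx5_events_* hooks declared above follow the driver's usual
init/start/stop/cleanup pairing. A minimal sketch of the expected call order,
mirroring the mlx5_init_once()/mlx5_load_one() changes elsewhere in this merge
(the wrapper functions are hypothetical, error handling trimmed):

static int example_bring_up_events(struct mlx5_core_dev *dev)
{
        int err;

        err = mlx5_events_init(dev);    /* allocate priv.events and its notifier head */
        if (err)
                return err;

        mlx5_events_start(dev);         /* register the FW event notifiers */
        return 0;
}

static void example_tear_down_events(struct mlx5_core_dev *dev)
{
        mlx5_events_stop(dev);          /* unregister in reverse order */
        mlx5_events_cleanup(dev);       /* free priv.events */
}
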
index e36d3e3675f963c44ff76f6c69a7ac6c72155554..a83b517b07143e68e1aaa8172d1924b7f91888b5 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
        MLX5_PAGES_CANT_GIVE    = 0,
@@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work)
        kfree(req);
 }
 
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-                                s32 npages)
+static int req_pages_handler(struct notifier_block *nb,
+                            unsigned long type, void *data)
 {
        struct mlx5_pages_req *req;
-
+       struct mlx5_core_dev *dev;
+       struct mlx5_priv *priv;
+       struct mlx5_eqe *eqe;
+       u16 func_id;
+       s32 npages;
+
+       priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+       dev  = container_of(priv, struct mlx5_core_dev, priv);
+       eqe  = data;
+
+       func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+       npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
+       mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+                     func_id, npages);
        req = kzalloc(sizeof(*req), GFP_ATOMIC);
        if (!req) {
                mlx5_core_warn(dev, "failed to allocate pages request\n");
-               return;
+               return NOTIFY_DONE;
        }
 
        req->dev = dev;
@@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
        req->npages = npages;
        INIT_WORK(&req->work, pages_work_handler);
        queue_work(dev->priv.pg_wq, &req->work);
+       return NOTIFY_OK;
 }
 
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
        return 0;
 }
 
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
        dev->priv.page_root = RB_ROOT;
        INIT_LIST_HEAD(&dev->priv.free_list);
+       dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+       if (!dev->priv.pg_wq)
+               return -ENOMEM;
+
+       return 0;
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
 {
-       /* nothing */
+       destroy_workqueue(dev->priv.pg_wq);
 }
 
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
 {
-       dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
-       if (!dev->priv.pg_wq)
-               return -ENOMEM;
-
-       return 0;
+       MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+       mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
 }
 
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
 {
-       destroy_workqueue(dev->priv.pg_wq);
+       mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+       flush_workqueue(dev->priv.pg_wq);
 }
 
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
index 31a9cbd85689b01fc0bfe9e6c221d73cc7c5fe13..2b82f35f4c35153080cb0716abd68caa6f12d3c7 100644 (file)
@@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
        *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
 
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
-       "Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
-       "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
-       "Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
-       "Power budget exceeded",
-       "Long Range for non MLNX cable",
-       "Bus stuck(I2C or data shorted)",
-       "No EEPROM/retry timeout",
-       "Enforce part number list",
-       "Unknown identifier",
-       "High Temperature",
-       "Bad or shorted cable/module",
-       "Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
-       enum port_module_event_status_type module_status;
-       enum port_module_event_error_type error_type;
-       struct mlx5_eqe_port_module *module_event_eqe;
-       struct mlx5_priv *priv = &dev->priv;
-       u8 module_num;
-
-       module_event_eqe = &eqe->data.port_module;
-       module_num = module_event_eqe->module;
-       module_status = module_event_eqe->module_status &
-                       PORT_MODULE_EVENT_MODULE_STATUS_MASK;
-       error_type = module_event_eqe->error_type &
-                    PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
-       if (module_status < MLX5_MODULE_STATUS_ERROR) {
-               priv->pme_stats.status_counters[module_status - 1]++;
-       } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
-               if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
-                       /* Unknown error type */
-                       error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
-               priv->pme_stats.error_counters[error_type]++;
-       }
-
-       if (!printk_ratelimit())
-               return;
-
-       if (module_status < MLX5_MODULE_STATUS_ERROR)
-               mlx5_core_info(dev,
-                              "Port module event: module %u, %s\n",
-                              module_num, mlx5_pme_status[module_status - 1]);
-
-       else if (module_status == MLX5_MODULE_STATUS_ERROR)
-               mlx5_core_info(dev,
-                              "Port module event[error]: module %u, %s, %s\n",
-                              module_num, mlx5_pme_status[module_status - 1],
-                              mlx5_pme_error[error_type]);
-}
-
 int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
 {
        u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
index 91b8139a388d2ece13a3fa5c296bd63ed7ab57c3..388f205a497f0ba703c26b73ea05be1b591ad7b7 100644 (file)
 #include <linux/mlx5/transobj.h>
 
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
-                                                u32 rsn)
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
 {
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
        struct mlx5_core_rsc_common *common;
 
        spin_lock(&table->lock);
@@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
 
        spin_unlock(&table->lock);
 
-       if (!common) {
-               mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
-                              rsn);
-               return NULL;
-       }
        return common;
 }
 
@@ -120,19 +115,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
        }
 }
 
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+                             unsigned long type, void *data)
 {
-       struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+       struct mlx5_core_rsc_common *common;
+       struct mlx5_qp_table *table;
+       struct mlx5_core_dev *dev;
        struct mlx5_core_dct *dct;
+       u8 event_type = (u8)type;
        struct mlx5_core_qp *qp;
+       struct mlx5_priv *priv;
+       struct mlx5_eqe *eqe;
+       u32 rsn;
+
+       switch (event_type) {
+       case MLX5_EVENT_TYPE_DCT_DRAINED:
+               eqe = data;
+               rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+               rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+               break;
+       case MLX5_EVENT_TYPE_PATH_MIG:
+       case MLX5_EVENT_TYPE_COMM_EST:
+       case MLX5_EVENT_TYPE_SQ_DRAINED:
+       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+               eqe = data;
+               rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+               rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+               break;
+       default:
+               return NOTIFY_DONE;
+       }
+
+       table = container_of(nb, struct mlx5_qp_table, nb);
+       priv  = container_of(table, struct mlx5_priv, qp_table);
+       dev   = container_of(priv, struct mlx5_core_dev, priv);
+
+       mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
 
-       if (!common)
-               return;
+       common = mlx5_get_rsc(table, rsn);
+       if (!common) {
+               mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn);
+               return NOTIFY_OK;
+       }
 
        if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
                mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
                               event_type, rsn);
-               return;
+               goto out;
        }
 
        switch (common->res) {
@@ -150,8 +183,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
        default:
                mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
        }
-
+out:
        mlx5_core_put_rsc(common);
+
+       return NOTIFY_OK;
 }
 
 static int create_resource_common(struct mlx5_core_dev *dev,
@@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev)
        spin_lock_init(&table->lock);
        INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
        mlx5_qp_debugfs_init(dev);
+
+       table->nb.notifier_call = rsc_event_notifier;
+       mlx5_notifier_register(dev, &table->nb);
 }
 
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
 {
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+       mlx5_notifier_unregister(dev, &table->nb);
        mlx5_qp_debugfs_cleanup(dev);
 }
 
@@ -670,3 +711,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+                                               int res_num,
+                                               enum mlx5_res_type res_type)
+{
+       u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+       return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+       mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
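
A short sketch of the hold/put pair exported above; MLX5_RES_QP and the
surrounding caller are illustrative assumptions, while the two helpers are as
defined in this file.

static void example_touch_qp(struct mlx5_core_dev *dev, int qpn)
{
        struct mlx5_core_rsc_common *res;

        res = mlx5_core_res_hold(dev, qpn, MLX5_RES_QP);
        if (!res)
                return;         /* no such resource in the table */

        /* the resource cannot be released while this reference is held */

        mlx5_core_res_put(res);
}
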
index a1ee9a8a769e8a96e2c25f84454772159bb4bd16..c4d4b76096dc2e5884965966e9e1e63913b5abd4 100644 (file)
@@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn)
-{
-       u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
-       int err;
-
-       MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
-       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-       if (!err)
-               *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
-       return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
-       u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
-       MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
-       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
-       u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
-       MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
-       MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out,
-                                         sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
-       u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
-       int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
-       MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
-       MLX5_SET(query_rmp_in, in, rmpn,   rmpn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
-       void *in;
-       void *rmpc;
-       void *wq;
-       void *bitmask;
-       int  err;
-
-       in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       rmpc    = MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
-       bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
-       wq      = MLX5_ADDR_OF(rmpc,            rmpc, wq);
-
-       MLX5_SET(modify_rmp_in, in,      rmp_state, MLX5_RMPC_STATE_RDY);
-       MLX5_SET(modify_rmp_in, in,      rmpn,      rmpn);
-       MLX5_SET(wq,            wq,      lwm,       lwm);
-       MLX5_SET(rmp_bitmask,   bitmask, lwm,       1);
-       MLX5_SET(rmpc,          rmpc,    state,     MLX5_RMPC_STATE_RDY);
-
-       err =  mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
-       kvfree(in);
-
-       return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *xsrqn)
-{
-       u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
-       int err;
-
-       MLX5_SET(create_xrc_srq_in, in, opcode,     MLX5_CMD_OP_CREATE_XRC_SRQ);
-       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-       if (!err)
-               *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
-       return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
-       u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
-       MLX5_SET(destroy_xrc_srq_in, in, opcode,   MLX5_CMD_OP_DESTROY_XRC_SRQ);
-       MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
-       u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
-       MLX5_SET(arm_xrc_srq_in, in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
-       MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
-       MLX5_SET(arm_xrc_srq_in, in, lwm,      lwm);
-       MLX5_SET(arm_xrc_srq_in, in, op_mod,
-                MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rqtn)
 {
index 31a750570c3889c5396ccac46000b07f8aeb2db7..28b757a64029915f3f6754af9b8535481334fec4 100644 (file)
@@ -60,7 +60,7 @@ struct mlx5_core_cq {
        } tasklet_ctx;
        int                     reset_notify_added;
        struct list_head        reset_notify;
-       struct mlx5_eq          *eq;
+       struct mlx5_eq_comp     *eq;
        u16 uid;
 };
 
index b4c0457fbebd9b92ec74181dbd371a7e20833749..f7c8bebfe472943b6ae4c703661debb1010192c4 100644 (file)
@@ -212,6 +212,13 @@ enum {
        MLX5_PFAULT_SUBTYPE_RDMA = 1,
 };
 
+enum wqe_page_fault_type {
+       MLX5_WQE_PF_TYPE_RMP = 0,
+       MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE = 1,
+       MLX5_WQE_PF_TYPE_RESP = 2,
+       MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC = 3,
+};
+
 enum {
        MLX5_PERM_LOCAL_READ    = 1 << 2,
        MLX5_PERM_LOCAL_WRITE   = 1 << 3,
@@ -294,9 +301,15 @@ enum {
        MLX5_EVENT_QUEUE_TYPE_DCT = 6,
 };
 
+/* mlx5 components can subscribe to any one of these events via the
+ * mlx5_eq_notifier_register API.
+ */
 enum mlx5_event {
+       /* Special value to subscribe to any event */
+       MLX5_EVENT_TYPE_NOTIFY_ANY         = 0x0,
+       /* HW events enum start: comp events are not subscribable */
        MLX5_EVENT_TYPE_COMP               = 0x0,
-
+       /* HW Async events enum start: subscribable events */
        MLX5_EVENT_TYPE_PATH_MIG           = 0x01,
        MLX5_EVENT_TYPE_COMM_EST           = 0x02,
        MLX5_EVENT_TYPE_SQ_DRAINED         = 0x03,
@@ -334,6 +347,8 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
 
        MLX5_EVENT_TYPE_DEVICE_TRACER      = 0x26,
+
+       MLX5_EVENT_TYPE_MAX                = MLX5_EVENT_TYPE_DEVICE_TRACER + 1,
 };
 
 enum {
index aa5963b5d38e192d4e4bb455938bc6200a8ee48e..b090a96f87dff2ae74613121c50bfe23d5b34604 100644 (file)
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
 #include <linux/idr.h>
+#include <linux/notifier.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
-#include <linux/mlx5/srq.h>
+#include <linux/mlx5/eq.h>
 #include <linux/timecounter.h>
 #include <linux/ptp_clock_kernel.h>
 
@@ -84,18 +85,6 @@ enum {
        MLX5_MAX_PORTS  = 2,
 };
 
-enum {
-       MLX5_EQ_VEC_PAGES        = 0,
-       MLX5_EQ_VEC_CMD          = 1,
-       MLX5_EQ_VEC_ASYNC        = 2,
-       MLX5_EQ_VEC_PFAULT       = 3,
-       MLX5_EQ_VEC_COMP_BASE,
-};
-
-enum {
-       MLX5_MAX_IRQ_NAME       = 32
-};
-
 enum {
        MLX5_ATOMIC_MODE_OFFSET = 16,
        MLX5_ATOMIC_MODE_IB_COMP = 1,
@@ -205,16 +194,7 @@ struct mlx5_rsc_debug {
 };
 
 enum mlx5_dev_event {
-       MLX5_DEV_EVENT_SYS_ERROR,
-       MLX5_DEV_EVENT_PORT_UP,
-       MLX5_DEV_EVENT_PORT_DOWN,
-       MLX5_DEV_EVENT_PORT_INITIALIZED,
-       MLX5_DEV_EVENT_LID_CHANGE,
-       MLX5_DEV_EVENT_PKEY_CHANGE,
-       MLX5_DEV_EVENT_GUID_CHANGE,
-       MLX5_DEV_EVENT_CLIENT_REREG,
-       MLX5_DEV_EVENT_PPS,
-       MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT,
+       MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */
 };
 
 enum mlx5_port_status {
@@ -222,14 +202,6 @@ enum mlx5_port_status {
        MLX5_PORT_DOWN      = 2,
 };
 
-enum mlx5_eq_type {
-       MLX5_EQ_TYPE_COMP,
-       MLX5_EQ_TYPE_ASYNC,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       MLX5_EQ_TYPE_PF,
-#endif
-};
-
 struct mlx5_bfreg_info {
        u32                    *sys_pages;
        int                     num_low_latency_bfregs;
@@ -297,6 +269,8 @@ struct mlx5_cmd_stats {
 };
 
 struct mlx5_cmd {
+       struct mlx5_nb    nb;
+
        void           *cmd_alloc_buf;
        dma_addr_t      alloc_dma;
        int             alloc_size;
@@ -366,51 +340,6 @@ struct mlx5_frag_buf_ctrl {
        u8                      log_frag_strides;
 };
 
-struct mlx5_eq_tasklet {
-       struct list_head list;
-       struct list_head process_list;
-       struct tasklet_struct task;
-       /* lock on completion tasklet list */
-       spinlock_t lock;
-};
-
-struct mlx5_eq_pagefault {
-       struct work_struct       work;
-       /* Pagefaults lock */
-       spinlock_t               lock;
-       struct workqueue_struct *wq;
-       mempool_t               *pool;
-};
-
-struct mlx5_cq_table {
-       /* protect radix tree */
-       spinlock_t              lock;
-       struct radix_tree_root  tree;
-};
-
-struct mlx5_eq {
-       struct mlx5_core_dev   *dev;
-       struct mlx5_cq_table    cq_table;
-       __be32 __iomem         *doorbell;
-       u32                     cons_index;
-       struct mlx5_frag_buf    buf;
-       int                     size;
-       unsigned int            irqn;
-       u8                      eqn;
-       int                     nent;
-       u64                     mask;
-       struct list_head        list;
-       int                     index;
-       struct mlx5_rsc_debug   *dbg;
-       enum mlx5_eq_type       type;
-       union {
-               struct mlx5_eq_tasklet   tasklet_ctx;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-               struct mlx5_eq_pagefault pf_ctx;
-#endif
-       };
-};
-
 struct mlx5_core_psv {
        u32     psv_idx;
        struct psv_layout {
@@ -463,36 +392,6 @@ struct mlx5_core_rsc_common {
        struct completion       free;
 };
 
-struct mlx5_core_srq {
-       struct mlx5_core_rsc_common     common; /* must be first */
-       u32             srqn;
-       int             max;
-       size_t          max_gs;
-       size_t          max_avail_gather;
-       int             wqe_shift;
-       void (*event)   (struct mlx5_core_srq *, enum mlx5_event);
-
-       atomic_t                refcount;
-       struct completion       free;
-       u16             uid;
-};
-
-struct mlx5_eq_table {
-       void __iomem           *update_ci;
-       void __iomem           *update_arm_ci;
-       struct list_head        comp_eqs_list;
-       struct mlx5_eq          pages_eq;
-       struct mlx5_eq          async_eq;
-       struct mlx5_eq          cmd_eq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       struct mlx5_eq          pfault_eq;
-#endif
-       int                     num_comp_vectors;
-       /* protect EQs list
-        */
-       spinlock_t              lock;
-};
-
 struct mlx5_uars_page {
        void __iomem           *map;
        bool                    wc;
@@ -542,13 +441,8 @@ struct mlx5_core_health {
 };
 
 struct mlx5_qp_table {
-       /* protect radix tree
-        */
-       spinlock_t              lock;
-       struct radix_tree_root  tree;
-};
+       struct notifier_block   nb;
 
-struct mlx5_srq_table {
        /* protect radix tree
         */
        spinlock_t              lock;
@@ -575,11 +469,6 @@ struct mlx5_core_sriov {
        int                     enabled_vfs;
 };
 
-struct mlx5_irq_info {
-       cpumask_var_t mask;
-       char name[MLX5_MAX_IRQ_NAME];
-};
-
 struct mlx5_fc_stats {
        spinlock_t counters_idr_lock; /* protects counters_idr */
        struct idr counters_idr;
@@ -593,10 +482,11 @@ struct mlx5_fc_stats {
        unsigned long sampling_interval; /* jiffies */
 };
 
+struct mlx5_events;
 struct mlx5_mpfs;
 struct mlx5_eswitch;
 struct mlx5_lag;
-struct mlx5_pagefault;
+struct mlx5_eq_table;
 
 struct mlx5_rate_limit {
        u32                     rate;
@@ -619,37 +509,12 @@ struct mlx5_rl_table {
        struct mlx5_rl_entry   *rl_entry;
 };
 
-enum port_module_event_status_type {
-       MLX5_MODULE_STATUS_PLUGGED   = 0x1,
-       MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
-       MLX5_MODULE_STATUS_ERROR     = 0x3,
-       MLX5_MODULE_STATUS_NUM       = 0x3,
-};
-
-enum  port_module_event_error_type {
-       MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
-       MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
-       MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
-       MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
-       MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
-       MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
-       MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
-       MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
-       MLX5_MODULE_EVENT_ERROR_UNKNOWN,
-       MLX5_MODULE_EVENT_ERROR_NUM,
-};
-
-struct mlx5_port_module_event_stats {
-       u64 status_counters[MLX5_MODULE_STATUS_NUM];
-       u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
-};
-
 struct mlx5_priv {
        char                    name[MLX5_MAX_NAME_LEN];
-       struct mlx5_eq_table    eq_table;
-       struct mlx5_irq_info    *irq_info;
+       struct mlx5_eq_table    *eq_table;
 
        /* pages stuff */
+       struct mlx5_nb          pg_nb;
        struct workqueue_struct *pg_wq;
        struct rb_root          page_root;
        int                     fw_pages;
@@ -659,8 +524,6 @@ struct mlx5_priv {
 
        struct mlx5_core_health health;
 
-       struct mlx5_srq_table   srq_table;
-
        /* start: qp staff */
        struct mlx5_qp_table    qp_table;
        struct dentry          *qp_debugfs;
@@ -690,9 +553,7 @@ struct mlx5_priv {
        struct list_head        dev_list;
        struct list_head        ctx_list;
        spinlock_t              ctx_lock;
-
-       struct list_head        waiting_events_list;
-       bool                    is_accum_events;
+       struct mlx5_events      *events;
 
        struct mlx5_flow_steering *steering;
        struct mlx5_mpfs        *mpfs;
@@ -703,15 +564,6 @@ struct mlx5_priv {
        struct mlx5_fc_stats            fc_stats;
        struct mlx5_rl_table            rl_table;
 
-       struct mlx5_port_module_event_stats  pme_stats;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       void                  (*pfault)(struct mlx5_core_dev *dev,
-                                       void *context,
-                                       struct mlx5_pagefault *pfault);
-       void                   *pfault_ctx;
-       struct srcu_struct      pfault_srcu;
-#endif
        struct mlx5_bfreg_data          bfregs;
        struct mlx5_uars_page          *uar;
 };
@@ -736,44 +588,6 @@ enum mlx5_pagefault_type_flags {
        MLX5_PFAULT_RDMA      = 1 << 2,
 };
 
-/* Contains the details of a pagefault. */
-struct mlx5_pagefault {
-       u32                     bytes_committed;
-       u32                     token;
-       u8                      event_subtype;
-       u8                      type;
-       union {
-               /* Initiator or send message responder pagefault details. */
-               struct {
-                       /* Received packet size, only valid for responders. */
-                       u32     packet_size;
-                       /*
-                        * Number of resource holding WQE, depends on type.
-                        */
-                       u32     wq_num;
-                       /*
-                        * WQE index. Refers to either the send queue or
-                        * receive queue, according to event_subtype.
-                        */
-                       u16     wqe_index;
-               } wqe;
-               /* RDMA responder pagefault details */
-               struct {
-                       u32     r_key;
-                       /*
-                        * Received packet size, minimal size page fault
-                        * resolution required for forward progress.
-                        */
-                       u32     packet_size;
-                       u32     rdma_op_len;
-                       u64     rdma_va;
-               } rdma;
-       };
-
-       struct mlx5_eq         *eq;
-       struct work_struct      work;
-};
-
 struct mlx5_td {
        struct list_head tirs_list;
        u32              tdn;
@@ -803,6 +617,8 @@ struct mlx5_pps {
 };
 
 struct mlx5_clock {
+       struct mlx5_core_dev      *mdev;
+       struct mlx5_nb             pps_nb;
        seqlock_t                  lock;
        struct cyclecounter        cycles;
        struct timecounter         tc;
@@ -810,7 +626,6 @@ struct mlx5_clock {
        u32                        nominal_c_mult;
        unsigned long              overflow_period;
        struct delayed_work        overflow_work;
-       struct mlx5_core_dev      *mdev;
        struct ptp_clock          *ptp;
        struct ptp_clock_info      ptp_info;
        struct mlx5_pps            pps_info;
@@ -843,9 +658,6 @@ struct mlx5_core_dev {
        /* sync interface state */
        struct mutex            intf_state_mutex;
        unsigned long           intf_state;
-       void                    (*event) (struct mlx5_core_dev *dev,
-                                         enum mlx5_dev_event event,
-                                         unsigned long param);
        struct mlx5_priv        priv;
        struct mlx5_profile     *profile;
        atomic_t                num_qps;
@@ -858,9 +670,6 @@ struct mlx5_core_dev {
        } roce;
 #ifdef CONFIG_MLX5_FPGA
        struct mlx5_fpga_device *fpga;
-#endif
-#ifdef CONFIG_RFS_ACCEL
-       struct cpu_rmap         *rmap;
 #endif
        struct mlx5_clock        clock;
        struct mlx5_ib_clock_info  *clock_info;
@@ -940,8 +749,8 @@ struct mlx5_hca_vport_context {
        u64                     node_guid;
        u32                     cap_mask1;
        u32                     cap_mask1_perm;
-       u32                     cap_mask2;
-       u32                     cap_mask2_perm;
+       u16                     cap_mask2;
+       u16                     cap_mask2_perm;
        u16                     lid;
        u8                      init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */
        u8                      lmc;
@@ -1070,13 +879,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                                      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                 struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *in);
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                       struct mlx5_srq_attr *out);
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                     u16 lwm, int is_srq);
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -1095,9 +897,9 @@ int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
                      u16 opmod, u8 port);
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
                                 s32 npages);
@@ -1108,9 +910,6 @@ void mlx5_unregister_debugfs(void);
 
 void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1155,6 +954,9 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
                     bool map_wc, bool fast_path);
 void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev);
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector);
 unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
 int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
                           u8 roce_version, u8 roce_l3_type, const u8 *gid,
@@ -1202,11 +1004,6 @@ struct mlx5_interface {
        void                    (*remove)(struct mlx5_core_dev *dev, void *context);
        int                     (*attach)(struct mlx5_core_dev *dev, void *context);
        void                    (*detach)(struct mlx5_core_dev *dev, void *context);
-       void                    (*event)(struct mlx5_core_dev *dev, void *context,
-                                        enum mlx5_dev_event event, unsigned long param);
-       void                    (*pfault)(struct mlx5_core_dev *dev,
-                                         void *context,
-                                         struct mlx5_pagefault *pfault);
        void *                  (*get_dev)(void *context);
        int                     protocol;
        struct list_head        list;
@@ -1215,6 +1012,9 @@ struct mlx5_interface {
 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
 int mlx5_register_interface(struct mlx5_interface *intf);
 void mlx5_unregister_interface(struct mlx5_interface *intf);
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
+
 int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
@@ -1306,10 +1106,4 @@ enum {
        MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
 };
 
-static inline const struct cpumask *
-mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
-{
-       return dev->priv.irq_info[vector].mask;
-}
-
 #endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
new file mode 100644 (file)
index 0000000..00045cc
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef MLX5_CORE_EQ_H
+#define MLX5_CORE_EQ_H
+
+enum {
+       MLX5_EQ_PAGEREQ_IDX        = 0,
+       MLX5_EQ_CMD_IDX            = 1,
+       MLX5_EQ_ASYNC_IDX          = 2,
+       /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
+       MLX5_EQ_PFAULT_IDX         = 3,
+       MLX5_EQ_MAX_ASYNC_EQS,
+       /* completion eqs vector indices start here */
+       MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
+};
+
+#define MLX5_NUM_CMD_EQE   (32)
+#define MLX5_NUM_ASYNC_EQE (0x1000)
+#define MLX5_NUM_SPARE_EQE (0x80)
+
+struct mlx5_eq;
+struct mlx5_core_dev;
+
+struct mlx5_eq_param {
+       u8             index;
+       int            nent;
+       u64            mask;
+       void          *context;
+       irq_handler_t  handler;
+};
+
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+                      struct mlx5_eq_param *param);
+int
+mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
+
+/* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events.  We
+ * create EQs with MLX5_NUM_SPARE_EQE extra entries,
+ * so we must update our consumer index at
+ * least that often.
+ *
+ * mlx5_eq_update_cc() must be called for every EQE processed in the EQ's irq handler.
+ */
+static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc)
+{
+       if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) {
+               mlx5_eq_update_ci(eq, cc, 0);
+               cc = 0;
+       }
+       return cc;
+}
+
+struct mlx5_nb {
+       struct notifier_block nb;
+       u8 event_type;
+};
+
+#define mlx5_nb_cof(ptr, type, member) \
+       (container_of(container_of(ptr, struct mlx5_nb, nb), type, member))
+
+#define MLX5_NB_INIT(name, handler, event) do {              \
+       (name)->nb.notifier_call = handler;                  \
+       (name)->event_type = MLX5_EVENT_TYPE_##event;        \
+} while (0)
+
+#endif /* MLX5_CORE_EQ_H */
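The new include/linux/mlx5/eq.h replaces the hard-wired event callbacks with notifier chains: a consumer embeds a struct mlx5_nb, points it at one async event type with MLX5_NB_INIT(), and registers it. A minimal sketch of that pattern, assuming the mlx5_eq_notifier_register()/mlx5_eq_notifier_unregister() helpers referenced by the driver.h comment take a struct mlx5_nb * and deliver the async EQE as the notifier data; the tracer structure and handler below are hypothetical:

/* Hypothetical subscriber to the DEVICE_TRACER async event (sketch only). */
struct my_tracer {
        struct mlx5_nb        nb;   /* embedded, like mlx5_cmd.nb and mlx5_priv.pg_nb above */
        struct mlx5_core_dev *dev;
};

static int my_tracer_event(struct notifier_block *nb, unsigned long type, void *data)
{
        struct my_tracer *tracer = mlx5_nb_cof(nb, struct my_tracer, nb);
        struct mlx5_eqe *eqe = data;   /* assumed: the chain passes the async EQE */

        /* ... react to the event using tracer/eqe ... */
        return NOTIFY_OK;
}

/* at init time (teardown mirrors it with mlx5_eq_notifier_unregister): */
MLX5_NB_INIT(&tracer->nb, my_tracer_event, DEVICE_TRACER);
mlx5_eq_notifier_register(tracer->dev, &tracer->nb);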
index 34e17e6f894290f161d5e734e6fb2f9dfb0c148e..52393fbcf3b402c3ec7be691d3f4e57af1495d51 100644 (file)
@@ -144,6 +144,9 @@ enum {
        MLX5_CMD_OP_DESTROY_XRQ                   = 0x718,
        MLX5_CMD_OP_QUERY_XRQ                     = 0x719,
        MLX5_CMD_OP_ARM_XRQ                       = 0x71a,
+       MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY     = 0x725,
+       MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY       = 0x726,
+       MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS        = 0x727,
        MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
        MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
        MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
@@ -245,6 +248,7 @@ enum {
        MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
        MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
        MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
+       MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f,
        MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
        MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
        MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
@@ -260,6 +264,12 @@ enum {
        MLX5_CMD_OP_MAX
 };
 
+/* Valid range for general commands that don't work over an object */
+enum {
+       MLX5_CMD_OP_GENERAL_START = 0xb00,
+       MLX5_CMD_OP_GENERAL_END = 0xd00,
+};
+
 struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         outer_dmac[0x1];
        u8         outer_smac[0x1];
@@ -349,7 +359,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         reformat_l3_tunnel_to_l2[0x1];
        u8         reformat_l2_to_l3_tunnel[0x1];
        u8         reformat_and_modify_action[0x1];
-       u8         reserved_at_14[0xb];
+       u8         reserved_at_15[0xb];
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
        u8         log_max_modify_header_context[0x8];
@@ -586,7 +596,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
        u8      fdb_multi_path_to_table[0x1];
        u8      reserved_at_1d[0x1];
        u8      multi_fdb_encap[0x1];
-       u8      reserved_at_1e[0x1e1];
+       u8      reserved_at_1f[0x1e1];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
@@ -829,7 +839,7 @@ struct mlx5_ifc_vector_calc_cap_bits {
        struct mlx5_ifc_calc_op calc2;
        struct mlx5_ifc_calc_op calc3;
 
-       u8         reserved_at_e0[0x720];
+       u8         reserved_at_c0[0x720];
 };
 
 enum {
@@ -883,6 +893,10 @@ enum {
        MLX5_CAP_UMR_FENCE_NONE         = 0x2,
 };
 
+enum {
+       MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_0[0x30];
        u8         vhca_id[0x10];
@@ -1043,7 +1057,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         vector_calc[0x1];
        u8         umr_ptr_rlky[0x1];
        u8         imaicl[0x1];
-       u8         reserved_at_232[0x4];
+       u8         qp_packet_based[0x1];
+       u8         reserved_at_233[0x3];
        u8         qkv[0x1];
        u8         pkv[0x1];
        u8         set_deth_sqpn[0x1];
@@ -1193,7 +1208,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         num_vhca_ports[0x8];
        u8         reserved_at_618[0x6];
        u8         sw_owner_id[0x1];
-       u8         reserved_at_61f[0x1e1];
+       u8         reserved_at_61f[0x1];
+
+       u8         reserved_at_620[0x80];
+
+       u8         uctx_cap[0x20];
+
+       u8         reserved_at_6c0[0x140];
 };
 
 enum mlx5_flow_destination_type {
@@ -2249,7 +2270,8 @@ struct mlx5_ifc_qpc_bits {
        u8         st[0x8];
        u8         reserved_at_10[0x3];
        u8         pm_state[0x2];
-       u8         reserved_at_15[0x3];
+       u8         reserved_at_15[0x1];
+       u8         req_e2e_credit_mode[0x2];
        u8         offload_type[0x4];
        u8         end_padding_mode[0x2];
        u8         reserved_at_1e[0x2];
@@ -5567,7 +5589,7 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
 struct mlx5_ifc_modify_nic_vport_field_select_bits {
        u8         reserved_at_0[0x12];
        u8         affiliation[0x1];
-       u8         reserved_at_e[0x1];
+       u8         reserved_at_13[0x1];
        u8         disable_uc_local_lb[0x1];
        u8         disable_mc_local_lb[0x1];
        u8         node_guid[0x1];
@@ -9028,7 +9050,7 @@ struct mlx5_ifc_dcbx_param_bits {
        u8         dcbx_cee_cap[0x1];
        u8         dcbx_ieee_cap[0x1];
        u8         dcbx_standby_cap[0x1];
-       u8         reserved_at_0[0x5];
+       u8         reserved_at_3[0x5];
        u8         port_number[0x8];
        u8         reserved_at_10[0xa];
        u8         max_application_table_size[6];
@@ -9276,7 +9298,9 @@ struct mlx5_ifc_umem_bits {
 struct mlx5_ifc_uctx_bits {
        u8         modify_field_select[0x40];
 
-       u8         reserved_at_40[0x1c0];
+       u8         cap[0x20];
+
+       u8         reserved_at_60[0x1a0];
 };
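The uctx object gains a cap field, with MLX5_UCTX_CAP_RAW_TX as its first bit. A hedged sketch of requesting it while building the create-uctx command, assuming a create_uctx_in command layout that embeds this uctx struct (that layout is not part of this hunk):

        u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
        void *uctx;

        /* assumed: create_uctx_in carries a uctx field laid out as above */
        uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
        MLX5_SET(uctx, uctx, cap, MLX5_UCTX_CAP_RAW_TX);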
 
 struct mlx5_ifc_create_umem_in_bits {
index 34aed6032f868317f3140b40ad2d94668d0bd9d9..bf4bc01ffb0c68fb46417e8939aad45d4cc0f538 100644 (file)
@@ -107,9 +107,6 @@ enum mlx5e_connector_type {
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 
-#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
-#define PORT_MODULE_EVENT_ERROR_TYPE_MASK         0xF
-
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port);
index fbe322c966bc775916f37ab4addf9609eb53f4b0..b26ea90773840eb9a6b127abc56b9334c67fb9a6 100644 (file)
@@ -596,6 +596,11 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id);
 int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
                              int reset, void *out, int out_size);
 
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+                                               int res_num,
+                                               enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
 static inline const char *mlx5_qp_type_str(int type)
 {
        switch (type) {
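mlx5_core_res_hold()/mlx5_core_res_put() give callers a generic way to pin a tracked resource by number, replacing the SRQ-specific getters removed elsewhere in this series. A rough usage sketch, assuming the mlx5_res_type enum (e.g. MLX5_RES_QP) from the unchanged part of this header and that the lookup returns NULL when nothing matches:

        struct mlx5_core_rsc_common *res;

        res = mlx5_core_res_hold(dev, qpn, MLX5_RES_QP);   /* takes a reference */
        if (!res)
                return -EINVAL;                            /* hypothetical error handling */

        /* ... the resource cannot be freed while the reference is held ... */

        mlx5_core_res_put(res);                            /* drops the reference */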
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
deleted file mode 100644 (file)
index 1b1f3c2..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MLX5_SRQ_H
-#define MLX5_SRQ_H
-
-#include <linux/mlx5/driver.h>
-
-enum {
-       MLX5_SRQ_FLAG_ERR    = (1 << 0),
-       MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
-       MLX5_SRQ_FLAG_RNDV   = (1 << 2),
-};
-
-struct mlx5_srq_attr {
-       u32 type;
-       u32 flags;
-       u32 log_size;
-       u32 wqe_shift;
-       u32 log_page_size;
-       u32 wqe_cnt;
-       u32 srqn;
-       u32 xrcd;
-       u32 page_offset;
-       u32 cqn;
-       u32 pd;
-       u32 lwm;
-       u32 user_index;
-       u64 db_record;
-       __be64 *pas;
-       u32 tm_log_list_size;
-       u32 tm_next_tag;
-       u32 tm_hw_phase_cnt;
-       u32 tm_sw_phase_cnt;
-       u16 uid;
-};
-
-struct mlx5_core_dev;
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
-
-#endif /* MLX5_SRQ_H */
index 7f5ca2cd3a32f7438f3f1ab39ad47422a701b53b..a261d5528ff7e5b5382576d746d731175ac42e2c 100644 (file)
@@ -58,17 +58,6 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
                         int inlen);
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rqtn);
 int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
index f62b842e6596142750d476ef5f6ce606f253ab43..f8982e4e9702ad8dbf744ef85aa014fd5082019e 100644 (file)
@@ -88,6 +88,6 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
                                         int                 list_len,
                                         u64                 io_virtual_address);
 
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
 
 #endif /* IB_FMR_POOL_H */
index f6ba366051c734607d5c09d0f63cf759fdc46a43..fdef558e3a2dc5c579648c2fabc1a495df46f0df 100644 (file)
@@ -277,6 +277,7 @@ enum ib_port_capability_mask_bits {
        IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
        IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
        IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+       IB_PORT_CAP_MASK2_SUP = 1 << 15,
        IB_PORT_CM_SUP = 1 << 16,
        IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
        IB_PORT_REINIT_SUP = 1 << 18,
@@ -295,6 +296,15 @@ enum ib_port_capability_mask_bits {
        IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
 };
 
+enum ib_port_capability_mask2_bits {
+       IB_PORT_SET_NODE_DESC_SUP               = 1 << 0,
+       IB_PORT_EX_PORT_INFO_EX_SUP             = 1 << 1,
+       IB_PORT_VIRT_SUP                        = 1 << 2,
+       IB_PORT_SWITCH_PORT_STATE_TABLE_SUP     = 1 << 3,
+       IB_PORT_LINK_WIDTH_2X_SUP               = 1 << 4,
+       IB_PORT_LINK_SPEED_HDR_SUP              = 1 << 5,
+};
+
 #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
 
 struct opa_class_port_info {
index 9c0c2132a2d68a012ce3c7f3f12c2f966c040e43..89eead636e6816f207098ae7ac409955d1ffe8b7 100644 (file)
 
 #include <linux/types.h>
 #include <linux/device.h>
-#include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/rwsem.h>
-#include <linux/scatterlist.h>
 #include <linux/workqueue.h>
-#include <linux/socket.h>
 #include <linux/irq_poll.h>
 #include <uapi/linux/if_ether.h>
 #include <net/ipv6.h>
@@ -56,7 +53,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
-
+#include <linux/refcount.h>
 #include <linux/if_link.h>
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
@@ -437,6 +434,7 @@ enum ib_port_state {
 
 enum ib_port_width {
        IB_WIDTH_1X     = 1,
+       IB_WIDTH_2X     = 16,
        IB_WIDTH_4X     = 2,
        IB_WIDTH_8X     = 4,
        IB_WIDTH_12X    = 8
@@ -446,6 +444,7 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
 {
        switch (width) {
        case IB_WIDTH_1X:  return  1;
+       case IB_WIDTH_2X:  return  2;
        case IB_WIDTH_4X:  return  4;
        case IB_WIDTH_8X:  return  8;
        case IB_WIDTH_12X: return 12;
@@ -595,6 +594,7 @@ struct ib_port_attr {
        u8                      active_width;
        u8                      active_speed;
        u8                      phys_state;
+       u16                     port_cap_flags2;
 };
 
 enum ib_device_modify_flags {
@@ -732,7 +732,11 @@ enum ib_rate {
        IB_RATE_25_GBPS  = 15,
        IB_RATE_100_GBPS = 16,
        IB_RATE_200_GBPS = 17,
-       IB_RATE_300_GBPS = 18
+       IB_RATE_300_GBPS = 18,
+       IB_RATE_28_GBPS  = 19,
+       IB_RATE_50_GBPS  = 20,
+       IB_RATE_400_GBPS = 21,
+       IB_RATE_600_GBPS = 22,
 };
 
 /**
@@ -1508,6 +1512,10 @@ struct ib_ucontext {
 #endif
 
        struct ib_rdmacg_object cg_obj;
+       /*
+        * Implementation details of the RDMA core, don't use in drivers:
+        */
+       struct rdma_restrack_entry res;
 };
 
 struct ib_uobject {
@@ -2301,7 +2309,7 @@ struct ib_device {
         *   index - Updated the single counter pointed to by index
         *   num_counters - Updated all counters (will reset the timestamp
         *     and prevent further calls for lifespan milliseconds)
-        * Drivers are allowed to update all counters in leiu of just the
+        * Drivers are allowed to update all counters in lieu of just the
         *   one given in index at their option
         */
        int                        (*get_hw_stats)(struct ib_device *device,
@@ -2603,8 +2611,14 @@ struct ib_device {
        const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
                                                     int comp_vector);
 
-       const struct uverbs_object_tree_def *const *driver_specs;
+       const struct uapi_definition   *driver_def;
        enum rdma_driver_id             driver_id;
+       /*
+        * Provides synchronization between device unregistration and netlink
+        * commands on a device. To be used only by core.
+        */
+       refcount_t refcount;
+       struct completion unreg_completion;
 };
 
 struct ib_client {
@@ -4204,10 +4218,10 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
  */
 void rdma_roce_rescan_device(struct ib_device *ibdev);
 
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+
 
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
-                              struct uverbs_attr_bundle *attrs);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
 
 struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
                                     enum rdma_netdev_t type, const char *name,
index 3584d0816fcd76ca6a5b50f85b629ab8bb0cc5d5..dd0ed8048bb42bb8ffb9cde469979f87bef7afaf 100644 (file)
@@ -268,6 +268,13 @@ struct rvt_driver_provided {
         */
        void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 
+       /*
+        * Init a structure allocated with qp_priv_alloc(). This should be
+        * called after all qp fields have been initialized in rdmavt.
+        */
+       int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+                           struct ib_qp_init_attr *init_attr);
+
        /*
         * Free the driver's private qp structure.
         */
index 2638fa7cd702178afcd1d8bb7a8cdf464d4f2372..f34aa96e451849a06775762c06b30ccdb6893266 100644 (file)
@@ -38,6 +38,10 @@ enum rdma_restrack_type {
         * @RDMA_RESTRACK_MR: Memory Region (MR)
         */
        RDMA_RESTRACK_MR,
+       /**
+        * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
+        */
+       RDMA_RESTRACK_CTX,
        /**
         * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
         */
index 84d3d15f1f38c75669f6985c08e558ab17dfa14d..2f56844fb7da750a73220d6904857a98ac367778 100644 (file)
@@ -79,6 +79,8 @@ struct uverbs_attr_spec {
         */
        u8 alloc_and_copy:1;
        u8 mandatory:1;
+       /* True if this is from UVERBS_ATTR_UHW */
+       u8 is_udata:1;
 
        union {
                struct {
@@ -140,6 +142,13 @@ struct uverbs_attr_spec {
  *
  * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
  * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and write() extended commands
+ * using the coding
+ *   OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ *   OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * ie the WRITE path is treated as a special method type in the ioctl
+ * framework.
  */
 enum uapi_radix_data {
        UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
@@ -147,12 +156,16 @@ enum uapi_radix_data {
        UVERBS_API_ATTR_KEY_BITS = 6,
        UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
        UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+       UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
 
        UVERBS_API_METHOD_KEY_BITS = 5,
        UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
-       UVERBS_API_METHOD_KEY_NUM_CORE = 24,
-       UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
-                                          UVERBS_API_METHOD_KEY_NUM_CORE,
+       UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+       UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+       UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+       UVERBS_API_METHOD_KEY_NUM_DRIVER =
+               (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+               UVERBS_API_METHOD_KEY_NUM_CORE,
        UVERBS_API_METHOD_KEY_MASK = GENMASK(
                UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
                UVERBS_API_METHOD_KEY_SHIFT),
@@ -205,7 +218,22 @@ static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
        return id << UVERBS_API_METHOD_KEY_SHIFT;
 }
 
-static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+       if (id >= UVERBS_API_WRITE_KEY_NUM)
+               return UVERBS_API_KEY_ERR;
+       return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+       if (id >= UVERBS_API_WRITE_KEY_NUM)
+               return UVERBS_API_KEY_ERR;
+       return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
 {
        return attr_key &
               (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
@@ -213,10 +241,23 @@ static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
 
 static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
 {
-       return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+       unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+       return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
               (key & UVERBS_API_ATTR_KEY_MASK) == 0;
 }
 
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+       return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+       return (key & UVERBS_API_METHOD_KEY_MASK) ==
+              UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
 static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
 {
        /* 0 is the method slot itself */
@@ -246,9 +287,12 @@ static inline __attribute_const__ u32 uapi_key_attr(u32 id)
        return id;
 }
 
+/* Only true for ioctl methods */
 static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
 {
-       return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+       unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+       return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
               (key & UVERBS_API_ATTR_KEY_MASK) != 0;
 }
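With write() commands folded into the same radix space, each object key now carries three method sub-ranges: ioctl methods below UVERBS_API_METHOD_IS_WRITE plus the two write slots. A small illustration of composing lookup keys, assuming the object-key helper uapi_key_obj() from the unchanged part of this header:

        /* sketch: keys for one object's ioctl method and its legacy write command */
        u32 ioctl_key = uapi_key_obj(obj_id) | uapi_key_ioctl_method(method_id);
        u32 write_key = uapi_key_obj(obj_id) | uapi_key_write_method(cmd_num);

        /* the ranges cannot collide: uapi_key_is_ioctl_method() now rejects anything
         * at or above UVERBS_API_METHOD_IS_WRITE, which the write/write_ex slots occupy */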
 
@@ -285,8 +329,7 @@ struct uverbs_method_def {
        u32                                  flags;
        size_t                               num_attrs;
        const struct uverbs_attr_def * const (*attrs)[];
-       int (*handler)(struct ib_uverbs_file *ufile,
-                      struct uverbs_attr_bundle *ctx);
+       int (*handler)(struct uverbs_attr_bundle *attrs);
 };
 
 struct uverbs_object_def {
@@ -296,11 +339,131 @@ struct uverbs_object_def {
        const struct uverbs_method_def * const (*methods)[];
 };
 
-struct uverbs_object_tree_def {
-       size_t                                   num_objects;
-       const struct uverbs_object_def * const (*objects)[];
+enum uapi_definition_kind {
+       UAPI_DEF_END = 0,
+       UAPI_DEF_OBJECT_START,
+       UAPI_DEF_WRITE,
+       UAPI_DEF_CHAIN_OBJ_TREE,
+       UAPI_DEF_CHAIN,
+       UAPI_DEF_IS_SUPPORTED_FUNC,
+       UAPI_DEF_IS_SUPPORTED_DEV_FN,
+};
+
+enum uapi_definition_scope {
+       UAPI_SCOPE_OBJECT = 1,
+       UAPI_SCOPE_METHOD = 2,
 };
 
+struct uapi_definition {
+       u8 kind;
+       u8 scope;
+       union {
+               struct {
+                       u16 object_id;
+               } object_start;
+               struct {
+                       u16 command_num;
+                       u8 is_ex:1;
+                       u8 has_udata:1;
+                       u8 has_resp:1;
+                       u8 req_size;
+                       u8 resp_size;
+               } write;
+       };
+
+       union {
+               bool (*func_is_supported)(struct ib_device *device);
+               int (*func_write)(struct uverbs_attr_bundle *attrs);
+               const struct uapi_definition *chain;
+               const struct uverbs_object_def *chain_obj_tree;
+               size_t needs_fn_offset;
+       };
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...)                                 \
+       {                                                                      \
+               .kind = UAPI_DEF_OBJECT_START,                                 \
+               .object_start = { .object_id = _object_id },                   \
+       },                                                                     \
+               ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...)              \
+       {                                                                      \
+               .kind = UAPI_DEF_WRITE,                                        \
+               .scope = UAPI_SCOPE_OBJECT,                                    \
+               .write = { .is_ex = 0, .command_num = _command_num },          \
+               .func_write = _func,                                           \
+               _cmd_desc,                                                     \
+       },                                                                     \
+               ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...)           \
+       {                                                                      \
+               .kind = UAPI_DEF_WRITE,                                        \
+               .scope = UAPI_SCOPE_OBJECT,                                    \
+               .write = { .is_ex = 1, .command_num = _command_num },          \
+               .func_write = _func,                                           \
+               _cmd_desc,                                                     \
+       },                                                                     \
+               ##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn)                                        \
+       {                                                                      \
+               .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,                          \
+               .scope = UAPI_SCOPE_OBJECT,                                    \
+               .needs_fn_offset =                                             \
+                       offsetof(struct ib_device, ibdev_fn) +                 \
+                       BUILD_BUG_ON_ZERO(                                     \
+                               sizeof(((struct ib_device *)0)->ibdev_fn) !=   \
+                               sizeof(void *)),                               \
+       }
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn)                                     \
+       {                                                                      \
+               .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,                          \
+               .scope = UAPI_SCOPE_METHOD,                                    \
+               .needs_fn_offset =                                             \
+                       offsetof(struct ib_device, ibdev_fn) +                 \
+                       BUILD_BUG_ON_ZERO(                                     \
+                               sizeof(((struct ib_device *)0)->ibdev_fn) !=   \
+                               sizeof(void *)),                               \
+       }
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func)                                       \
+       {                                                                      \
+               .kind = UAPI_DEF_IS_SUPPORTED_FUNC,                            \
+               .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func,        \
+       }
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var)                                               \
+       {                                                                      \
+               .kind = UAPI_DEF_CHAIN, .chain = _def_var,                     \
+       }
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr)                     \
+       {                                                                      \
+               .kind = UAPI_DEF_CHAIN_OBJ_TREE,                               \
+               .object_start = { .object_id = _object_enum },                 \
+               .chain_obj_tree = _object_ptr,                                 \
+       }
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...)                       \
+       UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum)),   \
+               ##__VA_ARGS__
+
 /*
  * =======================================
  *     Attribute Specifications
@@ -361,6 +524,12 @@ struct uverbs_object_tree_def {
                          .u2.objs_arr.max_len = _max_len,                     \
                          __VA_ARGS__ } })
 
+/*
+ * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted,
+ * the user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
        (&(const struct uverbs_attr_def){                                      \
                .id = _attr_id,                                                \
@@ -433,25 +602,12 @@ struct uverbs_object_tree_def {
 #define UVERBS_ATTR_UHW()                                                      \
        UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
                           UVERBS_ATTR_MIN_SIZE(0),                            \
-                          UA_OPTIONAL),                                       \
+                          UA_OPTIONAL,                                        \
+                          .is_udata = 1),                                     \
        UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
                            UVERBS_ATTR_MIN_SIZE(0),                           \
-                           UA_OPTIONAL)
-
-/*
- * =======================================
- *     Declaration helpers
- * =======================================
- */
-
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...)                                 \
-       static const struct uverbs_object_def *const _name##_ptr[] = {         \
-               __VA_ARGS__,                                                   \
-       };                                                                     \
-       static const struct uverbs_object_tree_def _name = {                   \
-               .num_objects = ARRAY_SIZE(_name##_ptr),                        \
-               .objects = &_name##_ptr,                                       \
-       }
+                           UA_OPTIONAL,                                       \
+                           .is_udata = 1)
 
 /* =================================================
  *              Parsing infrastructure
@@ -492,6 +648,8 @@ struct uverbs_attr {
 };
 
 struct uverbs_attr_bundle {
+       struct ib_udata driver_udata;
+       struct ib_udata ucore;
        struct ib_uverbs_file *ufile;
        DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
        struct uverbs_attr attrs[];
@@ -660,6 +818,12 @@ static inline int _uverbs_copy_from_or_zero(void *to,
 #define uverbs_copy_from_or_zero(to, attrs_bundle, idx)                              \
        _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
 
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+       return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
                       size_t idx, u64 allowed_bits);
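Taken together, the new struct uapi_definition lets one table describe write() handlers, support predicates, and chained trees, terminated by a zeroed UAPI_DEF_END entry and wired up through the ib_device driver_def pointer added earlier in this patch. A hedged sketch of such a table; the object id and command number come from the existing uverbs uapi headers, the handler is hypothetical, and the .write.has_resp fragment stands in for the I/O size descriptors that are not part of this hunk:

        static const struct uapi_definition my_uapi_defs[] = {
                DECLARE_UVERBS_OBJECT(
                        UVERBS_OBJECT_PD,
                        DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_ALLOC_PD,
                                             my_alloc_pd_handler, /* int (*)(struct uverbs_attr_bundle *) */
                                             .write.has_resp = 1,
                                             UAPI_DEF_METHOD_NEEDS_FN(alloc_pd))),
                {},     /* kind == UAPI_DEF_END terminates the table */
        };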
index b3b21733cc557a03521fd60e147b8c3feb996aa6..3447bfe356d6ea7acc94766a177c6b1c73459112 100644 (file)
@@ -43,7 +43,7 @@
 #define _UVERBS_NAME(x, y)     _UVERBS_PASTE(x, y)
 #define UVERBS_METHOD(id)      _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
 #define UVERBS_HANDLER(id)     _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
-#define UVERBS_OBJECT(id)      _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
+#define UVERBS_OBJECT(id)      _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
 
 /* These are static so they do not need to be qualified */
 #define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
 #define ADD_UVERBS_METHODS(_name, _object_id, ...)                             \
        static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
                _object_id)[] = { __VA_ARGS__ };                               \
-       static const struct uverbs_object_def _name##_struct = {               \
+       static const struct uverbs_object_def _name = {                        \
                .id = _object_id,                                              \
                .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
                .methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
-       };                                                                     \
-       static const struct uverbs_object_def *const _name##_ptrs[] = {        \
-               &_name##_struct,                                               \
-       };                                                                     \
-       static const struct uverbs_object_tree_def _name = {                   \
-               .num_objects = 1,                                              \
-               .objects = &_name##_ptrs,                                      \
-       }
+       };
 
 /* Used by drivers to declare a complete parsing tree for a single method that
  * differs only in having additional driver specific attributes.
index 3db2802fbc6880e027da7481740f37245649b6b4..883abcf6d36eee2e2ca87a8b14cca0397db35915 100644 (file)
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/ib_user_ioctl_verbs.h>
 
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
-#else
-static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
-       return NULL;
-}
-#endif
-
 /* Returns _id, or causes a compile error if _id is not a u32.
  *
  * The uobj APIs should only be used with the write based uAPI to access
@@ -54,15 +45,15 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
  */
 #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
 
-#define uobj_get_type(_ufile, _object)                                         \
-       uapi_get_object((_ufile)->device->uapi, _object)
+#define uobj_get_type(_attrs, _object)                                         \
+       uapi_get_object((_attrs)->ufile->device->uapi, _object)
 
-#define uobj_get_read(_type, _id, _ufile)                                      \
-       rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+#define uobj_get_read(_type, _id, _attrs)                                      \
+       rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
                                _uobj_check_id(_id), UVERBS_LOOKUP_READ)
 
-#define ufd_get_read(_type, _fdnum, _ufile)                                    \
-       rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+#define ufd_get_read(_type, _fdnum, _attrs)                                    \
+       rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
                                (_fdnum)*typecheck(s32, _fdnum),               \
                                UVERBS_LOOKUP_READ)
 
@@ -72,26 +63,27 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
                return NULL;
        return uobj->object;
 }
-#define uobj_get_obj_read(_object, _type, _id, _ufile)                         \
+#define uobj_get_obj_read(_object, _type, _id, _attrs)                         \
        ((struct ib_##_object *)_uobj_get_obj_read(                            \
-               uobj_get_read(_type, _id, _ufile)))
+               uobj_get_read(_type, _id, _attrs)))
 
-#define uobj_get_write(_type, _id, _ufile)                                     \
-       rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+#define uobj_get_write(_type, _id, _attrs)                                     \
+       rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
                                _uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
 
 int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
-                          struct ib_uverbs_file *ufile, int success_res);
-#define uobj_perform_destroy(_type, _id, _ufile, _success_res)                 \
-       __uobj_perform_destroy(uobj_get_type(_ufile, _type),                   \
-                              _uobj_check_id(_id), _ufile, _success_res)
+                          const struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs)                               \
+       __uobj_perform_destroy(uobj_get_type(_attrs, _type),                   \
+                              _uobj_check_id(_id), _attrs)
 
 struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
-                                     u32 id, struct ib_uverbs_file *ufile);
+                                     u32 id,
+                                     const struct uverbs_attr_bundle *attrs);
 
-#define uobj_get_destroy(_type, _id, _ufile)                                   \
-       __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id),  \
-                          _ufile)
+#define uobj_get_destroy(_type, _id, _attrs)                                   \
+       __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id),  \
+                          _attrs)
 
 static inline void uobj_put_destroy(struct ib_uobject *uobj)
 {
@@ -111,14 +103,13 @@ static inline void uobj_put_write(struct ib_uobject *uobj)
        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 }
 
-static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
-                                                int success_res)
+static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj)
 {
        int ret = rdma_alloc_commit_uobject(uobj);
 
        if (ret)
                return ret;
-       return success_res;
+       return 0;
 }
 
 static inline void uobj_alloc_abort(struct ib_uobject *uobj)
@@ -127,18 +118,18 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
 }
 
 static inline struct ib_uobject *
-__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
-            struct ib_device **ib_dev)
+__uobj_alloc(const struct uverbs_api_object *obj,
+            struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
 {
-       struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
+       struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile);
 
        if (!IS_ERR(uobj))
                *ib_dev = uobj->context->device;
        return uobj;
 }
 
-#define uobj_alloc(_type, _ufile, _ib_dev)                                     \
-       __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+#define uobj_alloc(_type, _attrs, _ib_dev)                                     \
+       __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
 
 static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
                                                  struct ib_uobject *uobj,
@@ -191,5 +182,17 @@ static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
        uflow->resources = uflow_res;
 }
 
+struct uverbs_api_object {
+       const struct uverbs_obj_type *type_attrs;
+       const struct uverbs_obj_type_class *type_class;
+       u8 disabled:1;
+       u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+       return uobj->uapi_object->id;
+}
+
 #endif
 
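The hunk above converts the uobj_* helpers from taking a struct ib_uverbs_file to taking the call's uverbs_attr_bundle, and removes the success_res plumbing from uobj_alloc_commit(). A minimal kernel-side sketch of a handler using the converted helpers follows; the handler name and the UVERBS_OBJECT_PD type are assumptions for illustration, only the macro shapes come from this diff:

/* Illustrative only: allocate and commit a uobject with the attrs-based
 * helpers. The object type, handler name and error handling are assumptions.
 */
static int example_alloc_handler(struct uverbs_attr_bundle *attrs)
{
	struct ib_device *ib_dev;
	struct ib_uobject *uobj;

	/* was: uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev) */
	uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* ... driver-specific setup would go here ... */

	/* was: uobj_alloc_commit(uobj, in_len); success is now plain 0 */
	return uobj_alloc_commit(uobj);
}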
index c6a984c0c8817cd7e6d7c16127240b221968a564..01ac5853d9ac0832a23fb7e997d8dbc4f8530132 100644 (file)
@@ -6,7 +6,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -95,7 +95,7 @@
 #define HFI1_CAP_SDMA_AHG         (1UL <<  2) /* Enable SDMA AHG support */
 #define HFI1_CAP_EXTENDED_PSN     (1UL <<  3) /* Enable Extended PSN support */
 #define HFI1_CAP_HDRSUPP          (1UL <<  4) /* Enable Header Suppression */
-/* 1UL << 5 unused */
+#define HFI1_CAP_TID_RDMA         (1UL <<  5) /* Enable TID RDMA operations */
 #define HFI1_CAP_USE_SDMA_HEAD    (1UL <<  6) /* DMA Hdr Q tail vs. use CSR */
 #define HFI1_CAP_MULTI_PKT_EGR    (1UL <<  7) /* Enable multi-packet Egr buffs*/
 #define HFI1_CAP_NODROP_RHQ_FULL  (1UL <<  8) /* Don't drop on Hdr Q full */
 #define HFI1_CAP_NO_INTEGRITY     (1UL << 13) /* Enable ctxt integrity checks */
 #define HFI1_CAP_PKEY_CHECK       (1UL << 14) /* Enable ctxt PKey checking */
 #define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */
-/* 1UL << 16 unused */
+#define HFI1_CAP_OPFN             (1UL << 16) /* Enable the OPFN protocol */
 #define HFI1_CAP_SDMA_HEAD_CHECK  (1UL << 17) /* SDMA head checking */
 #define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */
 
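The two previously unused capability bits are now assigned. A trivial sketch of testing them, using only constants defined in this header; the cap_mask parameter and the pairing of the two bits are assumptions of the sketch:

/* Illustrative only: cap_mask stands for whatever capability word the caller
 * holds; TID RDMA parameters are assumed here to be exchanged via OPFN, so
 * the sketch requires both bits.
 */
static bool example_tid_rdma_possible(unsigned long cap_mask)
{
	return (cap_mask & HFI1_CAP_TID_RDMA) && (cap_mask & HFI1_CAP_OPFN);
}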
index c1f87735514f2239758198ad9cd15b9dc10c3b45..ef3c7ec793a756fe30860108868fc995c5bd6dfd 100644 (file)
@@ -46,6 +46,12 @@ struct hns_roce_ib_create_cq_resp {
        __aligned_u64 cap_flags;
 };
 
+struct hns_roce_ib_create_srq {
+       __aligned_u64 buf_addr;
+       __aligned_u64 db_addr;
+       __aligned_u64 que_addr;
+};
+
 struct hns_roce_ib_create_qp {
        __aligned_u64 buf_addr;
        __aligned_u64 db_addr;
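The new hns_roce_ib_create_srq struct carries the user buffers needed for SRQ creation. A hedged user-space sketch of filling it; the buffer variables are assumptions, only the three fields come from the struct above:

	/* Illustrative only: buf, db and idx_que are assumed to be buffers the
	 * provider library has already allocated; <stdint.h> is assumed for
	 * uintptr_t.
	 */
	struct hns_roce_ib_create_srq cmd = {
		.buf_addr = (uintptr_t)buf,     /* SRQ WQE buffer */
		.db_addr  = (uintptr_t)db,      /* doorbell record */
		.que_addr = (uintptr_t)idx_que, /* index queue buffer */
	};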
index 1254b51a551a1c370557340a2c480467dd1e5bac..c586fc43739cdfd4ebb765aaeba4b04f6540a391 100644 (file)
@@ -164,6 +164,7 @@ struct ib_uverbs_get_context {
 struct ib_uverbs_get_context_resp {
        __u32 async_fd;
        __u32 num_comp_vectors;
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_device {
@@ -310,6 +311,7 @@ struct ib_uverbs_alloc_pd {
 
 struct ib_uverbs_alloc_pd_resp {
        __u32 pd_handle;
+       __u32 driver_data[0];
 };
 
 struct ib_uverbs_dealloc_pd {
@@ -325,6 +327,7 @@ struct ib_uverbs_open_xrcd {
 
 struct ib_uverbs_open_xrcd_resp {
        __u32 xrcd_handle;
+       __u32 driver_data[0];
 };
 
 struct ib_uverbs_close_xrcd {
@@ -345,6 +348,7 @@ struct ib_uverbs_reg_mr_resp {
        __u32 mr_handle;
        __u32 lkey;
        __u32 rkey;
+       __u32 driver_data[0];
 };
 
 struct ib_uverbs_rereg_mr {
@@ -356,11 +360,13 @@ struct ib_uverbs_rereg_mr {
        __aligned_u64 hca_va;
        __u32 pd_handle;
        __u32 access_flags;
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_rereg_mr_resp {
        __u32 lkey;
        __u32 rkey;
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_dereg_mr {
@@ -372,11 +378,13 @@ struct ib_uverbs_alloc_mw {
        __u32 pd_handle;
        __u8  mw_type;
        __u8  reserved[3];
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_alloc_mw_resp {
        __u32 mw_handle;
        __u32 rkey;
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_dealloc_mw {
@@ -419,6 +427,7 @@ struct ib_uverbs_ex_create_cq {
 struct ib_uverbs_create_cq_resp {
        __u32 cq_handle;
        __u32 cqe;
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_ex_create_cq_resp {
@@ -629,6 +638,7 @@ struct ib_uverbs_create_qp_resp {
        __u32 max_recv_sge;
        __u32 max_inline_data;
        __u32 reserved;
+       __u32 driver_data[0];
 };
 
 struct ib_uverbs_ex_create_qp_resp {
@@ -733,9 +743,6 @@ struct ib_uverbs_ex_modify_qp {
        __u32   reserved;
 };
 
-struct ib_uverbs_modify_qp_resp {
-};
-
 struct ib_uverbs_ex_modify_qp_resp {
        __u32  comp_mask;
        __u32  response_length;
@@ -863,10 +870,12 @@ struct ib_uverbs_create_ah {
        __u32 pd_handle;
        __u32 reserved;
        struct ib_uverbs_ah_attr attr;
+       __aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_create_ah_resp {
        __u32 ah_handle;
+       __u32 driver_data[0];
 };
 
 struct ib_uverbs_destroy_ah {
@@ -1175,6 +1184,7 @@ struct ib_uverbs_create_srq_resp {
        __u32 max_wr;
        __u32 max_sge;
        __u32 srqn;
+       __u32 driver_data[0];
 };
 
 struct ib_uverbs_modify_srq {
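The zero-length driver_data[] members added throughout this header only mark where provider-specific response bytes begin; a zero-length array adds no bytes of its own, and with the types chosen here the existing layouts stay intact. A hedged sketch of how a provider library could lay out its response on top of one of them; the mydrv names and fields are invented for illustration:

/* Illustrative only: a made-up provider response embedding the generic one.
 * driver_data[] contributes nothing to sizeof(ibv_resp), so the provider
 * fields start immediately after pd_handle in the response buffer.
 */
struct mydrv_alloc_pd_resp {
	struct ib_uverbs_alloc_pd_resp	ibv_resp;
	__u32				pdn;		/* hypothetical provider field */
	__u32				reserved;
};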
index 8fa9f90e2bb19b9d566d65318a10d994c555cad6..87b3198f4b5d7aa02c330f59178d8cc74dba5f6c 100644 (file)
@@ -48,6 +48,7 @@ enum {
        MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6,
        MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
        MLX5_QP_FLAG_ALLOW_SCATTER_CQE  = 1 << 8,
+       MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE   = 1 << 9,
 };
 
 enum {
@@ -236,6 +237,7 @@ enum mlx5_ib_query_dev_resp_flags {
        /* Support 128B CQE compression */
        MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
        MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD  = 1 << 1,
+       MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
 };
 
 enum mlx5_ib_tunnel_offloads {
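A hedged sketch of reacting to the new query-device response bit; the flags-word parameter is an assumption, only the flag names come from this file's hunks:

/* Illustrative only: dev_resp_flags stands for the flags word returned by the
 * extended query-device command. If the device reports packet-based credit
 * mode, the sketch assumes a QP would then request it by setting
 * MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE at create time.
 */
static bool example_use_packet_based_credits(__u32 dev_resp_flags)
{
	return dev_resp_flags & MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
}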
index 408e220034de2e0ae19daff2a897a740ca332f26..b8d121d457f1511ded3e163741ac82996ee659ce 100644 (file)
@@ -158,6 +158,7 @@ enum mlx5_ib_create_flow_attrs {
        MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
        MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
        MLX5_IB_ATTR_CREATE_FLOW_TAG,
+       MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
 };
 
 enum mlx5_ib_destoy_flow_attrs {