Merge branch 'mlx5-packet-credit-fc' into rdma.git
author     Jason Gunthorpe <jgg@mellanox.com>
           Fri, 7 Dec 2018 20:25:12 +0000 (13:25 -0700)
committer  Jason Gunthorpe <jgg@mellanox.com>
           Fri, 7 Dec 2018 20:25:12 +0000 (13:25 -0700)
Danit Goldberg says:

Packet based credit mode

Packet based credit mode is an alternative end-to-end credit mode for QPs,
selected at QP creation time. Credits are transported from the responder to
the requester to optimize the use of the responder's receive resources. In
packet based credit mode, credits are issued on a per-packet basis.

This feature is advantageous when sending large RDMA messages through
switches that have limited memory.

The first commit exposes the QP creation flag and the HCA capability. The
second commit adds support for a new DV QP creation flag. The last commit
reports the packet based credit mode capability via the MLX5DV device
capabilities.
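
For orientation, the intended consumer of these kernel changes is the mlx5 DV
path in userspace: an application first checks the reported MLX5DV device
capability and then requests the mode when creating an RC QP, which the driver
translates into the MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE uverbs flag and the
qpc req_e2e_credit_mode bit seen in the qp.c and mlx5_ifc.h hunks below. The
sketch that follows is illustrative only; the MLX5DV_* capability and create
flag names are taken from the companion rdma-core work and should be treated
as assumptions, since this merge carries only the kernel side.

/*
 * Illustrative userspace sketch (not part of this merge). The
 * MLX5DV_CONTEXT_FLAGS_PACKET_BASED_CREDIT_MODE and
 * MLX5DV_QP_CREATE_PACKET_BASED_CREDIT_MODE names are assumed from the
 * companion rdma-core series.
 */
#include <stdio.h>
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

static struct ibv_qp *create_packet_credit_rc_qp(struct ibv_context *ctx,
                                                 struct ibv_pd *pd,
                                                 struct ibv_cq *cq)
{
        struct mlx5dv_context dv = {};
        struct ibv_qp_init_attr_ex attr = {};
        struct mlx5dv_qp_init_attr dv_attr = {};

        /* 1. Capability check: mirrors the MLX5_CAP_GEN(mdev, qp_packet_based)
         *    test that mlx5_ib_query_device() now reports to userspace.
         */
        if (mlx5dv_query_device(ctx, &dv) ||
            !(dv.flags & MLX5DV_CONTEXT_FLAGS_PACKET_BASED_CREDIT_MODE)) {
                fprintf(stderr, "packet based credit mode not supported\n");
                return NULL;
        }

        /* 2. Request the mode at QP creation; the kernel accepts it only for
         *    RC QPs (see the create_qp_common() hunk in qp.c).
         */
        attr.qp_type          = IBV_QPT_RC;
        attr.send_cq          = cq;
        attr.recv_cq          = cq;
        attr.pd               = pd;
        attr.comp_mask        = IBV_QP_INIT_ATTR_PD;
        attr.cap.max_send_wr  = 64;
        attr.cap.max_recv_wr  = 64;
        attr.cap.max_send_sge = 1;
        attr.cap.max_recv_sge = 1;

        dv_attr.comp_mask    = MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
        dv_attr.create_flags = MLX5DV_QP_CREATE_PACKET_BASED_CREDIT_MODE;

        return mlx5dv_create_qp(ctx, &attr, &dv_attr);
}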

* branch 'mlx5-packet-credit-fc':
  IB/mlx5: Report packet based credit mode device capability
  IB/mlx5: Add packet based credit mode support
  net/mlx5: Expose packet based credit mode

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c
include/linux/mlx5/mlx5_ifc.h

diff --cc drivers/infiniband/hw/mlx5/main.c
index 2b09e6896e5ab3cf9df76bd0965c5afc5fc0755f,f985d0d9b883743ddd47d34d26a1fd1130aad019..1b2e5465b882960b43a2b0c7c3a2c5fe77eb41a6
@@@ -1018,6 -1018,9 +1018,9 @@@ static int mlx5_ib_query_device(struct 
  
                if (MLX5_CAP_GEN(mdev, cqe_128_always))
                        resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+               if (MLX5_CAP_GEN(mdev, qp_packet_based))
+                       resp.flags |=
+                               MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
        }
  
        if (field_avail(typeof(resp), sw_parsing_caps,
@@@ -1763,7 -1766,7 +1766,7 @@@ static struct ib_ucontext *mlx5_ib_allo
  #endif
  
        if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
 -              err = mlx5_ib_devx_create(dev);
 +              err = mlx5_ib_devx_create(dev, true);
                if (err < 0)
                        goto out_uars;
                context->devx_uid = err;
@@@ -3717,8 -3720,7 +3720,8 @@@ _create_raw_flow_rule(struct mlx5_ib_de
                      struct mlx5_flow_destination *dst,
                      struct mlx5_ib_flow_matcher  *fs_matcher,
                      struct mlx5_flow_act *flow_act,
 -                    void *cmd_in, int inlen)
 +                    void *cmd_in, int inlen,
 +                    int dst_num)
  {
        struct mlx5_ib_flow_handler *handler;
        struct mlx5_flow_spec *spec;
        spec->match_criteria_enable = fs_matcher->match_criteria_enable;
  
        handler->rule = mlx5_add_flow_rules(ft, spec,
 -                                          flow_act, dst, 1);
 +                                          flow_act, dst, dst_num);
  
        if (IS_ERR(handler->rule)) {
                err = PTR_ERR(handler->rule);
@@@ -3803,14 -3805,12 +3806,14 @@@ struct mlx5_ib_flow_handler 
  mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
                        struct mlx5_ib_flow_matcher *fs_matcher,
                        struct mlx5_flow_act *flow_act,
 +                      u32 counter_id,
                        void *cmd_in, int inlen, int dest_id,
                        int dest_type)
  {
        struct mlx5_flow_destination *dst;
        struct mlx5_ib_flow_prio *ft_prio;
        struct mlx5_ib_flow_handler *handler;
 +      int dst_num = 0;
        bool mcast;
        int err;
  
        if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
                return ERR_PTR(-ENOMEM);
  
 -      dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 +      dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL);
        if (!dst)
                return ERR_PTR(-ENOMEM);
  
        }
  
        if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
 -              dst->type = dest_type;
 -              dst->tir_num = dest_id;
 +              dst[dst_num].type = dest_type;
 +              dst[dst_num].tir_num = dest_id;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
 -              dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
 -              dst->ft_num = dest_id;
 +              dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
 +              dst[dst_num].ft_num = dest_id;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        } else {
 -              dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
 +              dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
                flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        }
  
 +      dst_num++;
 +
 +      if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 +              dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 +              dst[dst_num].counter_id = counter_id;
 +              dst_num++;
 +      }
 +
        handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
 -                                      cmd_in, inlen);
 +                                      cmd_in, inlen, dst_num);
  
        if (IS_ERR(handler)) {
                err = PTR_ERR(handler);
@@@ -5397,6 -5389,14 +5400,6 @@@ static void init_delay_drop(struct mlx5
                mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
  }
  
 -static const struct cpumask *
 -mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
 -{
 -      struct mlx5_ib_dev *dev = to_mdev(ibdev);
 -
 -      return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector);
 -}
 -
  /* The mlx5_ib_multiport_mutex should be held when calling this function */
  static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
                                      struct mlx5_ib_multiport_info *mpi)
@@@ -5624,17 -5624,30 +5627,17 @@@ ADD_UVERBS_ATTRIBUTES_SIMPLE
        UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
                             enum mlx5_ib_uapi_flow_action_flags));
  
 -static int populate_specs_root(struct mlx5_ib_dev *dev)
 -{
 -      const struct uverbs_object_tree_def **trees = dev->driver_trees;
 -      size_t num_trees = 0;
 -
 -      if (mlx5_accel_ipsec_device_caps(dev->mdev) &
 -          MLX5_ACCEL_IPSEC_CAP_DEVICE)
 -              trees[num_trees++] = &mlx5_ib_flow_action;
 -
 -      if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
 -              trees[num_trees++] = &mlx5_ib_dm;
 -
 -      if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
 -          MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
 -              trees[num_trees++] = mlx5_ib_get_devx_tree();
 -
 -      num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
 -
 -      WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
 -      trees[num_trees] = NULL;
 -      dev->ib_dev.driver_specs = trees;
 +static const struct uapi_definition mlx5_ib_defs[] = {
 +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 +      UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
 +      UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
 +#endif
  
 -      return 0;
 -}
 +      UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
 +                              &mlx5_ib_flow_action),
 +      UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
 +      {}
 +};
  
  static int mlx5_ib_read_counters(struct ib_counters *counters,
                                 struct ib_counters_read_attr *read_attr,
@@@ -5897,6 -5910,7 +5900,6 @@@ int mlx5_ib_stage_caps_init(struct mlx5
        dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
        dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
 -      dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
        if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
            IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
                dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params;
        dev->ib_dev.uverbs_ex_cmd_mask |=
                        (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
                        (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
 -      dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
 +      if (mlx5_accel_ipsec_device_caps(dev->mdev) &
 +          MLX5_ACCEL_IPSEC_CAP_DEVICE) {
 +              dev->ib_dev.create_flow_action_esp =
 +                      mlx5_ib_create_flow_action_esp;
 +              dev->ib_dev.modify_flow_action_esp =
 +                      mlx5_ib_modify_flow_action_esp;
 +      }
        dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
 -      dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
        dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
        dev->ib_dev.create_counters = mlx5_ib_create_counters;
        dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
        dev->ib_dev.read_counters = mlx5_ib_read_counters;
  
 +      if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
 +              dev->ib_dev.driver_def = mlx5_ib_defs;
 +
        err = init_node_data(dev);
        if (err)
                return err;
@@@ -6167,6 -6173,11 +6170,6 @@@ void mlx5_ib_stage_bfrag_cleanup(struc
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
  }
  
 -static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
 -{
 -      return populate_specs_root(dev);
 -}
 -
  int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
  {
        const char *name;
@@@ -6234,7 -6245,7 +6237,7 @@@ static int mlx5_ib_stage_devx_init(stru
  {
        int uid;
  
 -      uid = mlx5_ib_devx_create(dev);
 +      uid = mlx5_ib_devx_create(dev, false);
        if (uid > 0)
                dev->devx_whitelist_uid = uid;
  
@@@ -6328,6 -6339,9 +6331,6 @@@ static const struct mlx5_ib_profile pf_
        STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
                     NULL,
                     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 -      STAGE_CREATE(MLX5_IB_STAGE_SPECS,
 -                   mlx5_ib_stage_populate_specs,
 -                   NULL),
        STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
                     mlx5_ib_stage_devx_init,
                     mlx5_ib_stage_devx_cleanup),
@@@ -6379,6 -6393,9 +6382,6 @@@ static const struct mlx5_ib_profile nic
        STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
                     NULL,
                     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 -      STAGE_CREATE(MLX5_IB_STAGE_SPECS,
 -                   mlx5_ib_stage_populate_specs,
 -                   NULL),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
diff --cc drivers/infiniband/hw/mlx5/mlx5_ib.h
index 24cb2f793210ede97bfcc75de88cc133a3468941,3e034bc85bde1d01ba0f686465e9d6fadd2d2371..7145f512f94876a5efbf57756d82bea8b6e8d3db
@@@ -258,7 -258,6 +258,7 @@@ enum mlx5_ib_rq_flags 
  };
  
  struct mlx5_ib_wq {
 +      struct mlx5_frag_buf_ctrl fbc;
        u64                    *wrid;
        u32                    *wr_data;
        struct wr_list         *w_list;
        unsigned                tail;
        u16                     cur_post;
        u16                     last_poll;
 -      void                   *qend;
 +      void                    *cur_edge;
  };
  
  enum mlx5_ib_wq_flags {
@@@ -462,6 -461,7 +462,7 @@@ enum mlx5_ib_qp_flags 
        MLX5_IB_QP_UNDERLAY                     = 1 << 10,
        MLX5_IB_QP_PCI_WRITE_END_PADDING        = 1 << 11,
        MLX5_IB_QP_TUNNEL_OFFLOAD               = 1 << 12,
+       MLX5_IB_QP_PACKET_BASED_CREDIT          = 1 << 13,
  };
  
  struct mlx5_umr_wr {
@@@ -525,7 -525,6 +526,7 @@@ struct mlx5_ib_srq 
        struct mlx5_core_srq    msrq;
        struct mlx5_frag_buf    buf;
        struct mlx5_db          db;
 +      struct mlx5_frag_buf_ctrl fbc;
        u64                    *wrid;
        /* protect SRQ hanlding
         */
  struct mlx5_ib_xrcd {
        struct ib_xrcd          ibxrcd;
        u32                     xrcdn;
 -      u16                     uid;
  };
  
  enum mlx5_ib_mtt_access_flags {
@@@ -785,6 -785,7 +786,6 @@@ enum mlx5_ib_stages 
        MLX5_IB_STAGE_UAR,
        MLX5_IB_STAGE_BFREG,
        MLX5_IB_STAGE_PRE_IB_REG_UMR,
 -      MLX5_IB_STAGE_SPECS,
        MLX5_IB_STAGE_WHITELIST_UID,
        MLX5_IB_STAGE_IB_REG,
        MLX5_IB_STAGE_POST_IB_REG_UMR,
@@@ -896,6 -897,7 +897,6 @@@ struct mlx5_ib_pf_eq 
  
  struct mlx5_ib_dev {
        struct ib_device                ib_dev;
 -      const struct uverbs_object_tree_def *driver_trees[7];
        struct mlx5_core_dev            *mdev;
        struct notifier_block           mdev_events;
        struct mlx5_roce                roce[MLX5_MAX_PORTS];
@@@ -1267,29 -1269,32 +1268,29 @@@ void mlx5_ib_put_native_port_mdev(struc
                                  u8 port_num);
  
  #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
 +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
  void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
  const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
 +extern const struct uapi_definition mlx5_ib_devx_defs[];
 +extern const struct uapi_definition mlx5_ib_flow_defs[];
  struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
        struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
 -      struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
 -      int dest_id, int dest_type);
 +      struct mlx5_flow_act *flow_act, u32 counter_id,
 +      void *cmd_in, int inlen, int dest_id, int dest_type);
  bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
 +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
  int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
  void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
  #else
  static inline int
 -mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
 +mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
 +                         bool is_user) { return -EOPNOTSUPP; }
  static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
 -static inline const struct uverbs_object_tree_def *
 -mlx5_ib_get_devx_tree(void) { return NULL; }
  static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
                                             int *dest_type)
  {
        return false;
  }
 -static inline int
 -mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
 -{
 -      return 0;
 -}
  static inline void
  mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
  {
diff --cc drivers/infiniband/hw/mlx5/qp.c
index 48f2e1fbeff8be7bb4a6409627103ce0bfabb742,d5095fcd4cdab75e7cea363ed27b4f1541305286..7789367114f7054a70af5fb669e36d9dab271a13
@@@ -108,6 -108,21 +108,6 @@@ static int is_sqp(enum ib_qp_type qp_ty
        return is_qp0(qp_type) || is_qp1(qp_type);
  }
  
 -static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
 -{
 -      return mlx5_buf_offset(&qp->buf, offset);
 -}
 -
 -static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
 -{
 -      return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
 -}
 -
 -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
 -{
 -      return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
 -}
 -
  /**
   * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
   *
@@@ -775,7 -790,6 +775,7 @@@ static int create_user_qp(struct mlx5_i
        __be64 *pas;
        void *qpc;
        int err;
 +      u16 uid;
  
        err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
        if (err) {
                goto err_umem;
        }
  
 -      MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
 +      uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0;
 +      MLX5_SET(create_qp_in, *in, uid, uid);
        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
        if (ubuffer->umem)
                mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@@ -904,30 -917,6 +904,30 @@@ static void destroy_qp_user(struct mlx5
                mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
  }
  
 +/* get_sq_edge - Get the next nearby edge.
 + *
 + * An 'edge' is defined as the first following address after the end
 + * of the fragment or the SQ. Accordingly, during the WQE construction
 + * which repetitively increases the pointer to write the next data, it
 + * simply should check if it gets to an edge.
 + *
 + * @sq - SQ buffer.
 + * @idx - Stride index in the SQ buffer.
 + *
 + * Return:
 + *    The new edge.
 + */
 +static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
 +{
 +      void *fragment_end;
 +
 +      fragment_end = mlx5_frag_buf_get_wqe
 +              (&sq->fbc,
 +               mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
 +
 +      return fragment_end + MLX5_SEND_WQE_BB;
 +}
 +
  static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
        base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
  
 -      err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
 +      err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
 +                                     &qp->buf, dev->mdev->priv.numa_node);
        if (err) {
                mlx5_ib_dbg(dev, "err %d\n", err);
                return err;
        }
  
 -      qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
 +      if (qp->rq.wqe_cnt)
 +              mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
 +                            ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
 +
 +      if (qp->sq.wqe_cnt) {
 +              int sq_strides_offset = (qp->sq.offset  & (PAGE_SIZE - 1)) /
 +                                      MLX5_SEND_WQE_BB;
 +              mlx5_init_fbc_offset(qp->buf.frags +
 +                                   (qp->sq.offset / PAGE_SIZE),
 +                                   ilog2(MLX5_SEND_WQE_BB),
 +                                   ilog2(qp->sq.wqe_cnt),
 +                                   sq_strides_offset, &qp->sq.fbc);
 +
 +              qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
 +      }
 +
        *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
        *in = kvzalloc(*inlen, GFP_KERNEL);
                qp->flags |= MLX5_IB_QP_SQPN_QP1;
        }
  
 -      mlx5_fill_page_array(&qp->buf,
 -                           (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
 +      mlx5_fill_page_frag_array(&qp->buf,
 +                                (__be64 *)MLX5_ADDR_OF(create_qp_in,
 +                                                       *in, pas));
  
        err = mlx5_db_alloc(dev->mdev, &qp->db);
        if (err) {
@@@ -1052,7 -1024,7 +1052,7 @@@ err_free
        kvfree(*in);
  
  err_buf:
 -      mlx5_buf_free(dev->mdev, &qp->buf);
 +      mlx5_frag_buf_free(dev->mdev, &qp->buf);
        return err;
  }
  
@@@ -1064,7 -1036,7 +1064,7 @@@ static void destroy_qp_kernel(struct ml
        kvfree(qp->sq.wr_data);
        kvfree(qp->rq.wrid);
        mlx5_db_free(dev->mdev, &qp->db);
 -      mlx5_buf_free(dev->mdev, &qp->buf);
 +      mlx5_frag_buf_free(dev->mdev, &qp->buf);
  }
  
  static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@@ -1917,7 -1889,8 +1917,8 @@@ static int create_qp_common(struct mlx5
                                              MLX5_QP_FLAG_BFREG_INDEX |
                                              MLX5_QP_FLAG_TYPE_DCT |
                                              MLX5_QP_FLAG_TYPE_DCI |
-                                             MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+                                             MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
+                                             MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
                        return -EINVAL;
  
                err = get_qp_user_index(to_mucontext(pd->uobject->context),
                        qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
                }
  
+               if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
+                       if (init_attr->qp_type != IB_QPT_RC ||
+                               !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
+                               mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
+                               return -EOPNOTSUPP;
+                       }
+                       qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
+               }
                if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
                        if (init_attr->qp_type != IB_QPT_UD ||
                            (MLX5_CAP_GEN(dev->mdev, port_type) !=
                MLX5_SET(qpc, qpc, cd_slave_send, 1);
        if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
                MLX5_SET(qpc, qpc, cd_slave_receive, 1);
+       if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+               MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                configure_responder_scat_cqe(init_attr, qpc);
                configure_requester_scat_cqe(dev, init_attr,
@@@ -3504,8 -3487,6 +3515,8 @@@ static int __mlx5_ib_modify_qp(struct i
                qp->sq.head = 0;
                qp->sq.tail = 0;
                qp->sq.cur_post = 0;
 +              if (qp->sq.wqe_cnt)
 +                      qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
                qp->sq.last_poll = 0;
                qp->db.db[MLX5_RCV_DBR] = 0;
                qp->db.db[MLX5_SND_DBR] = 0;
@@@ -3546,7 -3527,7 +3557,7 @@@ static bool modify_dci_qp_is_ok(enum ib
                return is_valid_mask(attr_mask, req, opt);
        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
                req |= IB_QP_PATH_MTU;
 -              opt = IB_QP_PKEY_INDEX;
 +              opt = IB_QP_PKEY_INDEX | IB_QP_AV;
                return is_valid_mask(attr_mask, req, opt);
        } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
                req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
        return err;
  }
  
 +static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
 +                                 u32 wqe_sz, void **cur_edge)
 +{
 +      u32 idx;
 +
 +      idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
 +      *cur_edge = get_sq_edge(sq, idx);
 +
 +      *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
 +}
 +
 +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
 + * next nearby edge and get new address translation for current WQE position.
 + * @sq - SQ buffer.
 + * @seg: Current WQE position (16B aligned).
 + * @wqe_sz: Total current WQE size [16B].
 + * @cur_edge: Updated current edge.
 + */
 +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
 +                                       u32 wqe_sz, void **cur_edge)
 +{
 +      if (likely(*seg != *cur_edge))
 +              return;
 +
 +      _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
 +}
 +
 +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
 + * pointers. At the end @seg is aligned to 16B regardless the copied size.
 + * @sq - SQ buffer.
 + * @cur_edge: Updated current edge.
 + * @seg: Current WQE position (16B aligned).
 + * @wqe_sz: Total current WQE size [16B].
 + * @src: Pointer to copy from.
 + * @n: Number of bytes to copy.
 + */
 +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
 +                                 void **seg, u32 *wqe_sz, const void *src,
 +                                 size_t n)
 +{
 +      while (likely(n)) {
 +              size_t leftlen = *cur_edge - *seg;
 +              size_t copysz = min_t(size_t, leftlen, n);
 +              size_t stride;
 +
 +              memcpy(*seg, src, copysz);
 +
 +              n -= copysz;
 +              src += copysz;
 +              stride = !n ? ALIGN(copysz, 16) : copysz;
 +              *seg += stride;
 +              *wqe_sz += stride >> 4;
 +              handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
 +      }
 +}
 +
  static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
  {
        struct mlx5_ib_cq *cq;
@@@ -3861,10 -3786,11 +3872,10 @@@ static __always_inline void set_raddr_s
        rseg->reserved = 0;
  }
  
 -static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
 -                       const struct ib_send_wr *wr, void *qend,
 -                       struct mlx5_ib_qp *qp, int *size)
 +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 +                      void **seg, int *size, void **cur_edge)
  {
 -      void *seg = eseg;
 +      struct mlx5_wqe_eth_seg *eseg = *seg;
  
        memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
  
                eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
                                 MLX5_ETH_WQE_L4_CSUM;
  
 -      seg += sizeof(struct mlx5_wqe_eth_seg);
 -      *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
 -
        if (wr->opcode == IB_WR_LSO) {
                struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
 -              int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
 -              u64 left, leftlen, copysz;
 +              size_t left, copysz;
                void *pdata = ud_wr->header;
 +              size_t stride;
  
                left = ud_wr->hlen;
                eseg->mss = cpu_to_be16(ud_wr->mss);
                eseg->inline_hdr.sz = cpu_to_be16(left);
  
 -              /*
 -               * check if there is space till the end of queue, if yes,
 -               * copy all in one shot, otherwise copy till the end of queue,
 -               * rollback and than the copy the left
 +              /* memcpy_send_wqe should get a 16B align address. Hence, we
 +               * first copy up to the current edge and then, if needed,
 +               * fall-through to memcpy_send_wqe.
                 */
 -              leftlen = qend - (void *)eseg->inline_hdr.start;
 -              copysz = min_t(u64, leftlen, left);
 -
 -              memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
 -
 -              if (likely(copysz > size_of_inl_hdr_start)) {
 -                      seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
 -                      *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
 -              }
 -
 -              if (unlikely(copysz < left)) { /* the last wqe in the queue */
 -                      seg = mlx5_get_send_wqe(qp, 0);
 +              copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
 +                             left);
 +              memcpy(eseg->inline_hdr.start, pdata, copysz);
 +              stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
 +                             sizeof(eseg->inline_hdr.start) + copysz, 16);
 +              *size += stride / 16;
 +              *seg += stride;
 +
 +              if (copysz < left) {
 +                      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
                        left -= copysz;
                        pdata += copysz;
 -                      memcpy(seg, pdata, left);
 -                      seg += ALIGN(left, 16);
 -                      *size += ALIGN(left, 16) / 16;
 +                      memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
 +                                      left);
                }
 +
 +              return;
        }
  
 -      return seg;
 +      *seg += sizeof(struct mlx5_wqe_eth_seg);
 +      *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
  }
  
  static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
@@@ -4165,6 -4095,24 +4176,6 @@@ static void set_reg_data_seg(struct mlx
        dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
  }
  
 -static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
 -                                 struct mlx5_ib_mr *mr, int mr_list_size)
 -{
 -      void *qend = qp->sq.qend;
 -      void *addr = mr->descs;
 -      int copy;
 -
 -      if (unlikely(seg + mr_list_size > qend)) {
 -              copy = qend - seg;
 -              memcpy(seg, addr, copy);
 -              addr += copy;
 -              mr_list_size -= copy;
 -              seg = mlx5_get_send_wqe(qp, 0);
 -      }
 -      memcpy(seg, addr, mr_list_size);
 -      seg += mr_list_size;
 -}
 -
  static __be32 send_ieth(const struct ib_send_wr *wr)
  {
        switch (wr->opcode) {
@@@ -4198,48 -4146,40 +4209,48 @@@ static u8 wq_sig(void *wqe
  }
  
  static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
 -                          void *wqe, int *sz)
 +                          void **wqe, int *wqe_sz, void **cur_edge)
  {
        struct mlx5_wqe_inline_seg *seg;
 -      void *qend = qp->sq.qend;
 -      void *addr;
 +      size_t offset;
        int inl = 0;
 -      int copy;
 -      int len;
        int i;
  
 -      seg = wqe;
 -      wqe += sizeof(*seg);
 +      seg = *wqe;
 +      *wqe += sizeof(*seg);
 +      offset = sizeof(*seg);
 +
        for (i = 0; i < wr->num_sge; i++) {
 -              addr = (void *)(unsigned long)(wr->sg_list[i].addr);
 -              len  = wr->sg_list[i].length;
 +              size_t len  = wr->sg_list[i].length;
 +              void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
 +
                inl += len;
  
                if (unlikely(inl > qp->max_inline_data))
                        return -ENOMEM;
  
 -              if (unlikely(wqe + len > qend)) {
 -                      copy = qend - wqe;
 -                      memcpy(wqe, addr, copy);
 -                      addr += copy;
 -                      len -= copy;
 -                      wqe = mlx5_get_send_wqe(qp, 0);
 +              while (likely(len)) {
 +                      size_t leftlen;
 +                      size_t copysz;
 +
 +                      handle_post_send_edge(&qp->sq, wqe,
 +                                            *wqe_sz + (offset >> 4),
 +                                            cur_edge);
 +
 +                      leftlen = *cur_edge - *wqe;
 +                      copysz = min_t(size_t, leftlen, len);
 +
 +                      memcpy(*wqe, addr, copysz);
 +                      len -= copysz;
 +                      addr += copysz;
 +                      *wqe += copysz;
 +                      offset += copysz;
                }
 -              memcpy(wqe, addr, len);
 -              wqe += len;
        }
  
        seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
  
 -      *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
 +      *wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
  
        return 0;
  }
@@@ -4352,8 -4292,7 +4363,8 @@@ static int mlx5_set_bsf(struct ib_mr *s
  }
  
  static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
 -                              struct mlx5_ib_qp *qp, void **seg, int *size)
 +                              struct mlx5_ib_qp *qp, void **seg,
 +                              int *size, void **cur_edge)
  {
        struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
        struct ib_mr *sig_mr = wr->sig_mr;
  
        *seg += wqe_size;
        *size += wqe_size / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  
        bsf = *seg;
        ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
  
        *seg += sizeof(*bsf);
        *size += sizeof(*bsf) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  
        return 0;
  }
@@@ -4484,8 -4425,7 +4495,8 @@@ static void set_sig_umr_segment(struct 
  
  
  static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
 -                        struct mlx5_ib_qp *qp, void **seg, int *size)
 +                        struct mlx5_ib_qp *qp, void **seg, int *size,
 +                        void **cur_edge)
  {
        const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
        struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
        set_sig_umr_segment(*seg, xlt_size);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  
        set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  
 -      ret = set_sig_data_segment(wr, qp, seg, size);
 +      ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
        if (ret)
                return ret;
  
@@@ -4561,11 -4503,11 +4572,11 @@@ static int set_psv_wr(struct ib_sig_dom
  
  static int set_reg_wr(struct mlx5_ib_qp *qp,
                      const struct ib_reg_wr *wr,
 -                    void **seg, int *size)
 +                    void **seg, int *size, void **cur_edge)
  {
        struct mlx5_ib_mr *mr = to_mmr(wr->mr);
        struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
 -      int mr_list_size = mr->ndescs * mr->desc_size;
 +      size_t mr_list_size = mr->ndescs * mr->desc_size;
        bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
  
        if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
        set_reg_umr_seg(*seg, mr, umr_inline);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  
        set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  
        if (umr_inline) {
 -              set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
 -              *size += get_xlt_octo(mr_list_size);
 +              memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
 +                              mr_list_size);
 +              *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
        } else {
                set_reg_data_seg(*seg, mr, pd);
                *seg += sizeof(struct mlx5_wqe_data_seg);
        return 0;
  }
  
 -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
 +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
 +                      void **cur_edge)
  {
        set_linv_umr_seg(*seg);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
        set_linv_mkey_seg(*seg);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
 -      if (unlikely((*seg == qp->sq.qend)))
 -              *seg = mlx5_get_send_wqe(qp, 0);
 +      handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
  }
  
 -static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
  {
        __be32 *p = NULL;
 -      int tidx = idx;
 +      u32 tidx = idx;
        int i, j;
  
 -      pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
 +      pr_debug("dump WQE index %u:\n", idx);
        for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
                if ((i & 0xf) == 0) {
 -                      void *buf = mlx5_get_send_wqe(qp, tidx);
                        tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
 -                      p = buf;
 +                      p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx);
 +                      pr_debug("WQBB at %p:\n", (void *)p);
                        j = 0;
                }
                pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
  }
  
  static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 -                   struct mlx5_wqe_ctrl_seg **ctrl,
 -                   const struct ib_send_wr *wr, unsigned *idx,
 -                   int *size, int nreq, bool send_signaled, bool solicited)
 +                     struct mlx5_wqe_ctrl_seg **ctrl,
 +                     const struct ib_send_wr *wr, unsigned int *idx,
 +                     int *size, void **cur_edge, int nreq,
 +                     bool send_signaled, bool solicited)
  {
        if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
                return -ENOMEM;
  
        *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
 -      *seg = mlx5_get_send_wqe(qp, *idx);
 +      *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
        *ctrl = *seg;
        *(uint32_t *)(*seg + 8) = 0;
        (*ctrl)->imm = send_ieth(wr);
  
        *seg += sizeof(**ctrl);
        *size = sizeof(**ctrl) / 16;
 +      *cur_edge = qp->sq.cur_edge;
  
        return 0;
  }
  static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
                     struct mlx5_wqe_ctrl_seg **ctrl,
                     const struct ib_send_wr *wr, unsigned *idx,
 -                   int *size, int nreq)
 +                   int *size, void **cur_edge, int nreq)
  {
 -      return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
 +      return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
                           wr->send_flags & IB_SEND_SIGNALED,
                           wr->send_flags & IB_SEND_SOLICITED);
  }
  
  static void finish_wqe(struct mlx5_ib_qp *qp,
                       struct mlx5_wqe_ctrl_seg *ctrl,
 -                     u8 size, unsigned idx, u64 wr_id,
 -                     int nreq, u8 fence, u32 mlx5_opcode)
 +                     void *seg, u8 size, void *cur_edge,
 +                     unsigned int idx, u64 wr_id, int nreq, u8 fence,
 +                     u32 mlx5_opcode)
  {
        u8 opmod = 0;
  
        qp->sq.wqe_head[idx] = qp->sq.head + nreq;
        qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
        qp->sq.w_list[idx].next = qp->sq.cur_post;
 +
 +      /* We save the edge which was possibly updated during the WQE
 +       * construction, into SQ's cache.
 +       */
 +      seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
 +      qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
 +                        get_sq_edge(&qp->sq, qp->sq.cur_post &
 +                                    (qp->sq.wqe_cnt - 1)) :
 +                        cur_edge;
  }
  
  static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_ib_qp *qp;
        struct mlx5_ib_mr *mr;
 -      struct mlx5_wqe_data_seg *dpseg;
        struct mlx5_wqe_xrc_seg *xrc;
        struct mlx5_bf *bf;
 +      void *cur_edge;
        int uninitialized_var(size);
 -      void *qend;
        unsigned long flags;
        unsigned idx;
        int err = 0;
  
        qp = to_mqp(ibqp);
        bf = &qp->bf;
 -      qend = qp->sq.qend;
  
        spin_lock_irqsave(&qp->sq.lock, flags);
  
                        goto out;
                }
  
 -              err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
 +              err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
 +                              nreq);
                if (err) {
                        mlx5_ib_warn(dev, "\n");
                        err = -ENOMEM;
                        case IB_WR_LOCAL_INV:
                                qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
                                ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
 -                              set_linv_wr(qp, &seg, &size);
 +                              set_linv_wr(qp, &seg, &size, &cur_edge);
                                num_sge = 0;
                                break;
  
                        case IB_WR_REG_MR:
                                qp->sq.wr_data[idx] = IB_WR_REG_MR;
                                ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
 -                              err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
 +                              err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
 +                                               &cur_edge);
                                if (err) {
                                        *bad_wr = wr;
                                        goto out;
                                mr = to_mmr(sig_handover_wr(wr)->sig_mr);
  
                                ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
 -                              err = set_sig_umr_wr(wr, qp, &seg, &size);
 +                              err = set_sig_umr_wr(wr, qp, &seg, &size,
 +                                                   &cur_edge);
                                if (err) {
                                        mlx5_ib_warn(dev, "\n");
                                        *bad_wr = wr;
                                        goto out;
                                }
  
 -                              finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
 -                                         fence, MLX5_OPCODE_UMR);
 +                              finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
 +                                         wr->wr_id, nreq, fence,
 +                                         MLX5_OPCODE_UMR);
                                /*
                                 * SET_PSV WQEs are not signaled and solicited
                                 * on error
                                 */
                                err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
 -                                                &size, nreq, false, true);
 +                                                &size, &cur_edge, nreq, false,
 +                                                true);
                                if (err) {
                                        mlx5_ib_warn(dev, "\n");
                                        err = -ENOMEM;
                                        goto out;
                                }
  
 -                              finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
 -                                         fence, MLX5_OPCODE_SET_PSV);
 +                              finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
 +                                         wr->wr_id, nreq, fence,
 +                                         MLX5_OPCODE_SET_PSV);
                                err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
 -                                                &size, nreq, false, true);
 +                                                &size, &cur_edge, nreq, false,
 +                                                true);
                                if (err) {
                                        mlx5_ib_warn(dev, "\n");
                                        err = -ENOMEM;
                                        goto out;
                                }
  
 -                              finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
 -                                         fence, MLX5_OPCODE_SET_PSV);
 +                              finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
 +                                         wr->wr_id, nreq, fence,
 +                                         MLX5_OPCODE_SET_PSV);
                                qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
                                num_sge = 0;
                                goto skip_psv;
                        set_datagram_seg(seg, wr);
                        seg += sizeof(struct mlx5_wqe_datagram_seg);
                        size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 -                      if (unlikely((seg == qend)))
 -                              seg = mlx5_get_send_wqe(qp, 0);
 +                      handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
 +
                        break;
                case IB_QPT_UD:
                        set_datagram_seg(seg, wr);
                        seg += sizeof(struct mlx5_wqe_datagram_seg);
                        size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 -
 -                      if (unlikely((seg == qend)))
 -                              seg = mlx5_get_send_wqe(qp, 0);
 +                      handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
  
                        /* handle qp that supports ud offload */
                        if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
                                memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
                                seg += sizeof(struct mlx5_wqe_eth_pad);
                                size += sizeof(struct mlx5_wqe_eth_pad) / 16;
 -
 -                              seg = set_eth_seg(seg, wr, qend, qp, &size);
 -
 -                              if (unlikely((seg == qend)))
 -                                      seg = mlx5_get_send_wqe(qp, 0);
 +                              set_eth_seg(wr, qp, &seg, &size, &cur_edge);
 +                              handle_post_send_edge(&qp->sq, &seg, size,
 +                                                    &cur_edge);
                        }
                        break;
                case MLX5_IB_QPT_REG_UMR:
                                goto out;
                        seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
                        size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 -                      if (unlikely((seg == qend)))
 -                              seg = mlx5_get_send_wqe(qp, 0);
 +                      handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
                        set_reg_mkey_segment(seg, wr);
                        seg += sizeof(struct mlx5_mkey_seg);
                        size += sizeof(struct mlx5_mkey_seg) / 16;
 -                      if (unlikely((seg == qend)))
 -                              seg = mlx5_get_send_wqe(qp, 0);
 +                      handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
                        break;
  
                default:
                }
  
                if (wr->send_flags & IB_SEND_INLINE && num_sge) {
 -                      int uninitialized_var(sz);
 -
 -                      err = set_data_inl_seg(qp, wr, seg, &sz);
 +                      err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
                        if (unlikely(err)) {
                                mlx5_ib_warn(dev, "\n");
                                *bad_wr = wr;
                                goto out;
                        }
 -                      size += sz;
                } else {
 -                      dpseg = seg;
                        for (i = 0; i < num_sge; i++) {
 -                              if (unlikely(dpseg == qend)) {
 -                                      seg = mlx5_get_send_wqe(qp, 0);
 -                                      dpseg = seg;
 -                              }
 +                              handle_post_send_edge(&qp->sq, &seg, size,
 +                                                    &cur_edge);
                                if (likely(wr->sg_list[i].length)) {
 -                                      set_data_ptr_seg(dpseg, wr->sg_list + i);
 +                                      set_data_ptr_seg
 +                                      ((struct mlx5_wqe_data_seg *)seg,
 +                                       wr->sg_list + i);
                                        size += sizeof(struct mlx5_wqe_data_seg) / 16;
 -                                      dpseg++;
 +                                      seg += sizeof(struct mlx5_wqe_data_seg);
                                }
                        }
                }
  
                qp->next_fence = next_fence;
 -              finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
 -                         mlx5_ib_opcode[wr->opcode]);
 +              finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
 +                         fence, mlx5_ib_opcode[wr->opcode]);
  skip_psv:
                if (0)
                        dump_wqe(qp, idx, size);
@@@ -5068,7 -5004,7 +5079,7 @@@ static int _mlx5_ib_post_recv(struct ib
                        goto out;
                }
  
 -              scat = get_recv_wqe(qp, ind);
 +              scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
                if (qp->wq_sig)
                        scat++;
  
@@@ -5516,6 -5452,7 +5527,6 @@@ struct ib_xrcd *mlx5_ib_alloc_xrcd(stru
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_xrcd *xrcd;
        int err;
 -      u16 uid;
  
        if (!MLX5_CAP_GEN(dev->mdev, xrc))
                return ERR_PTR(-ENOSYS);
        if (!xrcd)
                return ERR_PTR(-ENOMEM);
  
 -      uid = context ? to_mucontext(context)->devx_uid : 0;
 -      err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
 +      err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
        if (err) {
                kfree(xrcd);
                return ERR_PTR(-ENOMEM);
        }
  
 -      xrcd->uid = uid;
        return &xrcd->ibxrcd;
  }
  
@@@ -5537,9 -5476,10 +5548,9 @@@ int mlx5_ib_dealloc_xrcd(struct ib_xrc
  {
        struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
        u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
 -      u16 uid =  to_mxrcd(xrcd)->uid;
        int err;
  
 -      err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
 +      err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
        if (err)
                mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
  
diff --cc include/linux/mlx5/mlx5_ifc.h
index 171d6866364020d215f72c36be00056e041cca9e,91d6e85e3cefb979b36b6bf8b0fe13d177208aea..52393fbcf3b402c3ec7be691d3f4e57af1495d51
@@@ -144,9 -144,6 +144,9 @@@ enum 
        MLX5_CMD_OP_DESTROY_XRQ                   = 0x718,
        MLX5_CMD_OP_QUERY_XRQ                     = 0x719,
        MLX5_CMD_OP_ARM_XRQ                       = 0x71a,
 +      MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY     = 0x725,
 +      MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY       = 0x726,
 +      MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS        = 0x727,
        MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
        MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
        MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
        MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
        MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
        MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
 +      MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f,
        MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
        MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
        MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
        MLX5_CMD_OP_MAX
  };
  
 +/* Valid range for general commands that don't work over an object */
 +enum {
 +      MLX5_CMD_OP_GENERAL_START = 0xb00,
 +      MLX5_CMD_OP_GENERAL_END = 0xd00,
 +};
 +
  struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         outer_dmac[0x1];
        u8         outer_smac[0x1];
@@@ -1057,7 -1047,8 +1057,8 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
        u8         vector_calc[0x1];
        u8         umr_ptr_rlky[0x1];
        u8         imaicl[0x1];
-       u8         reserved_at_232[0x4];
+       u8         qp_packet_based[0x1];
+       u8         reserved_at_233[0x3];
        u8         qkv[0x1];
        u8         pkv[0x1];
        u8         set_deth_sqpn[0x1];
@@@ -2269,7 -2260,8 +2270,8 @@@ struct mlx5_ifc_qpc_bits 
        u8         st[0x8];
        u8         reserved_at_10[0x3];
        u8         pm_state[0x2];
-       u8         reserved_at_15[0x3];
+       u8         reserved_at_15[0x1];
+       u8         req_e2e_credit_mode[0x2];
        u8         offload_type[0x4];
        u8         end_padding_mode[0x2];
        u8         reserved_at_1e[0x2];