Merge branch 'mlx5-vport-loopback' into rdma.git
author Doug Ledford <dledford@redhat.com>
Sat, 22 Sep 2018 00:41:58 +0000 (20:41 -0400)
committer Doug Ledford <dledford@redhat.com>
Sat, 22 Sep 2018 00:41:58 +0000 (20:41 -0400)
For dependencies, branch based on 'mlx5-next' of
    git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git

mlx5 mcast/ucast loopback control enhancements from Leon Romanovsky:

====================
This is a short series from Mark which extends the handling of loopback
traffic. Originally mlx5 IB dynamically enabled/disabled both unicast
and multicast loopback based on the number of users. However, RAW ethernet
QPs need more granular control.
====================
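
At a glance, the series replaces the single user_td counter with a small
reference-counted state so that both triggers (a second user context sharing
the vport, or a RAW ethernet QP that mandates loopback) go through one pair of
helpers. A condensed sketch of that scheme, using the names from the main.c and
mlx5_ib.h hunks below (the nested enable check is flattened here):

    /* Condensed from the hunks below -- a sketch, not a drop-in replacement. */
    struct mlx5_ib_lb_state {
            struct mutex mutex;   /* protects the counters below */
            u32          user_td; /* ucontexts holding a transport domain */
            int          qps;     /* RAW ethernet QPs that mandate loopback */
            bool         enabled; /* current vport loopback setting */
    };

    int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
    {
            int err = 0;

            mutex_lock(&dev->lb.mutex);
            if (td)
                    dev->lb.user_td++;
            if (qp)
                    dev->lb.qps++;

            /* Turn loopback on once a second ucontext shares the vport or the
             * first loopback-mandating QP appears; 'enabled' keeps this
             * idempotent.
             */
            if ((dev->lb.user_td == 2 || dev->lb.qps == 1) && !dev->lb.enabled) {
                    err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
                    dev->lb.enabled = true;
            }
            mutex_unlock(&dev->lb.mutex);
            return err;
    }

    /* mlx5_ib_disable_lb() mirrors this: decrement the counters and switch
     * loopback back off once user_td == 1 && qps == 0.
     */

With this in place, mlx5_ib_alloc/dealloc_transport_domain() and the RAW packet
TIR create/destroy paths in qp.c reduce to one-line calls into these helpers.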

Fixed failed automerge in mlx5_ib.h (minor context conflict issue)

mlx5-vport-loopback branch:
    RDMA/mlx5: Enable vport loopback when user context or QP mandate
    RDMA/mlx5: Allow creating RAW ethernet QP with loopback support
    RDMA/mlx5: Refactor transport domain bookkeeping logic
    net/mlx5: Rename incorrect naming in IFC file

Signed-off-by: Doug Ledford <dledford@redhat.com>
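
For the RAW ethernet QP side (the qp.c hunks at the bottom), the two new
creation flags and the eswitch representor case (dev->rep) feed the TIR's
self_lb_block mask. A rough sketch of that mapping; lb_block_mask() is a
hypothetical helper used only for illustration, in the driver the logic is
open-coded in create_raw_packet_qp_tir() and create_rss_raw_qp_tir():

    /* Hypothetical helper, condensed from the qp.c hunks below. */
    static u8 lb_block_mask(u32 qp_flags_en, bool is_rep)
    {
            u8 lb_flag = 0;

            if ((qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) || is_rep)
                    lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
            if (qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
                    lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;

            return lb_flag; /* written into the TIR's self_lb_block field */
    }

When mlx5_core_create_tir() succeeds with a non-zero mask, the QP takes a
loopback reference via mlx5_ib_enable_lb(dev, false, true);
destroy_raw_packet_qp_tir() and destroy_rss_raw_qp_tir() drop it again with
mlx5_ib_disable_lb(dev, false, true) before destroying the TIR.
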
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c

index 1348a08261a949806ddb9399675bfaf326bb1b97,131a1286a767bcdcf76d6e6ea75c3cd6d1309a68..853574345d91eb40b654ebd5eb592aa3c9525b11
@@@ -1571,6 -1571,48 +1571,48 @@@ static void deallocate_uars(struct mlx5
                        mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
  }
  
+ int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+ {
+       int err = 0;
+       mutex_lock(&dev->lb.mutex);
+       if (td)
+               dev->lb.user_td++;
+       if (qp)
+               dev->lb.qps++;
+       if (dev->lb.user_td == 2 ||
+           dev->lb.qps == 1) {
+               if (!dev->lb.enabled) {
+                       err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+                       dev->lb.enabled = true;
+               }
+       }
+       mutex_unlock(&dev->lb.mutex);
+       return err;
+ }
+
+ void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+ {
+       mutex_lock(&dev->lb.mutex);
+       if (td)
+               dev->lb.user_td--;
+       if (qp)
+               dev->lb.qps--;
+       if (dev->lb.user_td == 1 &&
+           dev->lb.qps == 0) {
+               if (dev->lb.enabled) {
+                       mlx5_nic_vport_update_local_lb(dev->mdev, false);
+                       dev->lb.enabled = false;
+               }
+       }
+       mutex_unlock(&dev->lb.mutex);
+ }
+
  static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
  {
        int err;
             !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
                return err;
  
-       mutex_lock(&dev->lb_mutex);
-       dev->user_td++;
-       if (dev->user_td == 2)
-               err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
-       mutex_unlock(&dev->lb_mutex);
-       return err;
+       return mlx5_ib_enable_lb(dev, true, false);
  }
  
  static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
             !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
                return;
  
-       mutex_lock(&dev->lb_mutex);
-       dev->user_td--;
-       if (dev->user_td < 2)
-               mlx5_nic_vport_update_local_lb(dev->mdev, false);
-       mutex_unlock(&dev->lb_mutex);
+       mlx5_ib_disable_lb(dev, true, false);
  }
  
  static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                        goto out_mdev;
        }
  
 -      INIT_LIST_HEAD(&context->vma_private_list);
 -      mutex_init(&context->vma_private_list_mutex);
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
  
        context->lib_caps = req.lib_caps;
        print_lib_caps(dev, context->lib_caps);
  
 +      if (mlx5_lag_is_active(dev->mdev)) {
 +              u8 port = mlx5_core_native_port_num(dev->mdev);
 +
 +              atomic_set(&context->tx_port_affinity,
 +                         atomic_add_return(
 +                                 1, &dev->roce[port].tx_port_affinity));
 +      }
 +
        return &context->ibucontext;
  
  out_mdev:
@@@ -1861,13 -1884,6 +1890,13 @@@ static int mlx5_ib_dealloc_ucontext(str
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_bfreg_info *bfregi;
  
 +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 +      /* All umem's must be destroyed before destroying the ucontext. */
 +      mutex_lock(&ibcontext->per_mm_list_lock);
 +      WARN_ON(!list_empty(&ibcontext->per_mm_list));
 +      mutex_unlock(&ibcontext->per_mm_list_lock);
 +#endif
 +
        if (context->devx_uid)
                mlx5_ib_devx_destroy(dev, context);
  
@@@ -1913,9 -1929,94 +1942,9 @@@ static int get_extended_index(unsigned 
        return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
  }
  
 -static void  mlx5_ib_vma_open(struct vm_area_struct *area)
 -{
 -      /* vma_open is called when a new VMA is created on top of our VMA.  This
 -       * is done through either mremap flow or split_vma (usually due to
 -       * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
 -       * as this VMA is strongly hardware related.  Therefore we set the
 -       * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
 -       * calling us again and trying to do incorrect actions.  We assume that
 -       * the original VMA size is exactly a single page, and therefore all
 -       * "splitting" operation will not happen to it.
 -       */
 -      area->vm_ops = NULL;
 -}
 -
 -static void  mlx5_ib_vma_close(struct vm_area_struct *area)
 -{
 -      struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
 -
 -      /* It's guaranteed that all VMAs opened on a FD are closed before the
 -       * file itself is closed, therefore no sync is needed with the regular
 -       * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
 -       * However need a sync with accessing the vma as part of
 -       * mlx5_ib_disassociate_ucontext.
 -       * The close operation is usually called under mm->mmap_sem except when
 -       * process is exiting.
 -       * The exiting case is handled explicitly as part of
 -       * mlx5_ib_disassociate_ucontext.
 -       */
 -      mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
 -
 -      /* setting the vma context pointer to null in the mlx5_ib driver's
 -       * private data, to protect a race condition in
 -       * mlx5_ib_disassociate_ucontext().
 -       */
 -      mlx5_ib_vma_priv_data->vma = NULL;
 -      mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
 -      list_del(&mlx5_ib_vma_priv_data->list);
 -      mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
 -      kfree(mlx5_ib_vma_priv_data);
 -}
 -
 -static const struct vm_operations_struct mlx5_ib_vm_ops = {
 -      .open = mlx5_ib_vma_open,
 -      .close = mlx5_ib_vma_close
 -};
 -
 -static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
 -                              struct mlx5_ib_ucontext *ctx)
 -{
 -      struct mlx5_ib_vma_private_data *vma_prv;
 -      struct list_head *vma_head = &ctx->vma_private_list;
 -
 -      vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
 -      if (!vma_prv)
 -              return -ENOMEM;
 -
 -      vma_prv->vma = vma;
 -      vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
 -      vma->vm_private_data = vma_prv;
 -      vma->vm_ops =  &mlx5_ib_vm_ops;
 -
 -      mutex_lock(&ctx->vma_private_list_mutex);
 -      list_add(&vma_prv->list, vma_head);
 -      mutex_unlock(&ctx->vma_private_list_mutex);
 -
 -      return 0;
 -}
  
  static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
  {
 -      struct vm_area_struct *vma;
 -      struct mlx5_ib_vma_private_data *vma_private, *n;
 -      struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
 -
 -      mutex_lock(&context->vma_private_list_mutex);
 -      list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
 -                               list) {
 -              vma = vma_private->vma;
 -              zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
 -              /* context going to be destroyed, should
 -               * not access ops any more.
 -               */
 -              vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
 -              vma->vm_ops = NULL;
 -              list_del(&vma_private->list);
 -              kfree(vma_private);
 -      }
 -      mutex_unlock(&context->vma_private_list_mutex);
  }
  
  static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@@ -1938,6 -2039,9 +1967,6 @@@ static int mlx5_ib_mmap_clock_info_page
                                        struct vm_area_struct *vma,
                                        struct mlx5_ib_ucontext *context)
  {
 -      phys_addr_t pfn;
 -      int err;
 -
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;
  
        if (!dev->mdev->clock_info_page)
                return -EOPNOTSUPP;
  
 -      pfn = page_to_pfn(dev->mdev->clock_info_page);
 -      err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
 -                            vma->vm_page_prot);
 -      if (err)
 -              return err;
 -
 -      return mlx5_ib_set_vma_data(vma, context);
 +      return rdma_user_mmap_page(&context->ibucontext, vma,
 +                                 dev->mdev->clock_info_page, PAGE_SIZE);
  }
  
  static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
        pfn = uar_index2pfn(dev, uar_index);
        mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
  
 -      vma->vm_page_prot = prot;
 -      err = io_remap_pfn_range(vma, vma->vm_start, pfn,
 -                               PAGE_SIZE, vma->vm_page_prot);
 +      err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
 +                              prot);
        if (err) {
                mlx5_ib_err(dev,
 -                          "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
 +                          "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
                            err, mmap_cmd2str(cmd));
 -              err = -EAGAIN;
                goto err;
        }
  
 -      err = mlx5_ib_set_vma_data(vma, context);
 -      if (err)
 -              goto err;
 -
        if (dyn_uar)
                bfregi->sys_pages[idx] = uar_index;
        return 0;
@@@ -2074,6 -2189,7 +2103,6 @@@ static int dm_mmap(struct ib_ucontext *
        size_t map_size = vma->vm_end - vma->vm_start;
        u32 npages = map_size >> PAGE_SHIFT;
        phys_addr_t pfn;
 -      pgprot_t prot;
  
        if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
            page_idx + npages)
              MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
              PAGE_SHIFT) +
              page_idx;
 -      prot = pgprot_writecombine(vma->vm_page_prot);
 -      vma->vm_page_prot = prot;
 -
 -      if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
 -                             vma->vm_page_prot))
 -              return -EAGAIN;
 -
 -      return mlx5_ib_set_vma_data(vma, mctx);
 +      return rdma_user_mmap_io(context, vma, pfn, map_size,
 +                               pgprot_writecombine(vma->vm_page_prot));
  }
  
  static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@@ -2359,50 -2481,20 +2388,50 @@@ static int check_mpls_supp_fields(u32 f
                   offsetof(typeof(filter), field) -\
                   sizeof(filter.field))
  
 -static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
 -                                const struct ib_flow_attr *flow_attr,
 -                                struct mlx5_flow_act *action)
 +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 +                         bool is_egress,
 +                         struct mlx5_flow_act *action)
  {
 -      struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
  
        switch (maction->ib_action.type) {
        case IB_FLOW_ACTION_ESP:
 +              if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
 +                                    MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
 +                      return -EINVAL;
                /* Currently only AES_GCM keymat is supported by the driver */
                action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
 -              action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
 +              action->action |= is_egress ?
                        MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
                        MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
                return 0;
 +      case IB_FLOW_ACTION_UNSPECIFIED:
 +              if (maction->flow_action_raw.sub_type ==
 +                  MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
 +                      if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 +                              return -EINVAL;
 +                      action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 +                      action->modify_id = maction->flow_action_raw.action_id;
 +                      return 0;
 +              }
 +              if (maction->flow_action_raw.sub_type ==
 +                  MLX5_IB_FLOW_ACTION_DECAP) {
 +                      if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
 +                              return -EINVAL;
 +                      action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
 +                      return 0;
 +              }
 +              if (maction->flow_action_raw.sub_type ==
 +                  MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
 +                      if (action->action &
 +                          MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
 +                              return -EINVAL;
 +                      action->action |=
 +                              MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 +                      action->reformat_id =
 +                              maction->flow_action_raw.action_id;
 +                      return 0;
 +              }
 +              /* fall through */
        default:
                return -EOPNOTSUPP;
        }
@@@ -2739,8 -2831,7 +2768,8 @@@ static int parse_flow_attr(struct mlx5_
                action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
                break;
        case IB_FLOW_SPEC_ACTION_HANDLE:
 -              ret = parse_flow_flow_action(ib_spec, flow_attr, action);
 +              ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
 +                      flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
                if (ret)
                        return ret;
                break;
@@@ -2821,7 -2912,7 +2850,7 @@@ is_valid_esp_aes_gcm(struct mlx5_core_d
         * rules would be supported, always return VALID_SPEC_NA.
         */
        if (!is_crypto)
 -              return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
 +              return VALID_SPEC_NA;
  
        return is_crypto && is_ipsec &&
                (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
@@@ -2964,15 -3055,14 +2993,15 @@@ enum flow_table_type 
  static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
                                           struct mlx5_ib_flow_prio *prio,
                                           int priority,
 -                                         int num_entries, int num_groups)
 +                                         int num_entries, int num_groups,
 +                                         u32 flags)
  {
        struct mlx5_flow_table *ft;
  
        ft = mlx5_create_auto_grouped_flow_table(ns, priority,
                                                 num_entries,
                                                 num_groups,
 -                                               0, 0);
 +                                               0, flags);
        if (IS_ERR(ft))
                return ERR_CAST(ft);
  
@@@ -2992,43 -3082,26 +3021,43 @@@ static struct mlx5_ib_flow_prio *get_fl
        int max_table_size;
        int num_entries;
        int num_groups;
 +      u32 flags = 0;
        int priority;
  
        max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
                                                       log_max_ft_size));
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
 -              if (ft_type == MLX5_IB_FT_TX)
 -                      priority = 0;
 -              else if (flow_is_multicast_only(flow_attr) &&
 -                       !dont_trap)
 +              enum mlx5_flow_namespace_type fn_type;
 +
 +              if (flow_is_multicast_only(flow_attr) &&
 +                  !dont_trap)
                        priority = MLX5_IB_FLOW_MCAST_PRIO;
                else
                        priority = ib_prio_to_core_prio(flow_attr->priority,
                                                        dont_trap);
 -              ns = mlx5_get_flow_namespace(dev->mdev,
 -                                           ft_type == MLX5_IB_FT_TX ?
 -                                           MLX5_FLOW_NAMESPACE_EGRESS :
 -                                           MLX5_FLOW_NAMESPACE_BYPASS);
 +              if (ft_type == MLX5_IB_FT_RX) {
 +                      fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
 +                      prio = &dev->flow_db->prios[priority];
 +                      if (!dev->rep &&
 +                          MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
 +                              flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
 +                      if (!dev->rep &&
 +                          MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 +                                      reformat_l3_tunnel_to_l2))
 +                              flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 +              } else {
 +                      max_table_size =
 +                              BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
 +                                                            log_max_ft_size));
 +                      fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
 +                      prio = &dev->flow_db->egress_prios[priority];
 +                      if (!dev->rep &&
 +                          MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
 +                              flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 +              }
 +              ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
                num_entries = MLX5_FS_MAX_ENTRIES;
                num_groups = MLX5_FS_MAX_TYPES;
 -              prio = &dev->flow_db->prios[priority];
        } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
                ns = mlx5_get_flow_namespace(dev->mdev,
  
        ft = prio->flow_table;
        if (!ft)
 -              return _get_prio(ns, prio, priority, num_entries, num_groups);
 +              return _get_prio(ns, prio, priority, num_entries, num_groups,
 +                               flags);
  
        return prio;
  }
@@@ -3228,9 -3300,6 +3257,9 @@@ static struct mlx5_ib_flow_handler *_cr
        if (!is_valid_attr(dev->mdev, flow_attr))
                return ERR_PTR(-EINVAL);
  
 +      if (dev->rep && is_egress)
 +              return ERR_PTR(-EINVAL);
 +
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        handler = kzalloc(sizeof(*handler), GFP_KERNEL);
        if (!handler || !spec) {
@@@ -3618,54 -3687,34 +3647,54 @@@ free_ucmd
        return ERR_PTR(err);
  }
  
 -static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
 -                                               int priority, bool mcast)
 +static struct mlx5_ib_flow_prio *
 +_get_flow_table(struct mlx5_ib_dev *dev,
 +              struct mlx5_ib_flow_matcher *fs_matcher,
 +              bool mcast)
  {
 -      int max_table_size;
        struct mlx5_flow_namespace *ns = NULL;
        struct mlx5_ib_flow_prio *prio;
 +      int max_table_size;
 +      u32 flags = 0;
 +      int priority;
 +
 +      if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
 +              max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 +                                      log_max_ft_size));
 +              if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
 +                      flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
 +              if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 +                                            reformat_l3_tunnel_to_l2))
 +                      flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 +      } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */
 +              max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
 +                                      log_max_ft_size));
 +              if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
 +                      flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 +      }
  
 -      max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 -                           log_max_ft_size));
        if (max_table_size < MLX5_FS_MAX_ENTRIES)
                return ERR_PTR(-ENOMEM);
  
        if (mcast)
                priority = MLX5_IB_FLOW_MCAST_PRIO;
        else
 -              priority = ib_prio_to_core_prio(priority, false);
 +              priority = ib_prio_to_core_prio(fs_matcher->priority, false);
  
 -      ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
 +      ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
        if (!ns)
                return ERR_PTR(-ENOTSUPP);
  
 -      prio = &dev->flow_db->prios[priority];
 +      if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
 +              prio = &dev->flow_db->prios[priority];
 +      else
 +              prio = &dev->flow_db->egress_prios[priority];
  
        if (prio->flow_table)
                return prio;
  
        return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
 -                       MLX5_FS_MAX_TYPES);
 +                       MLX5_FS_MAX_TYPES, flags);
  }
  
  static struct mlx5_ib_flow_handler *
@@@ -3673,10 -3722,10 +3702,10 @@@ _create_raw_flow_rule(struct mlx5_ib_de
                      struct mlx5_ib_flow_prio *ft_prio,
                      struct mlx5_flow_destination *dst,
                      struct mlx5_ib_flow_matcher  *fs_matcher,
 +                    struct mlx5_flow_act *flow_act,
                      void *cmd_in, int inlen)
  {
        struct mlx5_ib_flow_handler *handler;
 -      struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_table *ft = ft_prio->flow_table;
        int err = 0;
               fs_matcher->mask_len);
        spec->match_criteria_enable = fs_matcher->match_criteria_enable;
  
 -      flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        handler->rule = mlx5_add_flow_rules(ft, spec,
 -                                          &flow_act, dst, 1);
 +                                          flow_act, dst, 1);
  
        if (IS_ERR(handler->rule)) {
                err = PTR_ERR(handler->rule);
@@@ -3758,12 -3808,12 +3787,12 @@@ static bool raw_fs_is_multicast(struct 
  struct mlx5_ib_flow_handler *
  mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
                        struct mlx5_ib_flow_matcher *fs_matcher,
 +                      struct mlx5_flow_act *flow_act,
                        void *cmd_in, int inlen, int dest_id,
                        int dest_type)
  {
        struct mlx5_flow_destination *dst;
        struct mlx5_ib_flow_prio *ft_prio;
 -      int priority = fs_matcher->priority;
        struct mlx5_ib_flow_handler *handler;
        bool mcast;
        int err;
        mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
        mutex_lock(&dev->flow_db->lock);
  
 -      ft_prio = _get_flow_table(dev, priority, mcast);
 +      ft_prio = _get_flow_table(dev, fs_matcher, mcast);
        if (IS_ERR(ft_prio)) {
                err = PTR_ERR(ft_prio);
                goto unlock;
        if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
                dst->type = dest_type;
                dst->tir_num = dest_id;
 -      } else {
 +              flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 +      } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
                dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
                dst->ft_num = dest_id;
 +              flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 +      } else {
 +              dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
 +              flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        }
  
 -      handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
 -                                      inlen);
 +      handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
 +                                      cmd_in, inlen);
  
        if (IS_ERR(handler)) {
                err = PTR_ERR(handler);
@@@ -3979,9 -4024,6 +4008,9 @@@ static int mlx5_ib_destroy_flow_action(
                 */
                mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
                break;
 +      case IB_FLOW_ACTION_UNSPECIFIED:
 +              mlx5_ib_destroy_flow_action_raw(maction);
 +              break;
        default:
                WARN_ON(true);
                break;
@@@ -5867,7 -5909,7 +5896,7 @@@ int mlx5_ib_stage_caps_init(struct mlx5
        if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
            (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
             MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
-               mutex_init(&dev->lb_mutex);
+               mutex_init(&dev->lb.mutex);
  
        return 0;
  }
index dc34ffa4c8b331de1ac3a062db51d8c38bd3f3a1,8376408e2bc96e8e8b4c81e75636dfdfd9874538..a28d04d4c9df8a4d36151ffda594f4ddf4cbb786
  #include <rdma/ib_smi.h>
  #include <linux/mlx5/driver.h>
  #include <linux/mlx5/cq.h>
 +#include <linux/mlx5/fs.h>
  #include <linux/mlx5/qp.h>
  #include <linux/mlx5/srq.h>
 +#include <linux/mlx5/fs.h>
  #include <linux/types.h>
  #include <linux/mlx5/transobj.h>
  #include <rdma/ib_user_verbs.h>
@@@ -116,6 -114,13 +116,6 @@@ enum 
        MLX5_MEMIC_BASE_SIZE    = 1 << MLX5_MEMIC_BASE_ALIGN,
  };
  
 -struct mlx5_ib_vma_private_data {
 -      struct list_head list;
 -      struct vm_area_struct *vma;
 -      /* protect vma_private_list add/del */
 -      struct mutex *vma_private_list_mutex;
 -};
 -
  struct mlx5_ib_ucontext {
        struct ib_ucontext      ibucontext;
        struct list_head        db_page_list;
        u8                      cqe_version;
        /* Transport Domain number */
        u32                     tdn;
 -      struct list_head        vma_private_list;
 -      /* protect vma_private_list add/del */
 -      struct mutex            vma_private_list_mutex;
  
        u64                     lib_caps;
        DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
        u16                     devx_uid;
 +      /* For RoCE LAG TX affinity */
 +      atomic_t                tx_port_affinity;
  };
  
  static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@@ -145,12 -151,6 +145,12 @@@ struct mlx5_ib_pd 
        u32                     pdn;
  };
  
 +enum {
 +      MLX5_IB_FLOW_ACTION_MODIFY_HEADER,
 +      MLX5_IB_FLOW_ACTION_PACKET_REFORMAT,
 +      MLX5_IB_FLOW_ACTION_DECAP,
 +};
 +
  #define MLX5_IB_FLOW_MCAST_PRIO               (MLX5_BY_PASS_NUM_PRIOS - 1)
  #define MLX5_IB_FLOW_LAST_PRIO                (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1)
  #if (MLX5_IB_FLOW_LAST_PRIO <= 0)
@@@ -180,7 -180,6 +180,7 @@@ struct mlx5_ib_flow_matcher 
        struct mlx5_ib_match_params matcher_mask;
        int                     mask_len;
        enum mlx5_ib_flow_type  flow_type;
 +      enum mlx5_flow_namespace_type ns_type;
        u16                     priority;
        struct mlx5_core_dev    *mdev;
        atomic_t                usecnt;
  
  struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        prios[MLX5_IB_NUM_FLOW_FT];
 +      struct mlx5_ib_flow_prio        egress_prios[MLX5_IB_NUM_FLOW_FT];
        struct mlx5_ib_flow_prio        sniffer[MLX5_IB_NUM_SNIFFER_FTS];
        struct mlx5_ib_flow_prio        egress[MLX5_IB_NUM_EGRESS_FTS];
        struct mlx5_flow_table          *lag_demux_ft;
@@@ -430,7 -428,7 +430,7 @@@ struct mlx5_ib_qp 
        struct list_head        cq_send_list;
        struct mlx5_rate_limit  rl;
        u32                     underlay_qpn;
-       bool                    tunnel_offload_en;
+       u32                     flags_en;
        /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
        enum ib_qp_type         qp_sub_type;
  };
@@@ -701,7 -699,7 +701,7 @@@ struct mlx5_roce 
        rwlock_t                netdev_lock;
        struct net_device       *netdev;
        struct notifier_block   nb;
 -      atomic_t                next_port;
 +      atomic_t                tx_port_affinity;
        enum ib_port_state last_port_state;
        struct mlx5_ib_dev      *dev;
        u8                      native_port_num;
@@@ -816,11 -814,6 +816,11 @@@ struct mlx5_ib_flow_action 
                        u64                         ib_flags;
                        struct mlx5_accel_esp_xfrm *ctx;
                } esp_aes_gcm;
 +              struct {
 +                      struct mlx5_ib_dev *dev;
 +                      u32 sub_type;
 +                      u32 action_id;
 +              } flow_action_raw;
        };
  };
  
@@@ -865,12 -858,17 +865,20 @@@ to_mcounters(struct ib_counters *ibcntr
        return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
  }
  
 +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 +                         bool is_egress,
 +                         struct mlx5_flow_act *action);
+ struct mlx5_ib_lb_state {
+       /* protect the user_td */
+       struct mutex            mutex;
+       u32                     user_td;
+       int                     qps;
+       bool                    enabled;
+ };
+
  struct mlx5_ib_dev {
        struct ib_device                ib_dev;
 -      const struct uverbs_object_tree_def *driver_trees[6];
 +      const struct uverbs_object_tree_def *driver_trees[7];
        struct mlx5_core_dev            *mdev;
        struct mlx5_roce                roce[MLX5_MAX_PORTS];
        int                             num_ports;
        const struct mlx5_ib_profile    *profile;
        struct mlx5_eswitch_rep         *rep;
  
-       /* protect the user_td */
-       struct mutex            lb_mutex;
-       u32                     user_td;
+       struct mlx5_ib_lb_state         lb;
        u8                      umr_fence;
        struct list_head        ib_dev_list;
        u64                     sys_image_guid;
@@@ -1026,6 -1022,8 +1032,8 @@@ int mlx5_ib_query_srq(struct ib_srq *ib
  int mlx5_ib_destroy_srq(struct ib_srq *srq);
  int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                          const struct ib_recv_wr **bad_wr);
+ int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
+ void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
  struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                                struct ib_qp_init_attr *init_attr,
                                struct ib_udata *udata);
@@@ -1150,7 -1148,7 +1158,7 @@@ void mlx5_ib_pfault(struct mlx5_core_de
  int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
  int __init mlx5_ib_odp_init(void);
  void mlx5_ib_odp_cleanup(void);
 -void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 +void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
                              unsigned long end);
  void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
  void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
@@@ -1245,11 -1243,9 +1253,11 @@@ void mlx5_ib_devx_destroy(struct mlx5_i
  const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
  struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
        struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
 -      void *cmd_in, int inlen, int dest_id, int dest_type);
 +      struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
 +      int dest_id, int dest_type);
  bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
  int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
 +void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
  #else
  static inline int
  mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
@@@ -1268,11 -1264,6 +1276,11 @@@ mlx5_ib_get_flow_trees(const struct uve
  {
        return 0;
  }
 +static inline void
 +mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
 +{
 +      return;
 +};
  #endif
  static inline void init_query_mad(struct ib_smp *mad)
  {
index 1f35ecbefffe3e59bfe46f1b61bcc7e995439296,fcaa5c4d6feb0180e3348b245e0ab70123bc7470..1f318a47040cf1d754e007ecac62c4340eab40a0
@@@ -1256,10 -1256,21 +1256,21 @@@ static bool tunnel_offload_supported(st
                 MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
  }
  
+ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_rq *rq,
+                                     u32 qp_flags_en)
+ {
+       if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+                          MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+               mlx5_ib_disable_lb(dev, false, true);
+       mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+ }
+
  static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
                                    struct mlx5_ib_rq *rq, u32 tdn,
-                                   bool tunnel_offload_en)
+                                   u32 *qp_flags_en)
  {
+       u8 lb_flag = 0;
        u32 *in;
        void *tirc;
        int inlen;
        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
        MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
        MLX5_SET(tirc, tirc, transport_domain, tdn);
-       if (tunnel_offload_en)
+       if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
  
-       if (dev->rep)
-               MLX5_SET(tirc, tirc, self_lb_block,
-                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+       if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+               lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+       if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+               lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+       if (dev->rep) {
+               lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+               *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+       }
+       MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
  
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
  
+       if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+               err = mlx5_ib_enable_lb(dev, false, true);
+               if (err)
+                       destroy_raw_packet_qp_tir(dev, rq, 0);
+       }
        kvfree(in);
  
        return err;
  }
  
- static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
-                                     struct mlx5_ib_rq *rq)
- {
-       mlx5_core_destroy_tir(dev->mdev, rq->tirn);
- }
  static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                                u32 *in, size_t inlen,
                                struct ib_pd *pd)
                        goto err_destroy_sq;
  
  
-               err = create_raw_packet_qp_tir(dev, rq, tdn,
-                                              qp->tunnel_offload_en);
+               err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en);
                if (err)
                        goto err_destroy_rq;
        }
@@@ -1363,7 -1382,7 +1382,7 @@@ static void destroy_raw_packet_qp(struc
        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
  
        if (qp->rq.wqe_cnt) {
-               destroy_raw_packet_qp_tir(dev, rq);
+               destroy_raw_packet_qp_tir(dev, rq, qp->flags_en);
                destroy_raw_packet_qp_rq(dev, rq);
        }
  
@@@ -1387,6 -1406,9 +1406,9 @@@ static void raw_packet_qp_copy_info(str
  
  static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
  {
+       if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+                           MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+               mlx5_ib_disable_lb(dev, false, true);
        mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
  }
  
@@@ -1410,6 -1432,7 +1432,7 @@@ static int create_rss_raw_qp_tir(struc
        u32 tdn = mucontext->tdn;
        struct mlx5_ib_create_qp_rss ucmd = {};
        size_t required_cmd_sz;
+       u8 lb_flag = 0;
  
        if (init_attr->qp_type != IB_QPT_RAW_PACKET)
                return -EOPNOTSUPP;
                return -EOPNOTSUPP;
        }
  
-       if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
+       if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+                          MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+                          MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
                mlx5_ib_dbg(dev, "invalid flags\n");
                return -EOPNOTSUPP;
        }
                return -EOPNOTSUPP;
        }
  
+       if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) {
+               lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+               qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+       }
+       if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+               lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+               qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
+       }
        err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
        if (err) {
                mlx5_ib_dbg(dev, "copy failed\n");
        if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
  
+       MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
        if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
                hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
        else
        MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
  
  create_tir:
-       if (dev->rep)
-               MLX5_SET(tirc, tirc, self_lb_block,
-                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
  
+       if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+               err = mlx5_ib_enable_lb(dev, false, true);
+               if (err)
+                       mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+       }
        if (err)
                goto err;
  
@@@ -1710,7 -1750,23 +1750,23 @@@ static int create_qp_common(struct mlx5
                                mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
                                return -EOPNOTSUPP;
                        }
-                       qp->tunnel_offload_en = true;
+                       qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
+               }
+               if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
+                       if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+                               mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
+                               return -EOPNOTSUPP;
+                       }
+                       qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+               }
+               if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+                       if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+                               mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
+                               return -EOPNOTSUPP;
+                       }
+                       qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
                }
  
                if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
@@@ -2909,37 -2965,6 +2965,37 @@@ static int modify_raw_packet_qp(struct 
        return 0;
  }
  
 +static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
 +                                  struct mlx5_ib_pd *pd,
 +                                  struct mlx5_ib_qp_base *qp_base,
 +                                  u8 port_num)
 +{
 +      struct mlx5_ib_ucontext *ucontext = NULL;
 +      unsigned int tx_port_affinity;
 +
 +      if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context)
 +              ucontext = to_mucontext(pd->ibpd.uobject->context);
 +
 +      if (ucontext) {
 +              tx_port_affinity = (unsigned int)atomic_add_return(
 +                                         1, &ucontext->tx_port_affinity) %
 +                                         MLX5_MAX_PORTS +
 +                                 1;
 +              mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
 +                              tx_port_affinity, qp_base->mqp.qpn, ucontext);
 +      } else {
 +              tx_port_affinity =
 +                      (unsigned int)atomic_add_return(
 +                              1, &dev->roce[port_num].tx_port_affinity) %
 +                              MLX5_MAX_PORTS +
 +                      1;
 +              mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
 +                              tx_port_affinity, qp_base->mqp.qpn);
 +      }
 +
 +      return tx_port_affinity;
 +}
 +
  static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state,
        if (!context)
                return -ENOMEM;
  
 +      pd = get_pd(qp);
        context->flags = cpu_to_be32(mlx5_st << 16);
  
        if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
                    (ibqp->qp_type == IB_QPT_XRC_TGT)) {
                        if (mlx5_lag_is_active(dev->mdev)) {
                                u8 p = mlx5_core_native_port_num(dev->mdev);
 -                              tx_affinity = (unsigned int)atomic_add_return(1,
 -                                              &dev->roce[p].next_port) %
 -                                              MLX5_MAX_PORTS + 1;
 +                              tx_affinity = get_tx_affinity(dev, pd, base, p);
                                context->flags |= cpu_to_be32(tx_affinity << 24);
                        }
                }
                        goto out;
        }
  
 -      pd = get_pd(qp);
        get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
                &send_cq, &recv_cq);
  
@@@ -3272,9 -3299,7 +3328,9 @@@ static bool modify_dci_qp_is_ok(enum ib
        int req = IB_QP_STATE;
        int opt = 0;
  
 -      if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 +      if (new_state == IB_QPS_RESET) {
 +              return is_valid_mask(attr_mask, req, opt);
 +      } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
                req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
                return is_valid_mask(attr_mask, req, opt);
        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@@ -4402,12 -4427,6 +4458,12 @@@ static int _mlx5_ib_post_send(struct ib
        u8 next_fence = 0;
        u8 fence;
  
 +      if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
 +                   !drain)) {
 +              *bad_wr = wr;
 +              return -EIO;
 +      }
 +
        if (unlikely(ibqp->qp_type == IB_QPT_GSI))
                return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
  
  
        spin_lock_irqsave(&qp->sq.lock, flags);
  
 -      if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 -              err = -EIO;
 -              *bad_wr = wr;
 -              nreq = 0;
 -              goto out;
 -      }
 -
        for (nreq = 0; wr; nreq++, wr = wr->next) {
                if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
                        mlx5_ib_warn(dev, "\n");
@@@ -4730,17 -4756,18 +4786,17 @@@ static int _mlx5_ib_post_recv(struct ib
        int ind;
        int i;
  
 +      if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
 +                   !drain)) {
 +              *bad_wr = wr;
 +              return -EIO;
 +      }
 +
        if (unlikely(ibqp->qp_type == IB_QPT_GSI))
                return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
  
        spin_lock_irqsave(&qp->rq.lock, flags);
  
 -      if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 -              err = -EIO;
 -              *bad_wr = wr;
 -              nreq = 0;
 -              goto out;
 -      }
 -
        ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
  
        for (nreq = 0; wr; nreq++, wr = wr->next) {