net/mlx5e: Use inline MTTs in UMR WQEs
author Tariq Toukan <tariqt@mellanox.com>
Mon, 10 Jul 2017 09:52:36 +0000 (12:52 +0300)
committer Saeed Mahameed <saeedm@mellanox.com>
Fri, 30 Mar 2018 23:16:17 +0000 (16:16 -0700)
When modifying the page mapping of a HW memory region
(via a UMR post), post the new values inline in the WQE
instead of using a data pointer.

This is a micro-optimization: inline UMR WQEs of different
rings scale better in HW.

In addition, this obsoletes a few control flows and helps
delete ~50 LOC.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
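
For a sense of scale, here is a minimal standalone sketch of the
MLX5E_UMR_WQE_INLINE_SZ / MLX5E_UMR_WQEBBS arithmetic introduced in the
en.h hunk below. All numeric constants are illustrative assumptions,
not values read from the driver headers:

#include <stdio.h>

#define ALIGN(x, a)        (((x) + (a) - 1) & ~((a) - 1))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

#define PAGES_PER_WQE   64  /* assumed MLX5_MPWRQ_PAGES_PER_WQE */
#define MTT_SZ           8  /* assumed sizeof(struct mlx5_mtt) */
#define MTT_ALIGNMENT   64  /* assumed MLX5_UMR_MTT_ALIGNMENT */
#define WQE_BB          64  /* assumed MLX5_SEND_WQE_BB */
#define UMR_WQE_HDR_SZ 128  /* assumed ctrl + uctrl + mkc segment sizes */

int main(void)
{
	/* The MTT array is padded up to the UMR copy unit so the HW
	 * copy never reads past the last valid entry. */
	int inline_sz = UMR_WQE_HDR_SZ +
			ALIGN(PAGES_PER_WQE * MTT_SZ, MTT_ALIGNMENT);
	int wqebbs = DIV_ROUND_UP(inline_sz, WQE_BB);

	printf("inline UMR WQE: %d bytes = %d WQEBBs\n", inline_sz, wqebbs);
	return 0;
}

Under these assumptions the inline WQE is 640 bytes (10 WQEBBs); since
the whole WQE must now fit in a single post, en_main.c below gains a
check of MLX5E_UMR_WQE_INLINE_SZ against the max_wqe_sz_sq capability.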

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1f89e2194b61d387be5ad8843b1f6699c27c2c18..c1d3a29388bdd189335bc42be46ee04acde922a9 100644
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
 
-#define MLX5_UMR_ALIGN                         (2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD      (256)
 
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
 #define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
-#define MLX5E_ICOSQ_MAX_WQEBBS \
-       (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
+#define MLX5E_UMR_WQE_INLINE_SZ \
+       (sizeof(struct mlx5e_umr_wqe) + \
+        ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
+              MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5E_UMR_WQEBBS \
+       (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
 
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 #define MLX5E_XDP_TX_DS_COUNT \
@@ -183,7 +187,7 @@ struct mlx5e_umr_wqe {
        struct mlx5_wqe_ctrl_seg       ctrl;
        struct mlx5_wqe_umr_ctrl_seg   uctrl;
        struct mlx5_mkey_seg           mkc;
-       struct mlx5_wqe_data_seg       data;
+       struct mlx5_mtt                inline_mtts[0];
 };
 
 extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
@@ -421,7 +425,6 @@ struct mlx5e_icosq {
        void __iomem              *uar_map;
        u32                        sqn;
        u16                        edge;
-       __be32                     mkey_be;
        unsigned long              state;
 
        /* control path */
@@ -446,8 +449,6 @@ struct mlx5e_wqe_frag_info {
 };
 
 struct mlx5e_umr_dma_info {
-       __be64                *mtt;
-       dma_addr_t             mtt_addr;
        struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
        struct mlx5e_umr_wqe   wqe;
 };
@@ -490,7 +491,6 @@ struct mlx5e_rq {
                } wqe;
                struct {
                        struct mlx5e_mpw_info *info;
-                       void                  *mtt_no_align;
                        u16                    num_strides;
                        u8                     log_stride_sz;
                        bool                   umr_in_progress;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e627b81cebe991e2f3fe4623b72c63439151ef44..42dc350c5ab114faadc5d95c965ffb8708820946 100644
@@ -73,9 +73,20 @@ struct mlx5e_channel_param {
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
-       return MLX5_CAP_GEN(mdev, striding_rq) &&
+       bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
                MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
                MLX5_CAP_ETH(mdev, reg_umr_sq);
+       u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
+       bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+
+       if (!striding_rq_umr)
+               return false;
+       if (!inline_umr) {
+               mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
+                              (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
+               return false;
+       }
+       return true;
 }
 
 static u32 mlx5e_mpwqe_get_linear_frag_sz(struct mlx5e_params *params)
@@ -258,16 +269,6 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
        synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
 }
 
-static inline int mlx5e_get_wqe_mtt_sz(void)
-{
-       /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
-        * To avoid copying garbage after the mtt array, we allocate
-        * a little more.
-        */
-       return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
-                    MLX5_UMR_MTT_ALIGNMENT);
-}
-
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
                                       struct mlx5e_icosq *sq,
                                       struct mlx5e_umr_wqe *wqe,
@@ -275,9 +276,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 {
        struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
        struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
-       struct mlx5_wqe_data_seg      *dseg = &wqe->data;
-       struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-       u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
+       u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
        u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
        cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
@@ -285,80 +284,32 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
        cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
        cseg->imm       = rq->mkey_be;
 
-       ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
+       ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
        ucseg->xlt_octowords =
                cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
        ucseg->bsf_octowords =
                cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
        ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
-
-       dseg->lkey = sq->mkey_be;
-       dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
 }
 
 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
                                     struct mlx5e_channel *c)
 {
        int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-       int mtt_sz = mlx5e_get_wqe_mtt_sz();
-       int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
        int i;
 
        rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
                                      GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->mpwqe.info)
-               goto err_out;
-
-       /* We allocate more than mtt_sz as we will align the pointer */
-       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
-                                       cpu_to_node(c->cpu));
-       if (unlikely(!rq->mpwqe.mtt_no_align))
-               goto err_free_wqe_info;
+               return -ENOMEM;
 
        for (i = 0; i < wq_sz; i++) {
                struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
-               wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc,
-                                       MLX5_UMR_ALIGN);
-               wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
-                                                 PCI_DMA_TODEVICE);
-               if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr)))
-                       goto err_unmap_mtts;
-
                mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i);
        }
 
        return 0;
-
-err_unmap_mtts:
-       while (--i >= 0) {
-               struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
-
-               dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
-                                PCI_DMA_TODEVICE);
-       }
-       kfree(rq->mpwqe.mtt_no_align);
-err_free_wqe_info:
-       kfree(rq->mpwqe.info);
-
-err_out:
-       return -ENOMEM;
-}
-
-static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
-{
-       int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-       int mtt_sz = mlx5e_get_wqe_mtt_sz();
-       int i;
-
-       for (i = 0; i < wq_sz; i++) {
-               struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
-
-               dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz,
-                                PCI_DMA_TODEVICE);
-       }
-       kfree(rq->mpwqe.mtt_no_align);
-       kfree(rq->mpwqe.info);
 }
 
 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
@@ -579,7 +530,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               mlx5e_rq_free_mpwqe_info(rq);
+               kfree(rq->mpwqe.info);
                mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -918,7 +869,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
        struct mlx5_core_dev *mdev = c->mdev;
        int err;
 
-       sq->mkey_be   = c->mkey_be;
        sq->channel   = c;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 8eb9e7e89b0916a6fe0a347f41c626075cf974de..539dbe9382ee265578faa9465f12ffc08de4c0c1 100644
@@ -381,17 +381,25 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
        struct mlx5e_icosq *sq = &rq->channel->icosq;
        struct mlx5_wq_cyc *wq = &sq->wq;
-       struct mlx5e_umr_wqe *wqe;
-       u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
+       struct mlx5e_umr_wqe *umr_wqe;
+       int cpy = offsetof(struct mlx5e_umr_wqe, inline_mtts);
        int err;
        u16 pi;
        int i;
 
+       /* fill sq edge with nops to avoid wqe wrap around */
+       while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
+               sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+               mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+       }
+
+       umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+       memcpy(umr_wqe, &wi->umr.wqe, cpy);
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
                err = mlx5e_page_alloc_mapped(rq, dma_info);
                if (unlikely(err))
                        goto err_unmap;
-               wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+               umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
                page_ref_add(dma_info->page, pg_strides);
        }
 
@@ -400,21 +408,13 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 
        rq->mpwqe.umr_in_progress = true;
 
-       /* fill sq edge with nops to avoid wqe wrap around */
-       while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
-               sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-               mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-       }
-
-       wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-       memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
-       wqe->ctrl.opmod_idx_opcode =
+       umr_wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
                            MLX5_OPCODE_UMR);
 
        sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
-       sq->pc += num_wqebbs;
-       mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+       sq->pc += MLX5E_UMR_WQEBBS;
+       mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
 
        return 0;
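
The en_rx.c hunk above boils down to: memcpy only the fixed header of
the per-ring UMR WQE template into the send queue, then write the MTT
entries in place as pages are mapped. A minimal standalone sketch of
that pattern follows; the struct layouts and the post_umr() helper are
simplified stand-ins for illustration, not the real mlx5 definitions:

#include <stddef.h>
#include <string.h>

/* Stand-in structs; segment sizes are assumed for illustration and do
 * not match the real mlx5 layouts. */
struct ctrl_seg  { unsigned char bytes[16]; };
struct uctrl_seg { unsigned char bytes[48]; };
struct mkey_seg  { unsigned char bytes[64]; };
struct mtt       { unsigned long long ptag; };

struct umr_wqe {
	struct ctrl_seg  ctrl;
	struct uctrl_seg uctrl;
	struct mkey_seg  mkc;
	struct mtt       inline_mtts[0]; /* tail lives inside the SQ buffer */
};

/* Hypothetical helper: copy the precomputed fixed header into the SQ
 * slot, then fill the MTTs directly in the queue -- no side MTT array
 * and no extra DMA mapping of it are needed anymore. */
static void post_umr(struct umr_wqe *sq_slot, const struct umr_wqe *tmpl,
		     const unsigned long long *page_addrs, int npages)
{
	int i;

	memcpy(sq_slot, tmpl, offsetof(struct umr_wqe, inline_mtts));
	for (i = 0; i < npages; i++)
		sq_slot->inline_mtts[i].ptag = page_addrs[i];
}

This is also why the nop-fill of the SQ edge moved ahead of
mlx5_wq_cyc_get_wqe(): the multi-WQEBB body is written into the queue
before the producer counter is advanced by MLX5E_UMR_WQEBBS.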