]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
scsi: csiostor: update ingress pack and pad boundary value
authorVarun Prakash <varun@chelsio.com>
Tue, 17 Jul 2018 14:25:26 +0000 (19:55 +0530)
committerMartin K. Petersen <martin.petersen@oracle.com>
Fri, 20 Jul 2018 01:57:39 +0000 (21:57 -0400)
T5/T6 can have different pack and pad boundary value. This patch sets
packing boundary based on cache line size and PCI-E maximum payload size
and sets smallest padding boundary value.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/csiostor/csio_wr.c

index faa357b62c61686fba4b9548895b4e3709c9d6f7..5022e82ccc4fedcf628636e0976128eded1f1449 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/page.h>
 #include <linux/cache.h>
 
+#include "t4_values.h"
 #include "csio_hw.h"
 #include "csio_wr.h"
 #include "csio_mb.h"
@@ -1309,8 +1310,11 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
        struct csio_sge *sge = &wrm->sge;
        uint32_t clsz = L1_CACHE_BYTES;
        uint32_t s_hps = PAGE_SHIFT - 10;
-       uint32_t ingpad = 0;
        uint32_t stat_len = clsz > 64 ? 128 : 64;
+       u32 fl_align = clsz < 32 ? 32 : clsz;
+       u32 pack_align;
+       u32 ingpad, ingpack;
+       int pcie_cap;
 
        csio_wr_reg32(hw, HOSTPAGESIZEPF0_V(s_hps) | HOSTPAGESIZEPF1_V(s_hps) |
                      HOSTPAGESIZEPF2_V(s_hps) | HOSTPAGESIZEPF3_V(s_hps) |
@@ -1318,14 +1322,82 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
                      HOSTPAGESIZEPF6_V(s_hps) | HOSTPAGESIZEPF7_V(s_hps),
                      SGE_HOST_PAGE_SIZE_A);
 
-       sge->csio_fl_align = clsz < 32 ? 32 : clsz;
-       ingpad = ilog2(sge->csio_fl_align) - 5;
+       /* T5 introduced the separation of the Free List Padding and
+        * Packing Boundaries.  Thus, we can select a smaller Padding
+        * Boundary to avoid uselessly chewing up PCIe Link and Memory
+        * Bandwidth, and use a Packing Boundary which is large enough
+        * to avoid false sharing between CPUs, etc.
+        *
+        * For the PCI Link, the smaller the Padding Boundary the
+        * better.  For the Memory Controller, a smaller Padding
+        * Boundary is better until we cross under the Memory Line
+        * Size (the minimum unit of transfer to/from Memory).  If we
+        * have a Padding Boundary which is smaller than the Memory
+        * Line Size, that'll involve a Read-Modify-Write cycle on the
+        * Memory Controller which is never good.
+        */
+
+       /* We want the Packing Boundary to be based on the Cache Line
+        * Size in order to help avoid False Sharing performance
+        * issues between CPUs, etc.  We also want the Packing
+        * Boundary to incorporate the PCI-E Maximum Payload Size.  We
+        * get best performance when the Packing Boundary is a
+        * multiple of the Maximum Payload Size.
+        */
+       pack_align = fl_align;
+       pcie_cap = pci_find_capability(hw->pdev, PCI_CAP_ID_EXP);
+       if (pcie_cap) {
+               u32 mps, mps_log;
+               u16 devctl;
+
+               /* The PCIe Device Control Maximum Payload Size field
+                * [bits 7:5] encodes sizes as powers of 2 starting at
+                * 128 bytes.
+                */
+               pci_read_config_word(hw->pdev,
+                                    pcie_cap + PCI_EXP_DEVCTL,
+                                    &devctl);
+               mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
+               mps = 1 << mps_log;
+               if (mps > pack_align)
+                       pack_align = mps;
+       }
+
+       /* T5/T6 have a special interpretation of the "0"
+        * value for the Packing Boundary.  This corresponds to 16
+        * bytes instead of the expected 32 bytes.
+        */
+       if (pack_align <= 16) {
+               ingpack = INGPACKBOUNDARY_16B_X;
+               fl_align = 16;
+       } else if (pack_align == 32) {
+               ingpack = INGPACKBOUNDARY_64B_X;
+               fl_align = 64;
+       } else {
+               u32 pack_align_log = fls(pack_align) - 1;
+
+               ingpack = pack_align_log - INGPACKBOUNDARY_SHIFT_X;
+               fl_align = pack_align;
+       }
+
+       /* Use the smallest Ingress Padding which isn't smaller than
+        * the Memory Controller Read/Write Size.  We'll take that as
+        * being 8 bytes since we don't know of any system with a
+        * wider Memory Controller Bus Width.
+        */
+       if (csio_is_t5(hw->pdev->device & CSIO_HW_CHIP_MASK))
+               ingpad = INGPADBOUNDARY_32B_X;
+       else
+               ingpad = T6_INGPADBOUNDARY_8B_X;
 
        csio_set_reg_field(hw, SGE_CONTROL_A,
                           INGPADBOUNDARY_V(INGPADBOUNDARY_M) |
                           EGRSTATUSPAGESIZE_F,
                           INGPADBOUNDARY_V(ingpad) |
                           EGRSTATUSPAGESIZE_V(stat_len != 64));
+       csio_set_reg_field(hw, SGE_CONTROL2_A,
+                          INGPACKBOUNDARY_V(INGPACKBOUNDARY_M),
+                          INGPACKBOUNDARY_V(ingpack));
 
        /* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */
        csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0_A);
@@ -1337,14 +1409,16 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
        if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS) {
                csio_wr_reg32(hw,
                        (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2_A) +
-                       sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+                       fl_align - 1) & ~(fl_align - 1),
                        SGE_FL_BUFFER_SIZE2_A);
                csio_wr_reg32(hw,
                        (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3_A) +
-                       sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+                       fl_align - 1) & ~(fl_align - 1),
                        SGE_FL_BUFFER_SIZE3_A);
        }
 
+       sge->csio_fl_align = fl_align;
+
        csio_wr_reg32(hw, HPZ0_V(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ_A);
 
        /* default value of rx_dma_offset of the NIC driver */