Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author     David S. Miller <davem@davemloft.net>
           Mon, 19 Aug 2019 18:54:03 +0000 (11:54 -0700)
committer  David S. Miller <davem@davemloft.net>
           Mon, 19 Aug 2019 18:54:03 +0000 (11:54 -0700)
Merge conflict of mlx5 resolved using instructions in merge
commit 9566e650bf7fdf58384bb06df634f7531ca3a97e.

Signed-off-by: David S. Miller <davem@davemloft.net>
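The mlx5 resolution notes referenced above are carried in the message of commit 9566e650bf7fdf58384bb06df634f7531ca3a97e; a minimal sketch for pulling them up, assuming a local clone of linux.git that contains that history:

    git log -1 9566e650bf7fdf58384bb06df634f7531ca3a97e   # print that merge's message, which holds the conflict-resolution instructions
    git show 9566e650bf7fdf58384bb06df634f7531ca3a97e     # same message plus its combined diff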
42 files changed:
MAINTAINERS
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/netdevsim.h
drivers/net/phy/at803x.c
drivers/net/phy/phy_device.c
drivers/net/usb/lan78xx.c
drivers/net/xen-netback/netback.c
drivers/s390/net/qeth_core_main.c
drivers/staging/unisys/visornic/visornic_main.c
include/linux/mlx5/mlx5_ifc.h
include/linux/skbuff.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_offload.h
include/net/pkt_cls.h
include/uapi/linux/bpf.h
net/ipv4/tcp.c
net/ipv4/tcp_output.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_offload.c
net/rxrpc/ar-internal.h
net/sched/sch_taprio.c
net/tipc/link.c
net/tipc/msg.h
net/tls/tls_device.c
tools/include/uapi/linux/bpf.h
tools/lib/bpf/libbpf.c

diff --combined MAINTAINERS
index 96d3e60697f50c4a4dcb38a0ca15266785c57496,08176d64eed5afd0a8c024ce74d0f5b0b22c42f4..a406947b369ebd1fe41866049691f6e7f65543e7
@@@ -183,7 -183,7 +183,7 @@@ M: Realtek linux nic maintainers <nic_s
  M:    Heiner Kallweit <hkallweit1@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
- F:    drivers/net/ethernet/realtek/r8169.c
+ F:    drivers/net/ethernet/realtek/r8169*
  
  8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@@ -938,14 -938,6 +938,14 @@@ S:       Supporte
  F:    drivers/mux/adgs1408.c
  F:    Documentation/devicetree/bindings/mux/adi,adgs1408.txt
  
 +ANALOG DEVICES INC ADIN DRIVER
 +M:    Alexandru Ardelean <alexandru.ardelean@analog.com>
 +L:    netdev@vger.kernel.org
 +W:    http://ez.analog.com/community/linux-device-drivers
 +S:    Supported
 +F:    drivers/net/phy/adin.c
 +F:    Documentation/devicetree/bindings/net/adi,adin.yaml
 +
  ANALOG DEVICES INC ADIS DRIVER LIBRARY
  M:    Alexandru Ardelean <alexandru.ardelean@analog.com>
  S:    Supported
@@@ -3643,12 -3635,9 +3643,12 @@@ S:    Maintaine
  F:    Documentation/devicetree/bindings/net/can/
  F:    drivers/net/can/
  F:    include/linux/can/dev.h
 +F:    include/linux/can/led.h
 +F:    include/linux/can/rx-offload.h
  F:    include/linux/can/platform/
  F:    include/uapi/linux/can/error.h
  F:    include/uapi/linux/can/netlink.h
 +F:    include/uapi/linux/can/vxcan.h
  
  CAN NETWORK LAYER
  M:    Oliver Hartkopp <socketcan@hartkopp.net>
@@@ -3661,8 -3650,6 +3661,8 @@@ S:      Maintaine
  F:    Documentation/networking/can.rst
  F:    net/can/
  F:    include/linux/can/core.h
 +F:    include/linux/can/skb.h
 +F:    include/net/netns/can.h
  F:    include/uapi/linux/can.h
  F:    include/uapi/linux/can/bcm.h
  F:    include/uapi/linux/can/raw.h
@@@ -6078,7 -6065,7 +6078,7 @@@ M:      Florian Fainelli <f.fainelli@gmail.c
  M:    Heiner Kallweit <hkallweit1@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
- F:    Documentation/ABI/testing/sysfs-bus-mdio
+ F:    Documentation/ABI/testing/sysfs-class-net-phydev
  F:    Documentation/devicetree/bindings/net/ethernet-phy.yaml
  F:    Documentation/devicetree/bindings/net/mdio*
  F:    Documentation/networking/phy.rst
@@@ -6357,7 -6344,7 +6357,7 @@@ FPGA MANAGER FRAMEWOR
  M:    Moritz Fischer <mdf@kernel.org>
  L:    linux-fpga@vger.kernel.org
  S:    Maintained
- T:    git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git
  Q:    http://patchwork.kernel.org/project/linux-fpga/list/
  F:    Documentation/fpga/
  F:    Documentation/driver-api/fpga/
@@@ -6390,7 -6377,7 +6390,7 @@@ FRAMEBUFFER LAYE
  M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    dri-devel@lists.freedesktop.org
  L:    linux-fbdev@vger.kernel.org
- T:    git git://github.com/bzolnier/linux.git
+ T:    git git://anongit.freedesktop.org/drm/drm-misc
  Q:    http://patchwork.kernel.org/project/linux-fbdev/list/
  S:    Maintained
  F:    Documentation/fb/
@@@ -6454,6 -6441,14 +6454,14 @@@ S:    Maintaine
  F:    drivers/perf/fsl_imx8_ddr_perf.c
  F:    Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
  
+ FREESCALE IMX I2C DRIVER
+ M:    Oleksij Rempel <o.rempel@pengutronix.de>
+ R:    Pengutronix Kernel Team <kernel@pengutronix.de>
+ L:    linux-i2c@vger.kernel.org
+ S:    Maintained
+ F:    drivers/i2c/busses/i2c-imx.c
+ F:    Documentation/devicetree/bindings/i2c/i2c-imx.txt
+ 
  FREESCALE IMX LPI2C DRIVER
  M:    Dong Aisheng <aisheng.dong@nxp.com>
  L:    linux-i2c@vger.kernel.org
@@@ -7465,7 -7460,7 +7473,7 @@@ F:      drivers/net/hyperv
  F:    drivers/scsi/storvsc_drv.c
  F:    drivers/uio/uio_hv_generic.c
  F:    drivers/video/fbdev/hyperv_fb.c
- F:    drivers/iommu/hyperv_iommu.c
+ F:    drivers/iommu/hyperv-iommu.c
  F:    net/vmw_vsock/hyperv_transport.c
  F:    include/clocksource/hyperv_timer.h
  F:    include/linux/hyperv.h
@@@ -8055,6 -8050,7 +8063,7 @@@ S:      Maintaine
  F:    drivers/video/fbdev/i810/
  
  INTEL ASoC DRIVERS
+ M:    Cezary Rojewski <cezary.rojewski@intel.com>
  M:    Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
  M:    Liam Girdwood <liam.r.girdwood@linux.intel.com>
  M:    Jie Yang <yang.jie@linux.intel.com>
@@@ -8076,6 -8072,13 +8085,13 @@@ T:    git git://git.code.sf.net/p/intel-sa
  S:    Supported
  F:    drivers/scsi/isci/
  
+ INTEL CPU family model numbers
+ M:    Tony Luck <tony.luck@intel.com>
+ M:    x86@kernel.org
+ L:    linux-kernel@vger.kernel.org
+ S:    Supported
+ F:    arch/x86/include/asm/intel-family.h
+ 
  INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
  M:    Jani Nikula <jani.nikula@linux.intel.com>
  M:    Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
@@@ -8427,7 -8430,6 +8443,6 @@@ L:      linux-xfs@vger.kernel.or
  L:    linux-fsdevel@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
  S:    Supported
- F:    fs/iomap.c
  F:    fs/iomap/
  F:    include/linux/iomap.h
  
@@@ -11156,7 -11158,6 +11171,7 @@@ S:   Maintaine
  W:    https://fedorahosted.org/dropwatch/
  F:    net/core/drop_monitor.c
  F:    include/uapi/linux/net_dropmon.h
 +F:    include/net/drop_monitor.h
  
  NETWORKING DRIVERS
  M:    "David S. Miller" <davem@davemloft.net>
@@@ -11336,6 -11337,7 +11351,6 @@@ F:   include/net/nfc
  F:    include/uapi/linux/nfc.h
  F:    drivers/nfc/
  F:    include/linux/platform_data/nfcmrvl.h
 -F:    include/linux/platform_data/nxp-nci.h
  F:    Documentation/devicetree/bindings/net/nfc/
  
  NFS, SUNRPC, AND LOCKD CLIENTS
@@@ -13230,7 -13232,7 +13245,7 @@@ M:   Manish Chopra <manishc@marvell.com
  M:    GR-Linux-NIC-Dev@marvell.com
  L:    netdev@vger.kernel.org
  S:    Supported
 -F:    drivers/net/ethernet/qlogic/qlge/
 +F:    drivers/staging/qlge/
  
  QM1D1B0004 MEDIA DRIVER
  M:    Akihiro Tsukada <tskd08@gmail.com>
@@@ -16097,7 -16099,7 +16112,7 @@@ S:   Maintaine
  F:    drivers/net/ethernet/ti/netcp*
  
  TI PCM3060 ASoC CODEC DRIVER
- M:    Kirill Marinushkin <kmarinushkin@birdec.tech>
+ M:    Kirill Marinushkin <kmarinushkin@birdec.com>
  L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/pcm3060.txt
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 94be97b7952ce12d62b82252c732ec59b09fbe4a,8dce4069472b6c02ed6d5d9cb58cce5a7b1ddeaf..4c790ffa1a73a4f5d07af39263f696d2a5f17155
@@@ -116,9 -116,6 +116,9 @@@ enum board_idx 
        BCM57508,
        BCM57504,
        BCM57502,
 +      BCM57508_NPAR,
 +      BCM57504_NPAR,
 +      BCM57502_NPAR,
        BCM58802,
        BCM58804,
        BCM58808,
@@@ -164,9 -161,6 +164,9 @@@ static const struct 
        [BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
        [BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
        [BCM57502] = { "Broadcom BCM57502 NetXtreme-E 10Gb/25Gb/50Gb Ethernet" },
 +      [BCM57508_NPAR] = { "Broadcom BCM57508 NetXtreme-E Ethernet Partition" },
 +      [BCM57504_NPAR] = { "Broadcom BCM57504 NetXtreme-E Ethernet Partition" },
 +      [BCM57502_NPAR] = { "Broadcom BCM57502 NetXtreme-E Ethernet Partition" },
        [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
        [BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
        [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
@@@ -215,12 -209,6 +215,12 @@@ static const struct pci_device_id bnxt_
        { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 },
        { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 },
        { PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 },
 +      { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR },
 +      { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR },
 +      { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR },
 +      { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR },
 +      { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR },
 +      { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR },
        { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
        { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
  #ifdef CONFIG_BNXT_SRIOV
@@@ -840,41 -828,16 +840,41 @@@ static inline int bnxt_alloc_rx_page(st
        return 0;
  }
  
 -static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 cp_cons,
 -                                 u32 agg_bufs)
 +static struct rx_agg_cmp *bnxt_get_agg(struct bnxt *bp,
 +                                     struct bnxt_cp_ring_info *cpr,
 +                                     u16 cp_cons, u16 curr)
 +{
 +      struct rx_agg_cmp *agg;
 +
 +      cp_cons = RING_CMP(ADV_RAW_CMP(cp_cons, curr));
 +      agg = (struct rx_agg_cmp *)
 +              &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 +      return agg;
 +}
 +
 +static struct rx_agg_cmp *bnxt_get_tpa_agg_p5(struct bnxt *bp,
 +                                            struct bnxt_rx_ring_info *rxr,
 +                                            u16 agg_id, u16 curr)
 +{
 +      struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[agg_id];
 +
 +      return &tpa_info->agg_arr[curr];
 +}
 +
 +static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
 +                                 u16 start, u32 agg_bufs, bool tpa)
  {
        struct bnxt_napi *bnapi = cpr->bnapi;
        struct bnxt *bp = bnapi->bp;
        struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
        u16 prod = rxr->rx_agg_prod;
        u16 sw_prod = rxr->rx_sw_agg_prod;
 +      bool p5_tpa = false;
        u32 i;
  
 +      if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
 +              p5_tpa = true;
 +
        for (i = 0; i < agg_bufs; i++) {
                u16 cons;
                struct rx_agg_cmp *agg;
                struct rx_bd *prod_bd;
                struct page *page;
  
 -              agg = (struct rx_agg_cmp *)
 -                      &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 +              if (p5_tpa)
 +                      agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i);
 +              else
 +                      agg = bnxt_get_agg(bp, cpr, idx, start + i);
                cons = agg->rx_agg_cmp_opaque;
                __clear_bit(cons, rxr->rx_agg_bmap);
  
  
                prod = NEXT_RX_AGG(prod);
                sw_prod = NEXT_RX_AGG(sw_prod);
 -              cp_cons = NEXT_CMP(cp_cons);
        }
        rxr->rx_agg_prod = prod;
        rxr->rx_sw_agg_prod = sw_prod;
@@@ -926,7 -888,7 +926,7 @@@ static struct sk_buff *bnxt_rx_page_skb
  {
        unsigned int payload = offset_and_len >> 16;
        unsigned int len = offset_and_len & 0xffff;
 -      struct skb_frag_struct *frag;
 +      skb_frag_t *frag;
        struct page *page = data;
        u16 prod = rxr->rx_prod;
        struct sk_buff *skb;
  
        frag = &skb_shinfo(skb)->frags[0];
        skb_frag_size_sub(frag, payload);
 -      frag->page_offset += payload;
 +      skb_frag_off_add(frag, payload);
        skb->data_len -= payload;
        skb->tail += payload;
  
@@@ -995,19 -957,15 +995,19 @@@ static struct sk_buff *bnxt_rx_skb(stru
  
  static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
                                     struct bnxt_cp_ring_info *cpr,
 -                                   struct sk_buff *skb, u16 cp_cons,
 -                                   u32 agg_bufs)
 +                                   struct sk_buff *skb, u16 idx,
 +                                   u32 agg_bufs, bool tpa)
  {
        struct bnxt_napi *bnapi = cpr->bnapi;
        struct pci_dev *pdev = bp->pdev;
        struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
        u16 prod = rxr->rx_agg_prod;
 +      bool p5_tpa = false;
        u32 i;
  
 +      if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
 +              p5_tpa = true;
 +
        for (i = 0; i < agg_bufs; i++) {
                u16 cons, frag_len;
                struct rx_agg_cmp *agg;
                struct page *page;
                dma_addr_t mapping;
  
 -              agg = (struct rx_agg_cmp *)
 -                      &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 +              if (p5_tpa)
 +                      agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i);
 +              else
 +                      agg = bnxt_get_agg(bp, cpr, idx, i);
                cons = agg->rx_agg_cmp_opaque;
                frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) &
                            RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
                         * allocated already.
                         */
                        rxr->rx_agg_prod = prod;
 -                      bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs - i);
 +                      bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa);
                        return NULL;
                }
  
                skb->truesize += PAGE_SIZE;
  
                prod = NEXT_RX_AGG(prod);
 -              cp_cons = NEXT_CMP(cp_cons);
        }
        rxr->rx_agg_prod = prod;
        return skb;
@@@ -1124,10 -1081,9 +1124,10 @@@ static int bnxt_discard_rx(struct bnxt 
        } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
                struct rx_tpa_end_cmp *tpa_end = cmp;
  
 -              agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
 -                          RX_TPA_END_CMP_AGG_BUFS) >>
 -                         RX_TPA_END_CMP_AGG_BUFS_SHIFT;
 +              if (bp->flags & BNXT_FLAG_CHIP_P5)
 +                      return 0;
 +
 +              agg_bufs = TPA_END_AGG_BUFS(tpa_end);
        }
  
        if (agg_bufs) {
@@@ -1164,60 -1120,26 +1164,60 @@@ static void bnxt_sched_reset(struct bnx
        rxr->rx_next_cons = 0xffff;
  }
  
 +static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
 +{
 +      struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 +      u16 idx = agg_id & MAX_TPA_P5_MASK;
 +
 +      if (test_bit(idx, map->agg_idx_bmap))
 +              idx = find_first_zero_bit(map->agg_idx_bmap,
 +                                        BNXT_AGG_IDX_BMAP_SIZE);
 +      __set_bit(idx, map->agg_idx_bmap);
 +      map->agg_id_tbl[agg_id] = idx;
 +      return idx;
 +}
 +
 +static void bnxt_free_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx)
 +{
 +      struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 +
 +      __clear_bit(idx, map->agg_idx_bmap);
 +}
 +
 +static u16 bnxt_lookup_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
 +{
 +      struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 +
 +      return map->agg_id_tbl[agg_id];
 +}
 +
  static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
                           struct rx_tpa_start_cmp *tpa_start,
                           struct rx_tpa_start_cmp_ext *tpa_start1)
  {
 -      u8 agg_id = TPA_START_AGG_ID(tpa_start);
 -      u16 cons, prod;
 -      struct bnxt_tpa_info *tpa_info;
        struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
 +      struct bnxt_tpa_info *tpa_info;
 +      u16 cons, prod, agg_id;
        struct rx_bd *prod_bd;
        dma_addr_t mapping;
  
 +      if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +              agg_id = TPA_START_AGG_ID_P5(tpa_start);
 +              agg_id = bnxt_alloc_agg_idx(rxr, agg_id);
 +      } else {
 +              agg_id = TPA_START_AGG_ID(tpa_start);
 +      }
        cons = tpa_start->rx_tpa_start_cmp_opaque;
        prod = rxr->rx_prod;
        cons_rx_buf = &rxr->rx_buf_ring[cons];
        prod_rx_buf = &rxr->rx_buf_ring[prod];
        tpa_info = &rxr->rx_tpa[agg_id];
  
 -      if (unlikely(cons != rxr->rx_next_cons)) {
 -              netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n",
 -                          cons, rxr->rx_next_cons);
 +      if (unlikely(cons != rxr->rx_next_cons ||
 +                   TPA_START_ERROR(tpa_start))) {
 +              netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
 +                          cons, rxr->rx_next_cons,
 +                          TPA_START_ERROR_CODE(tpa_start1));
                bnxt_sched_reset(bp, rxr);
                return;
        }
        tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2);
        tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata);
        tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info);
 +      tpa_info->agg_count = 0;
  
        rxr->rx_prod = NEXT_RX(prod);
        cons = NEXT_RX(cons);
        cons_rx_buf->data = NULL;
  }
  
 -static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 cp_cons,
 -                         u32 agg_bufs)
 +static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 idx, u32 agg_bufs)
  {
        if (agg_bufs)
 -              bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
 +              bnxt_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true);
  }
  
 +#ifdef CONFIG_INET
 +static void bnxt_gro_tunnel(struct sk_buff *skb, __be16 ip_proto)
 +{
 +      struct udphdr *uh = NULL;
 +
 +      if (ip_proto == htons(ETH_P_IP)) {
 +              struct iphdr *iph = (struct iphdr *)skb->data;
 +
 +              if (iph->protocol == IPPROTO_UDP)
 +                      uh = (struct udphdr *)(iph + 1);
 +      } else {
 +              struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 +
 +              if (iph->nexthdr == IPPROTO_UDP)
 +                      uh = (struct udphdr *)(iph + 1);
 +      }
 +      if (uh) {
 +              if (uh->check)
 +                      skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
 +              else
 +                      skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
 +      }
 +}
 +#endif
 +
  static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info,
                                           int payload_off, int tcp_ts,
                                           struct sk_buff *skb)
        }
  
        if (inner_mac_off) { /* tunnel */
 -              struct udphdr *uh = NULL;
                __be16 proto = *((__be16 *)(skb->data + outer_ip_off -
                                            ETH_HLEN - 2));
  
 -              if (proto == htons(ETH_P_IP)) {
 -                      struct iphdr *iph = (struct iphdr *)skb->data;
 +              bnxt_gro_tunnel(skb, proto);
 +      }
 +#endif
 +      return skb;
 +}
  
 -                      if (iph->protocol == IPPROTO_UDP)
 -                              uh = (struct udphdr *)(iph + 1);
 -              } else {
 -                      struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 +static struct sk_buff *bnxt_gro_func_5750x(struct bnxt_tpa_info *tpa_info,
 +                                         int payload_off, int tcp_ts,
 +                                         struct sk_buff *skb)
 +{
 +#ifdef CONFIG_INET
 +      u16 outer_ip_off, inner_ip_off, inner_mac_off;
 +      u32 hdr_info = tpa_info->hdr_info;
 +      int iphdr_len, nw_off;
  
 -                      if (iph->nexthdr == IPPROTO_UDP)
 -                              uh = (struct udphdr *)(iph + 1);
 -              }
 -              if (uh) {
 -                      if (uh->check)
 -                              skb_shinfo(skb)->gso_type |=
 -                                      SKB_GSO_UDP_TUNNEL_CSUM;
 -                      else
 -                              skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
 -              }
 +      inner_ip_off = BNXT_TPA_INNER_L3_OFF(hdr_info);
 +      inner_mac_off = BNXT_TPA_INNER_L2_OFF(hdr_info);
 +      outer_ip_off = BNXT_TPA_OUTER_L3_OFF(hdr_info);
 +
 +      nw_off = inner_ip_off - ETH_HLEN;
 +      skb_set_network_header(skb, nw_off);
 +      iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ?
 +                   sizeof(struct ipv6hdr) : sizeof(struct iphdr);
 +      skb_set_transport_header(skb, nw_off + iphdr_len);
 +
 +      if (inner_mac_off) { /* tunnel */
 +              __be16 proto = *((__be16 *)(skb->data + outer_ip_off -
 +                                          ETH_HLEN - 2));
 +
 +              bnxt_gro_tunnel(skb, proto);
        }
  #endif
        return skb;
@@@ -1441,8 -1327,28 +1441,8 @@@ static struct sk_buff *bnxt_gro_func_57
                return NULL;
        }
  
 -      if (nw_off) { /* tunnel */
 -              struct udphdr *uh = NULL;
 -
 -              if (skb->protocol == htons(ETH_P_IP)) {
 -                      struct iphdr *iph = (struct iphdr *)skb->data;
 -
 -                      if (iph->protocol == IPPROTO_UDP)
 -                              uh = (struct udphdr *)(iph + 1);
 -              } else {
 -                      struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 -
 -                      if (iph->nexthdr == IPPROTO_UDP)
 -                              uh = (struct udphdr *)(iph + 1);
 -              }
 -              if (uh) {
 -                      if (uh->check)
 -                              skb_shinfo(skb)->gso_type |=
 -                                      SKB_GSO_UDP_TUNNEL_CSUM;
 -                      else
 -                              skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
 -              }
 -      }
 +      if (nw_off) /* tunnel */
 +              bnxt_gro_tunnel(skb, skb->protocol);
  #endif
        return skb;
  }
@@@ -1465,10 -1371,9 +1465,10 @@@ static inline struct sk_buff *bnxt_gro_
        skb_shinfo(skb)->gso_size =
                le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
        skb_shinfo(skb)->gso_type = tpa_info->gso_type;
 -      payload_off = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
 -                     RX_TPA_END_CMP_PAYLOAD_OFFSET) >>
 -                    RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT;
 +      if (bp->flags & BNXT_FLAG_CHIP_P5)
 +              payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1);
 +      else
 +              payload_off = TPA_END_PAYLOAD_OFF(tpa_end);
        skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb);
        if (likely(skb))
                tcp_gro_complete(skb);
@@@ -1496,14 -1401,14 +1496,14 @@@ static inline struct sk_buff *bnxt_tpa_
  {
        struct bnxt_napi *bnapi = cpr->bnapi;
        struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 -      u8 agg_id = TPA_END_AGG_ID(tpa_end);
        u8 *data_ptr, agg_bufs;
 -      u16 cp_cons = RING_CMP(*raw_cons);
        unsigned int len;
        struct bnxt_tpa_info *tpa_info;
        dma_addr_t mapping;
        struct sk_buff *skb;
 +      u16 idx = 0, agg_id;
        void *data;
 +      bool gro;
  
        if (unlikely(bnapi->in_reset)) {
                int rc = bnxt_discard_rx(bp, cpr, raw_cons, tpa_end);
                return NULL;
        }
  
 -      tpa_info = &rxr->rx_tpa[agg_id];
 +      if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +              agg_id = TPA_END_AGG_ID_P5(tpa_end);
 +              agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
 +              agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1);
 +              tpa_info = &rxr->rx_tpa[agg_id];
 +              if (unlikely(agg_bufs != tpa_info->agg_count)) {
 +                      netdev_warn(bp->dev, "TPA end agg_buf %d != expected agg_bufs %d\n",
 +                                  agg_bufs, tpa_info->agg_count);
 +                      agg_bufs = tpa_info->agg_count;
 +              }
 +              tpa_info->agg_count = 0;
 +              *event |= BNXT_AGG_EVENT;
 +              bnxt_free_agg_idx(rxr, agg_id);
 +              idx = agg_id;
 +              gro = !!(bp->flags & BNXT_FLAG_GRO);
 +      } else {
 +              agg_id = TPA_END_AGG_ID(tpa_end);
 +              agg_bufs = TPA_END_AGG_BUFS(tpa_end);
 +              tpa_info = &rxr->rx_tpa[agg_id];
 +              idx = RING_CMP(*raw_cons);
 +              if (agg_bufs) {
 +                      if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
 +                              return ERR_PTR(-EBUSY);
 +
 +                      *event |= BNXT_AGG_EVENT;
 +                      idx = NEXT_CMP(idx);
 +              }
 +              gro = !!TPA_END_GRO(tpa_end);
 +      }
        data = tpa_info->data;
        data_ptr = tpa_info->data_ptr;
        prefetch(data_ptr);
        len = tpa_info->len;
        mapping = tpa_info->mapping;
  
 -      agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
 -                  RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT;
 -
 -      if (agg_bufs) {
 -              if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
 -                      return ERR_PTR(-EBUSY);
 -
 -              *event |= BNXT_AGG_EVENT;
 -              cp_cons = NEXT_CMP(cp_cons);
 -      }
 -
        if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
 -              bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +              bnxt_abort_tpa(cpr, idx, agg_bufs);
                if (agg_bufs > MAX_SKB_FRAGS)
                        netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
                                    agg_bufs, (int)MAX_SKB_FRAGS);
        if (len <= bp->rx_copy_thresh) {
                skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
                if (!skb) {
 -                      bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +                      bnxt_abort_tpa(cpr, idx, agg_bufs);
                        return NULL;
                }
        } else {
  
                new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC);
                if (!new_data) {
 -                      bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +                      bnxt_abort_tpa(cpr, idx, agg_bufs);
                        return NULL;
                }
  
  
                if (!skb) {
                        kfree(data);
 -                      bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +                      bnxt_abort_tpa(cpr, idx, agg_bufs);
                        return NULL;
                }
                skb_reserve(skb, bp->rx_offset);
        }
  
        if (agg_bufs) {
 -              skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs);
 +              skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
                if (!skb) {
                        /* Page reuse already handled by bnxt_rx_pages(). */
                        return NULL;
                        (tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3;
        }
  
 -      if (TPA_END_GRO(tpa_end))
 +      if (gro)
                skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb);
  
        return skb;
  }
  
 +static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 +                       struct rx_agg_cmp *rx_agg)
 +{
 +      u16 agg_id = TPA_AGG_AGG_ID(rx_agg);
 +      struct bnxt_tpa_info *tpa_info;
 +
 +      agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
 +      tpa_info = &rxr->rx_tpa[agg_id];
 +      BUG_ON(tpa_info->agg_count >= MAX_SKB_FRAGS);
 +      tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg;
 +}
 +
  static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
                             struct sk_buff *skb)
  {
@@@ -1679,13 -1555,6 +1679,13 @@@ static int bnxt_rx_pkt(struct bnxt *bp
        rxcmp = (struct rx_cmp *)
                        &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
  
 +      cmp_type = RX_CMP_TYPE(rxcmp);
 +
 +      if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) {
 +              bnxt_tpa_agg(bp, rxr, (struct rx_agg_cmp *)rxcmp);
 +              goto next_rx_no_prod_no_len;
 +      }
 +
        tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
        cp_cons = RING_CMP(tmp_raw_cons);
        rxcmp1 = (struct rx_cmp_ext *)
        if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
                return -EBUSY;
  
 -      cmp_type = RX_CMP_TYPE(rxcmp);
 -
        prod = rxr->rx_prod;
  
        if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
  
                bnxt_reuse_rx_data(rxr, cons, data);
                if (agg_bufs)
 -                      bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
 +                      bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs,
 +                                             false);
  
                rc = -EIO;
                if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
                bnxt_reuse_rx_data(rxr, cons, data);
                if (!skb) {
                        if (agg_bufs)
 -                              bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
 +                              bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
 +                                                     agg_bufs, false);
                        rc = -ENOMEM;
                        goto next_rx;
                }
        }
  
        if (agg_bufs) {
 -              skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs);
 +              skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
                if (!skb) {
                        rc = -ENOMEM;
                        goto next_rx;
@@@ -2152,9 -2021,9 +2152,9 @@@ static void __bnxt_poll_work_done(struc
        if (bnapi->events & BNXT_RX_EVENT) {
                struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
  
-               bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
                if (bnapi->events & BNXT_AGG_EVENT)
                        bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+               bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
        }
        bnapi->events = 0;
  }
@@@ -2456,11 -2325,10 +2456,11 @@@ static void bnxt_free_rx_skbs(struct bn
        max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT;
        for (i = 0; i < bp->rx_nr_rings; i++) {
                struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 +              struct bnxt_tpa_idx_map *map;
                int j;
  
                if (rxr->rx_tpa) {
 -                      for (j = 0; j < MAX_TPA; j++) {
 +                      for (j = 0; j < bp->max_tpa; j++) {
                                struct bnxt_tpa_info *tpa_info =
                                                        &rxr->rx_tpa[j];
                                u8 *data = tpa_info->data;
                        __free_page(rxr->rx_page);
                        rxr->rx_page = NULL;
                }
 +              map = rxr->rx_tpa_idx_map;
 +              if (map)
 +                      memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap));
        }
  }
  
@@@ -2618,61 -2483,6 +2618,61 @@@ static int bnxt_alloc_ring(struct bnxt 
        return 0;
  }
  
 +static void bnxt_free_tpa_info(struct bnxt *bp)
 +{
 +      int i;
 +
 +      for (i = 0; i < bp->rx_nr_rings; i++) {
 +              struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 +
 +              kfree(rxr->rx_tpa_idx_map);
 +              rxr->rx_tpa_idx_map = NULL;
 +              if (rxr->rx_tpa) {
 +                      kfree(rxr->rx_tpa[0].agg_arr);
 +                      rxr->rx_tpa[0].agg_arr = NULL;
 +              }
 +              kfree(rxr->rx_tpa);
 +              rxr->rx_tpa = NULL;
 +      }
 +}
 +
 +static int bnxt_alloc_tpa_info(struct bnxt *bp)
 +{
 +      int i, j, total_aggs = 0;
 +
 +      bp->max_tpa = MAX_TPA;
 +      if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +              if (!bp->max_tpa_v2)
 +                      return 0;
 +              bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
 +              total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
 +      }
 +
 +      for (i = 0; i < bp->rx_nr_rings; i++) {
 +              struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 +              struct rx_agg_cmp *agg;
 +
 +              rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info),
 +                                    GFP_KERNEL);
 +              if (!rxr->rx_tpa)
 +                      return -ENOMEM;
 +
 +              if (!(bp->flags & BNXT_FLAG_CHIP_P5))
 +                      continue;
 +              agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
 +              rxr->rx_tpa[0].agg_arr = agg;
 +              if (!agg)
 +                      return -ENOMEM;
 +              for (j = 1; j < bp->max_tpa; j++)
 +                      rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
 +              rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
 +                                            GFP_KERNEL);
 +              if (!rxr->rx_tpa_idx_map)
 +                      return -ENOMEM;
 +      }
 +      return 0;
 +}
 +
  static void bnxt_free_rx_rings(struct bnxt *bp)
  {
        int i;
        if (!bp->rx_ring)
                return;
  
 +      bnxt_free_tpa_info(bp);
        for (i = 0; i < bp->rx_nr_rings; i++) {
                struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
                struct bnxt_ring_struct *ring;
                page_pool_destroy(rxr->page_pool);
                rxr->page_pool = NULL;
  
 -              kfree(rxr->rx_tpa);
 -              rxr->rx_tpa = NULL;
 -
                kfree(rxr->rx_agg_bmap);
                rxr->rx_agg_bmap = NULL;
  
@@@ -2727,7 -2539,7 +2727,7 @@@ static int bnxt_alloc_rx_page_pool(stru
  
  static int bnxt_alloc_rx_rings(struct bnxt *bp)
  {
 -      int i, rc, agg_rings = 0, tpa_rings = 0;
 +      int i, rc = 0, agg_rings = 0;
  
        if (!bp->rx_ring)
                return -ENOMEM;
        if (bp->flags & BNXT_FLAG_AGG_RINGS)
                agg_rings = 1;
  
 -      if (bp->flags & BNXT_FLAG_TPA)
 -              tpa_rings = 1;
 -
        for (i = 0; i < bp->rx_nr_rings; i++) {
                struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
                struct bnxt_ring_struct *ring;
                        rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL);
                        if (!rxr->rx_agg_bmap)
                                return -ENOMEM;
 -
 -                      if (tpa_rings) {
 -                              rxr->rx_tpa = kcalloc(MAX_TPA,
 -                                              sizeof(struct bnxt_tpa_info),
 -                                              GFP_KERNEL);
 -                              if (!rxr->rx_tpa)
 -                                      return -ENOMEM;
 -                      }
                }
        }
 -      return 0;
 +      if (bp->flags & BNXT_FLAG_TPA)
 +              rc = bnxt_alloc_tpa_info(bp);
 +      return rc;
  }
  
  static void bnxt_free_tx_rings(struct bnxt *bp)
@@@ -3132,7 -2953,7 +3132,7 @@@ static int bnxt_init_one_rx_ring(struc
                        u8 *data;
                        dma_addr_t mapping;
  
 -                      for (i = 0; i < MAX_TPA; i++) {
 +                      for (i = 0; i < bp->max_tpa; i++) {
                                data = __bnxt_alloc_rx_data(bp, &mapping,
                                                            GFP_KERNEL);
                                if (!data)
@@@ -3647,7 -3468,7 +3647,7 @@@ static void bnxt_free_ring_stats(struc
        if (!bp->bnapi)
                return;
  
 -      size = sizeof(struct ctx_hw_stats);
 +      size = bp->hw_ring_stats_size;
  
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
@@@ -3666,7 -3487,7 +3666,7 @@@ static int bnxt_alloc_stats(struct bnx
        u32 size, i;
        struct pci_dev *pdev = bp->pdev;
  
 -      size = sizeof(struct ctx_hw_stats);
 +      size = bp->hw_ring_stats_size;
  
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
@@@ -4593,7 -4414,6 +4593,7 @@@ static int bnxt_hwrm_clear_vnic_filter(
  static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
  {
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
 +      u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX;
        struct hwrm_vnic_tpa_cfg_input req = {0};
  
        if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
                        nsegs = (MAX_SKB_FRAGS - n) / n;
                }
  
 -              segs = ilog2(nsegs);
 +              if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +                      segs = MAX_TPA_SEGS_P5;
 +                      max_aggs = bp->max_tpa;
 +              } else {
 +                      segs = ilog2(nsegs);
 +              }
                req.max_agg_segs = cpu_to_le16(segs);
 -              req.max_aggs = cpu_to_le16(VNIC_TPA_CFG_REQ_MAX_AGGS_MAX);
 +              req.max_aggs = cpu_to_le16(max_aggs);
  
                req.min_agg_len = cpu_to_le32(512);
        }
@@@ -5000,12 -4815,6 +5000,12 @@@ static int bnxt_hwrm_vnic_qcaps(struct 
                if (flags &
                    VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP)
                        bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP;
 +              bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported);
 +              if (bp->max_tpa_v2)
 +                      bp->hw_ring_stats_size =
 +                              sizeof(struct ctx_hw_stats_ext);
 +              else
 +                      bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats);
        }
        mutex_unlock(&bp->hwrm_cmd_lock);
        return rc;
@@@ -5255,6 -5064,7 +5255,7 @@@ static void bnxt_set_db(struct bnxt *bp
  
  static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
  {
+       bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS);
        int i, rc = 0;
        u32 type;
  
                if (rc)
                        goto err_out;
                bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id);
-               bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
+               /* If we have agg rings, post agg buffers first. */
+               if (!agg_rings)
+                       bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
                bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id;
                if (bp->flags & BNXT_FLAG_CHIP_P5) {
                        struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
                }
        }
  
-       if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+       if (agg_rings) {
                type = HWRM_RING_ALLOC_AGG;
                for (i = 0; i < bp->rx_nr_rings; i++) {
                        struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
                        bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx,
                                    ring->fw_ring_id);
                        bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+                       bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
                        bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id;
                }
        }
@@@ -6203,7 -6016,6 +6207,7 @@@ static int bnxt_hwrm_stat_ctx_alloc(str
  
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
  
 +      req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
        req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
  
        mutex_lock(&bp->hwrm_cmd_lock);
@@@ -7208,19 -7020,29 +7212,29 @@@ static void bnxt_hwrm_clear_vnic_rss(st
                bnxt_hwrm_vnic_set_rss(bp, i, false);
  }
  
- static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
-                                   bool irq_re_init)
+ static void bnxt_clear_vnic(struct bnxt *bp)
  {
-       if (bp->vnic_info) {
-               bnxt_hwrm_clear_vnic_filter(bp);
+       if (!bp->vnic_info)
+               return;
+       bnxt_hwrm_clear_vnic_filter(bp);
+       if (!(bp->flags & BNXT_FLAG_CHIP_P5)) {
                /* clear all RSS setting before free vnic ctx */
                bnxt_hwrm_clear_vnic_rss(bp);
                bnxt_hwrm_vnic_ctx_free(bp);
-               /* before free the vnic, undo the vnic tpa settings */
-               if (bp->flags & BNXT_FLAG_TPA)
-                       bnxt_set_tpa(bp, false);
-               bnxt_hwrm_vnic_free(bp);
        }
+       /* before free the vnic, undo the vnic tpa settings */
+       if (bp->flags & BNXT_FLAG_TPA)
+               bnxt_set_tpa(bp, false);
+       bnxt_hwrm_vnic_free(bp);
+       if (bp->flags & BNXT_FLAG_CHIP_P5)
+               bnxt_hwrm_vnic_ctx_free(bp);
+ }
+ 
+ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
+                                   bool irq_re_init)
+ {
+       bnxt_clear_vnic(bp);
        bnxt_hwrm_ring_free(bp, close_path);
        bnxt_hwrm_ring_grp_free(bp);
        if (irq_re_init) {
@@@ -9484,8 -9306,7 +9498,8 @@@ static int bnxt_set_features(struct net
        if (changes & BNXT_FLAG_TPA) {
                update_tpa = true;
                if ((bp->flags & BNXT_FLAG_TPA) == 0 ||
 -                  (flags & BNXT_FLAG_TPA) == 0)
 +                  (flags & BNXT_FLAG_TPA) == 0 ||
 +                  (bp->flags & BNXT_FLAG_CHIP_P5))
                        re_init = true;
        }
  
        if (flags != bp->flags) {
                u32 old_flags = bp->flags;
  
 -              bp->flags = flags;
 -
                if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
 +                      bp->flags = flags;
                        if (update_tpa)
                                bnxt_set_ring_params(bp);
                        return rc;
  
                if (re_init) {
                        bnxt_close_nic(bp, false, false);
 +                      bp->flags = flags;
                        if (update_tpa)
                                bnxt_set_ring_params(bp);
  
                        return bnxt_open_nic(bp, false, false);
                }
                if (update_tpa) {
 +                      bp->flags = flags;
                        rc = bnxt_set_tpa(bp,
                                          (flags & BNXT_FLAG_TPA) ?
                                          true : false);
@@@ -9908,68 -9728,6 +9922,68 @@@ static void bnxt_init_dflt_coal(struct 
        bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
  }
  
 +static int bnxt_fw_init_one_p1(struct bnxt *bp)
 +{
 +      int rc;
 +
 +      bp->fw_cap = 0;
 +      rc = bnxt_hwrm_ver_get(bp);
 +      if (rc)
 +              return rc;
 +
 +      if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
 +              rc = bnxt_alloc_kong_hwrm_resources(bp);
 +              if (rc)
 +                      bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
 +      }
 +
 +      if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 +          bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
 +              rc = bnxt_alloc_hwrm_short_cmd_req(bp);
 +              if (rc)
 +                      return rc;
 +      }
 +      rc = bnxt_hwrm_func_reset(bp);
 +      if (rc)
 +              return -ENODEV;
 +
 +      bnxt_hwrm_fw_set_time(bp);
 +      return 0;
 +}
 +
 +static int bnxt_fw_init_one_p2(struct bnxt *bp)
 +{
 +      int rc;
 +
 +      /* Get the MAX capabilities for this function */
 +      rc = bnxt_hwrm_func_qcaps(bp);
 +      if (rc) {
 +              netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
 +                         rc);
 +              return -ENODEV;
 +      }
 +
 +      rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
 +      if (rc)
 +              netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
 +                          rc);
 +
 +      rc = bnxt_hwrm_func_drv_rgtr(bp);
 +      if (rc)
 +              return -ENODEV;
 +
 +      rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
 +      if (rc)
 +              return -ENODEV;
 +
 +      bnxt_hwrm_func_qcfg(bp);
 +      bnxt_hwrm_vnic_qcaps(bp);
 +      bnxt_hwrm_port_led_qcaps(bp);
 +      bnxt_ethtool_init(bp);
 +      bnxt_dcb_init(bp);
 +      return 0;
 +}
 +
  static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
  {
        int rc;
@@@ -10925,18 -10683,32 +10939,18 @@@ static int bnxt_init_one(struct pci_de
                goto init_err_pci_clean;
  
        mutex_init(&bp->hwrm_cmd_lock);
 -      rc = bnxt_hwrm_ver_get(bp);
 +
 +      rc = bnxt_fw_init_one_p1(bp);
        if (rc)
                goto init_err_pci_clean;
  
 -      if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
 -              rc = bnxt_alloc_kong_hwrm_resources(bp);
 -              if (rc)
 -                      bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
 -      }
 -
 -      if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 -          bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
 -              rc = bnxt_alloc_hwrm_short_cmd_req(bp);
 -              if (rc)
 -                      goto init_err_pci_clean;
 -      }
 -
        if (BNXT_CHIP_P5(bp))
                bp->flags |= BNXT_FLAG_CHIP_P5;
  
 -      rc = bnxt_hwrm_func_reset(bp);
 +      rc = bnxt_fw_init_one_p2(bp);
        if (rc)
                goto init_err_pci_clean;
  
 -      bnxt_hwrm_fw_set_time(bp);
 -
        dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
                           NETIF_F_TSO | NETIF_F_TSO6 |
                           NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
                bp->gro_func = bnxt_gro_func_5730x;
                if (BNXT_CHIP_P4(bp))
                        bp->gro_func = bnxt_gro_func_5731x;
 +              else if (BNXT_CHIP_P5(bp))
 +                      bp->gro_func = bnxt_gro_func_5750x;
        }
        if (!BNXT_CHIP_P4_PLUS(bp))
                bp->flags |= BNXT_FLAG_DOUBLE_DB;
  
 -      rc = bnxt_hwrm_func_drv_rgtr(bp);
 -      if (rc)
 -              goto init_err_pci_clean;
 -
 -      rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
 -      if (rc)
 -              goto init_err_pci_clean;
 -
        bp->ulp_probe = bnxt_ulp_probe;
  
 -      rc = bnxt_hwrm_queue_qportcfg(bp);
 -      if (rc) {
 -              netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n",
 -                         rc);
 -              rc = -1;
 -              goto init_err_pci_clean;
 -      }
 -      /* Get the MAX capabilities for this function */
 -      rc = bnxt_hwrm_func_qcaps(bp);
 -      if (rc) {
 -              netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
 -                         rc);
 -              rc = -1;
 -              goto init_err_pci_clean;
 -      }
 -
 -      rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
 -      if (rc)
 -              netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
 -                          rc);
 -
        rc = bnxt_init_mac_addr(bp);
        if (rc) {
                dev_err(&pdev->dev, "Unable to initialize mac address.\n");
                if (rc)
                        goto init_err_pci_clean;
        }
 -      bnxt_hwrm_func_qcfg(bp);
 -      bnxt_hwrm_vnic_qcaps(bp);
 -      bnxt_hwrm_port_led_qcaps(bp);
 -      bnxt_ethtool_init(bp);
 -      bnxt_dcb_init(bp);
  
        /* MTU range: 60 - FW defined max */
        dev->min_mtu = ETH_ZLEN;
@@@ -11130,7 -10934,8 +11144,7 @@@ shutdown_exit
  #ifdef CONFIG_PM_SLEEP
  static int bnxt_suspend(struct device *device)
  {
 -      struct pci_dev *pdev = to_pci_dev(device);
 -      struct net_device *dev = pci_get_drvdata(pdev);
 +      struct net_device *dev = dev_get_drvdata(device);
        struct bnxt *bp = netdev_priv(dev);
        int rc = 0;
  
  
  static int bnxt_resume(struct device *device)
  {
 -      struct pci_dev *pdev = to_pci_dev(device);
 -      struct net_device *dev = pci_get_drvdata(pdev);
 +      struct net_device *dev = dev_get_drvdata(device);
        struct bnxt *bp = netdev_priv(dev);
        int rc = 0;
  
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3a3d8a9be5edda25e6ed224d466298bfd4a795d2,8445a0cce849c0c8781a14dc5f11874cba1a53a5..b624174c85942b7e5220daacfad68d89038b2340
@@@ -137,44 -137,7 +137,44 @@@ reset_coalesce
        return rc;
  }
  
 -#define BNXT_NUM_STATS        22
 +static const char * const bnxt_ring_stats_str[] = {
 +      "rx_ucast_packets",
 +      "rx_mcast_packets",
 +      "rx_bcast_packets",
 +      "rx_discards",
 +      "rx_drops",
 +      "rx_ucast_bytes",
 +      "rx_mcast_bytes",
 +      "rx_bcast_bytes",
 +      "tx_ucast_packets",
 +      "tx_mcast_packets",
 +      "tx_bcast_packets",
 +      "tx_discards",
 +      "tx_drops",
 +      "tx_ucast_bytes",
 +      "tx_mcast_bytes",
 +      "tx_bcast_bytes",
 +};
 +
 +static const char * const bnxt_ring_tpa_stats_str[] = {
 +      "tpa_packets",
 +      "tpa_bytes",
 +      "tpa_events",
 +      "tpa_aborts",
 +};
 +
 +static const char * const bnxt_ring_tpa2_stats_str[] = {
 +      "rx_tpa_eligible_pkt",
 +      "rx_tpa_eligible_bytes",
 +      "rx_tpa_pkt",
 +      "rx_tpa_bytes",
 +      "rx_tpa_errors",
 +};
 +
 +static const char * const bnxt_ring_sw_stats_str[] = {
 +      "rx_l4_csum_errors",
 +      "missed_irqs",
 +};
  
  #define BNXT_RX_STATS_ENTRY(counter)  \
        { BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
        BNXT_TX_STATS_EXT_COS_ENTRY(6),                         \
        BNXT_TX_STATS_EXT_COS_ENTRY(7)                          \
  
 +#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(n)                        \
 +      BNXT_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n),       \
 +      BNXT_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n)
 +
 +#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(0),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(1),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(2),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(3),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(4),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(5),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(6),                         \
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(7)
 +
  #define BNXT_RX_STATS_PRI_ENTRY(counter, n)           \
        { BNXT_RX_STATS_EXT_OFFSET(counter##_cos0),     \
          __stringify(counter##_pri##n) }
@@@ -403,7 -352,6 +403,7 @@@ static const struct 
        BNXT_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold),
        BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err),
        BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits),
 +      BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES,
  };
  
  static const struct {
@@@ -469,29 -417,9 +469,29 @@@ static const struct 
         ARRAY_SIZE(bnxt_tx_pkts_pri_arr))
  #define BNXT_NUM_PCIE_STATS ARRAY_SIZE(bnxt_pcie_stats_arr)
  
 +static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
 +{
 +      if (BNXT_SUPPORTS_TPA(bp)) {
 +              if (bp->max_tpa_v2)
 +                      return ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
 +              return ARRAY_SIZE(bnxt_ring_tpa_stats_str);
 +      }
 +      return 0;
 +}
 +
 +static int bnxt_get_num_ring_stats(struct bnxt *bp)
 +{
 +      int num_stats;
 +
 +      num_stats = ARRAY_SIZE(bnxt_ring_stats_str) +
 +                  ARRAY_SIZE(bnxt_ring_sw_stats_str) +
 +                  bnxt_get_num_tpa_ring_stats(bp);
 +      return num_stats * bp->cp_nr_rings;
 +}
 +
  static int bnxt_get_num_stats(struct bnxt *bp)
  {
 -      int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
 +      int num_stats = bnxt_get_num_ring_stats(bp);
  
        num_stats += BNXT_NUM_SW_FUNC_STATS;
  
@@@ -532,11 -460,10 +532,11 @@@ static void bnxt_get_ethtool_stats(stru
  {
        u32 i, j = 0;
        struct bnxt *bp = netdev_priv(dev);
 -      u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
 +      u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) +
 +                        bnxt_get_num_tpa_ring_stats(bp);
  
        if (!bp->bnapi) {
 -              j += BNXT_NUM_STATS * bp->cp_nr_rings + BNXT_NUM_SW_FUNC_STATS;
 +              j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS;
                goto skip_ring_stats;
        }
  
@@@ -624,39 -551,56 +624,39 @@@ skip_ring_stats
  static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
  {
        struct bnxt *bp = netdev_priv(dev);
 -      u32 i;
 +      static const char * const *str;
 +      u32 i, j, num_str;
  
        switch (stringset) {
 -      /* The number of strings must match BNXT_NUM_STATS defined above. */
        case ETH_SS_STATS:
                for (i = 0; i < bp->cp_nr_rings; i++) {
 -                      sprintf(buf, "[%d]: rx_ucast_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_mcast_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_bcast_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_discards", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_drops", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_ucast_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_mcast_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_bcast_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_ucast_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_mcast_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_bcast_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_discards", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_drops", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_ucast_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_mcast_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tx_bcast_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tpa_packets", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tpa_bytes", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tpa_events", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: tpa_aborts", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: rx_l4_csum_errors", i);
 -                      buf += ETH_GSTRING_LEN;
 -                      sprintf(buf, "[%d]: missed_irqs", i);
 -                      buf += ETH_GSTRING_LEN;
 +                      num_str = ARRAY_SIZE(bnxt_ring_stats_str);
 +                      for (j = 0; j < num_str; j++) {
 +                              sprintf(buf, "[%d]: %s", i,
 +                                      bnxt_ring_stats_str[j]);
 +                              buf += ETH_GSTRING_LEN;
 +                      }
 +                      if (!BNXT_SUPPORTS_TPA(bp))
 +                              goto skip_tpa_stats;
 +
 +                      if (bp->max_tpa_v2) {
 +                              num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
 +                              str = bnxt_ring_tpa2_stats_str;
 +                      } else {
 +                              num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str);
 +                              str = bnxt_ring_tpa_stats_str;
 +                      }
 +                      for (j = 0; j < num_str; j++) {
 +                              sprintf(buf, "[%d]: %s", i, str[j]);
 +                              buf += ETH_GSTRING_LEN;
 +                      }
 +skip_tpa_stats:
 +                      num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str);
 +                      for (j = 0; j < num_str; j++) {
 +                              sprintf(buf, "[%d]: %s", i,
 +                                      bnxt_ring_sw_stats_str[j]);
 +                              buf += ETH_GSTRING_LEN;
 +                      }
                }
                for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
                        strcpy(buf, bnxt_sw_func_stats[i].string);
@@@ -2072,21 -2016,19 +2072,19 @@@ static int bnxt_flash_package_from_file
        mutex_lock(&bp->hwrm_cmd_lock);
        hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
                                      INSTALL_PACKAGE_TIMEOUT);
-       if (hwrm_err)
-               goto flash_pkg_exit;
-       if (resp->error_code) {
+       if (hwrm_err) {
                u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
  
-               if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
+               if (resp->error_code && error_code ==
+                   NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
                        install.flags |= cpu_to_le16(
                               NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
                        hwrm_err = _hwrm_send_message(bp, &install,
                                                      sizeof(install),
                                                      INSTALL_PACKAGE_TIMEOUT);
-                       if (hwrm_err)
-                               goto flash_pkg_exit;
                }
+               if (hwrm_err)
+                       goto flash_pkg_exit;
        }
  
        if (resp->result) {
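The bnxt_get_strings() rework above swaps a long run of per-stat sprintf() calls for tables of stat names that are stamped out once per ring, so adding a counter means adding one table entry. A minimal userspace C sketch of that table-driven pattern follows; the buffer width, stat names and function names are illustrative, not the driver's.

#include <stdio.h>

#define STR_LEN 32                      /* stands in for ETH_GSTRING_LEN */

static const char * const ring_stats_str[] = {
        "rx_ucast_packets", "tx_ucast_packets", "rx_drops",
};

#define NUM_RING_STATS (sizeof(ring_stats_str) / sizeof(ring_stats_str[0]))

/* Fill one fixed-width name slot per table entry for the given ring. */
static char *emit_ring_strings(char *buf, int ring)
{
        size_t i;

        for (i = 0; i < NUM_RING_STATS; i++) {
                snprintf(buf, STR_LEN, "[%d]: %s", ring, ring_stats_str[i]);
                buf += STR_LEN;
        }
        return buf;
}

int main(void)
{
        char names[2 * NUM_RING_STATS * STR_LEN];
        char *p = names;
        int ring;

        for (ring = 0; ring < 2; ring++)
                p = emit_ring_strings(p, ring);

        for (p = names; p < names + sizeof(names); p += STR_LEN)
                puts(p);
        return 0;
}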
index dd99c55d9a88137773da5054d2a7e47ea857316a,d692251ee252c2e98bb46423002acda19829bf8f..ae6a47dd7dc9b842af6c4f89bf2255ecc200d280
@@@ -3236,8 -3236,10 +3236,10 @@@ static ssize_t blocked_fl_write(struct 
                return -ENOMEM;
  
        err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz);
-       if (err)
+       if (err) {
+               kvfree(t);
                return err;
+       }
  
        bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz);
        kvfree(t);
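The blocked_fl_write() change above is a straightforward error-path leak fix: the temporary bitmap has to be released on the early return as well as after the successful copy. A small userspace sketch of the same rule, with calloc/free standing in for the kernel's kvzalloc/kvfree and a string check standing in for bitmap_parse_user():

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* Parse into a temporary buffer; copy it out only when parsing succeeded. */
static int parse_into(char *dst, size_t len, const char *src)
{
        char *tmp = calloc(1, len);
        int err = 0;

        if (!tmp)
                return -ENOMEM;

        if (strlen(src) + 1 > len)
                err = -EINVAL;          /* parse failure */
        else
                memcpy(tmp, src, strlen(src) + 1);

        if (err) {
                free(tmp);              /* the fix: no leak on the error path */
                return err;
        }

        memcpy(dst, tmp, len);
        free(tmp);
        return 0;
}

int main(void)
{
        char out[16];

        return parse_into(out, sizeof(out), "0-3,8") ? 1 : 0;
}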
@@@ -3529,6 -3531,7 +3531,6 @@@ int t4_setup_debugfs(struct adapter *ad
  {
        int i;
        u32 size = 0;
 -      struct dentry *de;
  
        static struct t4_debugfs_entry t4_debugfs_files[] = {
                { "cim_la", &cim_la_fops, 0400, 0 },
                }
        }
  
 -      de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
 -                                    &flash_debugfs_fops, adap->params.sf_size);
 +      debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
 +                               &flash_debugfs_fops, adap->params.sf_size);
        debugfs_create_bool("use_backdoor", 0600,
                            adap->debugfs_root, &adap->use_bd);
        debugfs_create_bool("trace_rss", 0600,
index 81a05ea38237e85903e19ce209b95e5d5298ac9c,cebd20f3128d4a6c6344c94b80496875e091d8d7..07efa2b40003916c315c98f95b5e1e3caec38244
@@@ -1485,7 -1485,7 +1485,7 @@@ static netdev_tx_t ibmvnic_xmit(struct 
  
                        memcpy(dst + cur,
                               page_address(skb_frag_page(frag)) +
 -                             frag->page_offset, skb_frag_size(frag));
 +                             skb_frag_off(frag), skb_frag_size(frag));
                        cur += skb_frag_size(frag);
                }
        } else {
                lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
                                               (u64)tx_buff->indir_dma,
                                               (u64)num_entries);
+               dma_unmap_single(dev, tx_buff->indir_dma,
+                                sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
        } else {
                tx_buff->num_entries = num_entries;
                lpar_rc = send_subcrq(adapter, handle_array[queue_num],
@@@ -2788,7 -2790,6 +2790,6 @@@ static int ibmvnic_complete_tx(struct i
        union sub_crq *next;
        int index;
        int i, j;
-       u8 *first;
  
  restart_loop:
        while (pending_scrq(adapter, scrq)) {
  
                                txbuff->data_dma[j] = 0;
                        }
-                       /* if sub_crq was sent indirectly */
-                       first = &txbuff->indir_arr[0].generic.first;
-                       if (*first == IBMVNIC_CRQ_CMD) {
-                               dma_unmap_single(dev, txbuff->indir_dma,
-                                                sizeof(txbuff->indir_arr),
-                                                DMA_TO_DEVICE);
-                               *first = 0;
-                       }
  
                        if (txbuff->last_frag) {
                                dev_kfree_skb_any(txbuff->skb);
index dc7b128c780e82b2ac26f0f20720c32923915bec,7882148abb4320ccf3a979b565fa93990366f991..17b7ae9f46ecdc704cde918339536b08c1ba0357
@@@ -1785,7 -1785,7 +1785,7 @@@ static bool ixgbe_is_non_eop(struct ixg
  static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
                            struct sk_buff *skb)
  {
 -      struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
 +      skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
        unsigned char *va;
        unsigned int pull_len;
  
  
        /* update all of the pointers */
        skb_frag_size_sub(frag, pull_len);
 -      frag->page_offset += pull_len;
 +      skb_frag_off_add(frag, pull_len);
        skb->data_len -= pull_len;
        skb->tail += pull_len;
  }
@@@ -1840,11 -1840,11 +1840,11 @@@ static void ixgbe_dma_sync_frag(struct 
                                              skb_headlen(skb),
                                              DMA_FROM_DEVICE);
        } else {
 -              struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
 +              skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
  
                dma_sync_single_range_for_cpu(rx_ring->dev,
                                              IXGBE_CB(skb)->dma,
 -                                            frag->page_offset,
 +                                            skb_frag_off(frag),
                                              skb_frag_size(frag),
                                              DMA_FROM_DEVICE);
        }
@@@ -7897,11 -7897,8 +7897,8 @@@ static void ixgbe_service_task(struct w
                return;
        }
        if (ixgbe_check_fw_error(adapter)) {
-               if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
-                       rtnl_lock();
+               if (!test_bit(__IXGBE_DOWN, &adapter->state))
                        unregister_netdev(adapter->netdev);
-                       rtnl_unlock();
-               }
                ixgbe_service_event_complete(adapter);
                return;
        }
@@@ -8186,7 -8183,7 +8183,7 @@@ static int ixgbe_tx_map(struct ixgbe_ri
        struct sk_buff *skb = first->skb;
        struct ixgbe_tx_buffer *tx_buffer;
        union ixgbe_adv_tx_desc *tx_desc;
 -      struct skb_frag_struct *frag;
 +      skb_frag_t *frag;
        dma_addr_t dma;
        unsigned int data_len, size;
        u32 tx_flags = first->tx_flags;
@@@ -8605,8 -8602,7 +8602,8 @@@ netdev_tx_t ixgbe_xmit_frame_ring(struc
         * otherwise try next time
         */
        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
 -              count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
 +              count += TXD_USE_COUNT(skb_frag_size(
 +                                              &skb_shinfo(skb)->frags[f]));
  
        if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
                tx_ring->tx_stats.tx_busy++;
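The ibmvnic and ixgbe hunks in this merge are part of the same tree-wide conversion: fragment fields are no longer read or written directly (frag->page_offset, frag->size) but only through accessors such as skb_frag_off(), skb_frag_off_add() and skb_frag_size(), which lets the underlying layout change without touching every driver. A hedged userspace sketch of the accessor idea; the struct and helper names below are made up for illustration:

#include <stdio.h>

/* Illustrative fragment descriptor; callers never touch the fields directly. */
struct frag {
        unsigned int off;
        unsigned int size;
};

static inline unsigned int frag_off(const struct frag *f)  { return f->off; }
static inline unsigned int frag_size(const struct frag *f) { return f->size; }

static inline void frag_off_add(struct frag *f, int delta)  { f->off += delta; }
static inline void frag_size_sub(struct frag *f, int delta) { f->size -= delta; }

/* "Pull" the first pull_len bytes out of the fragment, as the rx path does. */
static void pull_tail(struct frag *f, unsigned int pull_len)
{
        frag_size_sub(f, pull_len);
        frag_off_add(f, pull_len);
}

int main(void)
{
        struct frag f = { .off = 0, .size = 256 };

        pull_tail(&f, 64);
        printf("off=%u size=%u\n", frag_off(&f), frag_size(&f));
        return 0;
}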
index 0807992090b850ce5dc2c0906ed71cb306cc3163,65bec19a438f15c5db9f0e0d28bd3ac53a7a0c83..8cf548c7ad9cdcf60c1ab85c482896b11e187e29
@@@ -184,8 -184,13 +184,13 @@@ static inline int mlx5e_get_max_num_cha
  
  struct mlx5e_tx_wqe {
        struct mlx5_wqe_ctrl_seg ctrl;
-       struct mlx5_wqe_eth_seg  eth;
-       struct mlx5_wqe_data_seg data[0];
+       union {
+               struct {
+                       struct mlx5_wqe_eth_seg  eth;
+                       struct mlx5_wqe_data_seg data[0];
+               };
+               u8 tls_progress_params_ctx[0];
+       };
  };
  
  struct mlx5e_rx_wqe_ll {
@@@ -351,7 -356,6 +356,7 @@@ enum 
        MLX5E_SQ_STATE_IPSEC,
        MLX5E_SQ_STATE_AM,
        MLX5E_SQ_STATE_TLS,
 +      MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
  };
  
  struct mlx5e_sq_wqe_info {
@@@ -476,6 -480,8 +481,6 @@@ struct mlx5e_xdp_mpwqe 
        struct mlx5e_tx_wqe *wqe;
        u8                   ds_count;
        u8                   pkt_count;
 -      u8                   max_ds_count;
 -      u8                   complete;
        u8                   inline_on;
  };
  
@@@ -1099,6 -1105,8 +1104,8 @@@ u32 mlx5e_ethtool_get_rxfh_key_size(str
  u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
  int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
                              struct ethtool_ts_info *info);
+ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+                              struct ethtool_flash *flash);
  void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
                                  struct ethtool_pauseparam *pauseparam);
  int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
@@@ -1127,6 -1135,7 +1134,6 @@@ void mlx5e_build_rq_params(struct mlx5_
                           struct mlx5e_params *params);
  void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
                            u16 num_channels);
 -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
  void mlx5e_rx_dim_work(struct work_struct *work);
  void mlx5e_tx_dim_work(struct work_struct *work);
  
index 6e54fefea410ec823bef32181a9bc80c8f963218,c7f86453c63843c619d6fdad7d078c0ec2acacd3..817c6ea7e3492e34bf289d965e7b9b4e42b374e9
@@@ -1,6 -1,7 +1,6 @@@
  /* SPDX-License-Identifier: GPL-2.0 */
  /* Copyright (c) 2019 Mellanox Technologies. */
  
 -#include <net/devlink.h>
  #include "reporter.h"
  #include "lib/eq.h"
  
@@@ -75,26 -76,21 +75,21 @@@ static int mlx5e_tx_reporter_err_cqe_re
        u8 state;
        int err;
  
-       if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
-               return 0;
        err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
        if (err) {
                netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
                           sq->sqn, err);
-               return err;
+               goto out;
        }
  
-       if (state != MLX5_SQC_STATE_ERR) {
-               netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
-               return -EINVAL;
-       }
+       if (state != MLX5_SQC_STATE_ERR)
+               goto out;
  
        mlx5e_tx_disable_queue(sq->txq);
  
        err = mlx5e_wait_for_sq_flush(sq);
        if (err)
-               return err;
+               goto out;
  
        /* At this point, no new packets will arrive from the stack as TXQ is
         * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
  
        err = mlx5e_sq_to_ready(sq, state);
        if (err)
-               return err;
+               goto out;
  
        mlx5e_reset_txqsq_cc_pc(sq);
        sq->stats->recover++;
+       clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
        mlx5e_activate_txqsq(sq);
  
        return 0;
+ out:
+       clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+       return err;
  }
  
  static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
                                 char *err_str,
                                 struct mlx5e_tx_err_ctx *err_ctx)
  {
 -      if (IS_ERR_OR_NULL(tx_reporter)) {
 +      if (!tx_reporter) {
                netdev_err(err_ctx->sq->channel->netdev, err_str);
                return err_ctx->recover(err_ctx->sq);
        }
@@@ -288,27 -288,23 +287,27 @@@ static const struct devlink_health_repo
  
  int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
  {
 +      struct devlink_health_reporter *reporter;
        struct mlx5_core_dev *mdev = priv->mdev;
        struct devlink *devlink = priv_to_devlink(mdev);
  
 -      priv->tx_reporter =
 +      reporter =
                devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
                                               MLX5_REPORTER_TX_GRACEFUL_PERIOD,
                                               true, priv);
 -      if (IS_ERR(priv->tx_reporter))
 +      if (IS_ERR(reporter)) {
                netdev_warn(priv->netdev,
                            "Failed to create tx reporter, err = %ld\n",
 -                          PTR_ERR(priv->tx_reporter));
 -      return IS_ERR_OR_NULL(priv->tx_reporter);
 +                          PTR_ERR(reporter));
 +              return PTR_ERR(reporter);
 +      }
 +      priv->tx_reporter = reporter;
 +      return 0;
  }
  
  void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
  {
 -      if (IS_ERR_OR_NULL(priv->tx_reporter))
 +      if (!priv->tx_reporter)
                return;
  
        devlink_health_reporter_destroy(priv->tx_reporter);
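The err_cqe recovery rework above funnels every early exit through a single out: label so the RECOVERING bit is always cleared, rather than being left set when a query or flush fails. A minimal userspace sketch of that single-exit cleanup shape, with a plain bool standing in for the SQ state bit and stub helpers standing in for the firmware queries:

#include <stdbool.h>

struct sq { bool recovering; };

static int query_state(struct sq *sq, int *state) { (void)sq; *state = 1; return 0; }
static int wait_for_flush(struct sq *sq)          { (void)sq; return 0; }

static int recover(struct sq *sq)
{
        int state, err;

        err = query_state(sq, &state);
        if (err)
                goto out;               /* failure paths all clear the flag */

        if (state != 1) {               /* queue not in error state: nothing to do */
                err = 0;
                goto out;
        }

        err = wait_for_flush(sq);
        if (err)
                goto out;

        sq->recovering = false;         /* success path clears it as well */
        return 0;

out:
        sq->recovering = false;
        return err;
}

int main(void)
{
        struct sq sq = { .recovering = true };

        return recover(&sq);
}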
index f701e4f3c0760cc2fa33557483b15e212e8d4603,7f78c004d12f767b2d8f78b281261bebc9bc3557..2c4d1f41596847373e9ef79f4524412ad11977e4
@@@ -60,28 -60,24 +60,28 @@@ int mlx5e_open_xsk(struct mlx5e_priv *p
                   struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
                   struct mlx5e_channel *c)
  {
 -      struct mlx5e_channel_param cparam = {};
 +      struct mlx5e_channel_param *cparam;
        struct dim_cq_moder icocq_moder = {};
        int err;
  
        if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
                return -EINVAL;
  
 -      mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
 +      cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
 +      if (!cparam)
 +              return -ENOMEM;
  
 -      err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
 +      mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
 +
 +      err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq);
        if (unlikely(err))
 -              return err;
 +              goto err_free_cparam;
  
 -      err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
 +      err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
        if (unlikely(err))
                goto err_close_rx_cq;
  
 -      err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
 +      err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq);
        if (unlikely(err))
                goto err_close_rq;
  
         * is disabled and then reenabled, but the SQ continues receiving CQEs
         * from the old UMEM.
         */
 -      err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
 +      err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
        if (unlikely(err))
                goto err_close_tx_cq;
  
 -      err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
 +      err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
        if (unlikely(err))
                goto err_close_sq;
  
        /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
         * triggered and NAPI to be called on the correct CPU.
         */
 -      err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
 +      err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
        if (unlikely(err))
                goto err_close_icocq;
  
 +      kvfree(cparam);
 +
        spin_lock_init(&c->xskicosq_lock);
  
        set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
@@@ -129,9 -123,6 +129,9 @@@ err_close_rq
  err_close_rx_cq:
        mlx5e_close_cq(&c->xskrq.cq);
  
 +err_free_cparam:
 +      kvfree(cparam);
 +
        return err;
  }
  
@@@ -152,7 -143,10 +152,10 @@@ void mlx5e_activate_xsk(struct mlx5e_ch
  {
        set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
        /* TX queue is created active. */
+       spin_lock(&c->xskicosq_lock);
        mlx5e_trigger_irq(&c->xskicosq);
+       spin_unlock(&c->xskicosq_lock);
  }
  
  void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
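mlx5e_open_xsk() above moves the large channel-parameter structure off the stack onto the heap and threads its release through the existing unwind labels. A userspace analogue of the same change, with calloc/free standing in for kvzalloc/kvfree and a stub standing in for the queue-open calls:

#include <errno.h>
#include <stdlib.h>

struct channel_param { char big[8192]; };       /* too large to keep on the stack */

static int open_queue(const struct channel_param *p) { (void)p; return 0; }

static int open_xsk(void)
{
        struct channel_param *cparam;
        int err;

        cparam = calloc(1, sizeof(*cparam));    /* was a stack variable before */
        if (!cparam)
                return -ENOMEM;

        err = open_queue(cparam);
        if (err)
                goto err_free_cparam;           /* unwind path must free it */

        free(cparam);                           /* done with it on success too */
        return 0;

err_free_cparam:
        free(cparam);
        return err;
}

int main(void)
{
        return open_xsk() ? 1 : 0;
}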
index 02530b50609c5a9c83257219208ca5769e137a73,20e628c907e53a3020c211575e059ab800c22e50..7347d673f448a4203283e1c6d2198cbcad34bfc7
@@@ -1081,6 -1081,14 +1081,14 @@@ int mlx5e_ethtool_set_link_ksettings(st
        link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
                mlx5e_port_speed2linkmodes(mdev, speed, !ext);
  
+       if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+           autoneg != AUTONEG_ENABLE) {
+               netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n",
+                          __func__);
+               err = -EINVAL;
+               goto out;
+       }
        link_modes = link_modes & eproto.cap;
        if (!link_modes) {
                netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
@@@ -1338,6 -1346,9 +1346,9 @@@ int mlx5e_ethtool_set_pauseparam(struc
        struct mlx5_core_dev *mdev = priv->mdev;
        int err;
  
+       if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+               return -EOPNOTSUPP;
        if (pauseparam->autoneg)
                return -EINVAL;
  
@@@ -1679,6 -1690,40 +1690,40 @@@ static int mlx5e_get_module_eeprom(stru
        return 0;
  }
  
+ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+                              struct ethtool_flash *flash)
+ {
+       struct mlx5_core_dev *mdev = priv->mdev;
+       struct net_device *dev = priv->netdev;
+       const struct firmware *fw;
+       int err;
+       if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+               return -EOPNOTSUPP;
+       err = request_firmware_direct(&fw, flash->data, &dev->dev);
+       if (err)
+               return err;
+       dev_hold(dev);
+       rtnl_unlock();
+       err = mlx5_firmware_flash(mdev, fw, NULL);
+       release_firmware(fw);
+       rtnl_lock();
+       dev_put(dev);
+       return err;
+ }
+ 
+ static int mlx5e_flash_device(struct net_device *dev,
+                             struct ethtool_flash *flash)
+ {
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       return mlx5e_ethtool_flash_device(priv, flash);
+ }
+ 
  static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
                                     bool is_rx_cq)
  {
@@@ -1913,27 -1958,21 +1958,27 @@@ static u32 mlx5e_get_priv_flags(struct 
        return priv->channels.params.pflags;
  }
  
 -#ifndef CONFIG_MLX5_EN_RXNFC
 -/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS
 - * otherwise this function will be defined from en_fs_ethtool.c
 - */
  static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
  {
        struct mlx5e_priv *priv = netdev_priv(dev);
  
 -      if (info->cmd != ETHTOOL_GRXRINGS)
 -              return -EOPNOTSUPP;
 -      /* ring_count is needed by ethtool -x */
 -      info->data = priv->channels.params.num_channels;
 -      return 0;
 +      /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
 +       * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
 +       * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
 +       * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
 +       */
 +      if (info->cmd == ETHTOOL_GRXRINGS) {
 +              info->data = priv->channels.params.num_channels;
 +              return 0;
 +      }
 +
 +      return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs);
 +}
 +
 +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 +{
 +      return mlx5e_ethtool_set_rxnfc(dev, cmd);
  }
 -#endif
  
  const struct ethtool_ops mlx5e_ethtool_ops = {
        .get_drvinfo       = mlx5e_get_drvinfo,
        .get_rxfh          = mlx5e_get_rxfh,
        .set_rxfh          = mlx5e_set_rxfh,
        .get_rxnfc         = mlx5e_get_rxnfc,
 -#ifdef CONFIG_MLX5_EN_RXNFC
        .set_rxnfc         = mlx5e_set_rxnfc,
 -#endif
        .get_tunable       = mlx5e_get_tunable,
        .set_tunable       = mlx5e_set_tunable,
        .get_pauseparam    = mlx5e_get_pauseparam,
        .set_wol           = mlx5e_set_wol,
        .get_module_info   = mlx5e_get_module_info,
        .get_module_eeprom = mlx5e_get_module_eeprom,
+       .flash_device      = mlx5e_flash_device,
        .get_priv_flags    = mlx5e_get_priv_flags,
        .set_priv_flags    = mlx5e_set_priv_flags,
        .self_test         = mlx5e_self_test,
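mlx5e_ethtool_flash_device() above pins the netdev with dev_hold(), drops the RTNL lock for the slow firmware flash, then re-takes the lock and drops the reference, so other RTNL users are not stalled for the whole flash. A sketch of that pin-the-object, drop-the-lock shape using pthreads and an atomic counter; this illustrates the pattern only and is not the kernel locking API:

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

struct dev { atomic_int refs; };

static void slow_flash(struct dev *d) { (void)d; /* long-running firmware write */ }

/* Called with big_lock held; returns with it held again. */
static void flash_device(struct dev *d)
{
        atomic_fetch_add(&d->refs, 1);          /* pin the device */
        pthread_mutex_unlock(&big_lock);        /* don't hold the lock across slow I/O */

        slow_flash(d);

        pthread_mutex_lock(&big_lock);
        atomic_fetch_sub(&d->refs, 1);          /* unpin */
}

int main(void)
{
        struct dev d = { .refs = 0 };

        pthread_mutex_lock(&big_lock);
        flash_device(&d);
        pthread_mutex_unlock(&big_lock);
        return 0;
}

The reference is what keeps the pinned object alive while the lock is released; in the driver that job is done by dev_hold()/dev_put().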
index 9a2fcef6e7f034896b3d5a3c8e5d265aa0d7032a,9d5f6e56188f825bbbe605f1bfb3af09604d5e42..0c8e847a9eeedd5c531db24b3a8dfc2b9575338f
@@@ -1130,8 -1130,6 +1130,8 @@@ static int mlx5e_alloc_txqsq(struct mlx
        sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
        sq->stop_room = MLX5E_SQ_STOP_ROOM;
        INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 +      if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 +              set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
        if (mlx5_accel_is_tls_device(c->priv->mdev)) {
@@@ -1323,7 -1321,6 +1323,6 @@@ err_free_txqsq
  void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
  {
        sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
-       clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
        set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
        netdev_tx_reset_queue(sq->txq);
        netif_tx_start_queue(sq->txq);
@@@ -2324,7 -2321,7 +2323,7 @@@ int mlx5e_open_channels(struct mlx5e_pr
                        goto err_close_channels;
        }
  
 -      if (!IS_ERR_OR_NULL(priv->tx_reporter))
 +      if (priv->tx_reporter)
                devlink_health_reporter_state_update(priv->tx_reporter,
                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
  
@@@ -3425,7 -3422,7 +3424,7 @@@ out
  #ifdef CONFIG_MLX5_ESWITCH
  static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
                                     struct flow_cls_offload *cls_flower,
 -                                   int flags)
 +                                   unsigned long flags)
  {
        switch (cls_flower->command) {
        case FLOW_CLS_REPLACE:
  static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
                                   void *cb_priv)
  {
 +      unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
        struct mlx5e_priv *priv = cb_priv;
  
        switch (type) {
        case TC_SETUP_CLSFLOWER:
 -              return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
 -                                               MLX5E_TC_NIC_OFFLOAD);
 +              return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
        default:
                return -EOPNOTSUPP;
        }
@@@ -3643,7 -3640,7 +3642,7 @@@ static int set_feature_tc_num_filters(s
  {
        struct mlx5e_priv *priv = netdev_priv(netdev);
  
 -      if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) {
 +      if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
                netdev_err(netdev,
                           "Active offloaded tc filters, can't turn hw_tc_offload off\n");
                return -EINVAL;
@@@ -3784,10 -3781,9 +3783,10 @@@ static netdev_features_t mlx5e_fix_feat
                        netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
        }
        if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
 -              features &= ~NETIF_F_LRO;
 -              if (params->lro_en)
 +              if (features & NETIF_F_LRO) {
                        netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
 +                      features &= ~NETIF_F_LRO;
 +              }
        }
  
        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
@@@ -3954,8 -3950,7 +3953,8 @@@ int mlx5e_hwstamp_set(struct mlx5e_pri
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
        case HWTSTAMP_FILTER_NTP_ALL:
                /* Disable CQE compression */
 -              netdev_warn(priv->netdev, "Disabling cqe compression");
 +              if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
 +                      netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
                err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
                if (err) {
                        netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
@@@ -4773,7 -4768,7 +4772,7 @@@ void mlx5e_build_nic_params(struct mlx5
        mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
  
        /* TX inline */
 -      params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
 +      mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
  
        /* RSS */
        mlx5e_build_rss_params(rss_params, params->num_channels);
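One small behavioural detail in the mlx5e_fix_features() hunk above: the LRO warning is printed only when LRO was actually requested and is being dropped, instead of on every fixup. A tiny sketch of that guard; the flag names are invented for illustration:

#include <stdio.h>

#define F_LRO           (1u << 0)
#define F_STRIDING_RQ   (1u << 1)

static unsigned int fix_features(unsigned int features)
{
        if (!(features & F_STRIDING_RQ)) {
                if (features & F_LRO) {         /* warn only if we really drop it */
                        fprintf(stderr, "Disabling LRO, not supported in legacy RQ\n");
                        features &= ~F_LRO;
                }
        }
        return features;
}

int main(void)
{
        return fix_features(F_LRO) == 0 ? 0 : 1;
}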
index 5be3da62149948c069d9b057054ba96c338b17b2,00b2d4a86159f142e491278769fe1679ce5a2d20..c57f7533a6d0b74e2fede85392b2a5a1779b405e
@@@ -38,8 -38,6 +38,8 @@@
  #include <linux/mlx5/fs.h>
  #include <linux/mlx5/device.h>
  #include <linux/rhashtable.h>
 +#include <linux/refcount.h>
 +#include <linux/completion.h>
  #include <net/tc_act/tc_mirred.h>
  #include <net/tc_act/tc_vlan.h>
  #include <net/tc_act/tc_tunnel_key.h>
@@@ -67,20 -65,19 +67,20 @@@ struct mlx5_nic_flow_attr 
        struct mlx5_fc          *counter;
  };
  
 -#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
 +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
  
  enum {
 -      MLX5E_TC_FLOW_INGRESS   = MLX5E_TC_INGRESS,
 -      MLX5E_TC_FLOW_EGRESS    = MLX5E_TC_EGRESS,
 -      MLX5E_TC_FLOW_ESWITCH   = MLX5E_TC_ESW_OFFLOAD,
 -      MLX5E_TC_FLOW_NIC       = MLX5E_TC_NIC_OFFLOAD,
 -      MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE),
 -      MLX5E_TC_FLOW_HAIRPIN   = BIT(MLX5E_TC_FLOW_BASE + 1),
 -      MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
 -      MLX5E_TC_FLOW_SLOW        = BIT(MLX5E_TC_FLOW_BASE + 3),
 -      MLX5E_TC_FLOW_DUP         = BIT(MLX5E_TC_FLOW_BASE + 4),
 -      MLX5E_TC_FLOW_NOT_READY   = BIT(MLX5E_TC_FLOW_BASE + 5),
 +      MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
 +      MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
 +      MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
 +      MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
 +      MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
 +      MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
 +      MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
 +      MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
 +      MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
 +      MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
 +      MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
  };
  
  #define MLX5E_TC_MAX_SPLITS 1
   *        container_of(helper item, containing struct type, helper field[index])
   */
  struct encap_flow_item {
 +      struct mlx5e_encap_entry *e; /* attached encap instance */
        struct list_head list;
        int index;
  };
@@@ -112,7 -108,7 +112,7 @@@ struct mlx5e_tc_flow 
        struct rhash_head       node;
        struct mlx5e_priv       *priv;
        u64                     cookie;
 -      u16                     flags;
 +      unsigned long           flags;
        struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
        /* Flow can be associated with multiple encap IDs.
         * The number of encaps is bounded by the number of supported
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
        struct mlx5e_tc_flow    *peer_flow;
 +      struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
 +      struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
        struct list_head        hairpin; /* flows sharing the same hairpin */
        struct list_head        peer;    /* flows with peer flow */
        struct list_head        unready; /* flows not ready to be offloaded (e.g due to missing route) */
 +      refcount_t              refcnt;
 +      struct rcu_head         rcu_head;
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
@@@ -165,20 -157,12 +165,20 @@@ struct mlx5e_hairpin_entry 
        /* a node of a hash table which keeps all the  hairpin entries */
        struct hlist_node hairpin_hlist;
  
 +      /* protects flows list */
 +      spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
 +      /* hpe's that were not fully initialized when dead peer update event
 +       * function traversed them.
 +       */
 +      struct list_head dead_peer_wait_list;
  
        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
 +      refcount_t refcnt;
 +      struct completion res_ready;
  };
  
  struct mod_hdr_key {
@@@ -190,93 -174,16 +190,93 @@@ struct mlx5e_mod_hdr_entry 
        /* a node of a hash table which keeps all the mod_hdr entries */
        struct hlist_node mod_hdr_hlist;
  
 +      /* protects flows list */
 +      spinlock_t flows_lock;
        /* flows sharing the same mod_hdr entry */
        struct list_head flows;
  
        struct mod_hdr_key key;
  
        u32 mod_hdr_id;
 +
 +      refcount_t refcnt;
 +      struct completion res_ready;
 +      int compl_result;
  };
  
  #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
  
 +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 +                            struct mlx5e_tc_flow *flow);
 +
 +static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
 +{
 +      if (!flow || !refcount_inc_not_zero(&flow->refcnt))
 +              return ERR_PTR(-EINVAL);
 +      return flow;
 +}
 +
 +static void mlx5e_flow_put(struct mlx5e_priv *priv,
 +                         struct mlx5e_tc_flow *flow)
 +{
 +      if (refcount_dec_and_test(&flow->refcnt)) {
 +              mlx5e_tc_del_flow(priv, flow);
 +              kfree_rcu(flow, rcu_head);
 +      }
 +}
 +
 +static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
 +{
 +      /* Complete all memory stores before setting bit. */
 +      smp_mb__before_atomic();
 +      set_bit(flag, &flow->flags);
 +}
 +
 +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
 +                                   unsigned long flag)
 +{
 +      /* test_and_set_bit() provides all necessary barriers */
 +      return test_and_set_bit(flag, &flow->flags);
 +}
 +
 +#define flow_flag_test_and_set(flow, flag)                    \
 +      __flow_flag_test_and_set(flow,                          \
 +                               MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
 +{
 +      /* Complete all memory stores before clearing bit. */
 +      smp_mb__before_atomic();
 +      clear_bit(flag, &flow->flags);
 +}
 +
 +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
 +                                                    MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
 +{
 +      bool ret = test_bit(flag, &flow->flags);
 +
 +      /* Read fields of flow structure only after checking flags. */
 +      smp_mb__after_atomic();
 +      return ret;
 +}
 +
 +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
 +                                                  MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
 +{
 +      return flow_flag_test(flow, ESWITCH);
 +}
 +
 +static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
 +{
 +      return flow_flag_test(flow, OFFLOADED);
 +}
 +
  static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
  {
        return jhash(key->actions,
@@@ -292,62 -199,15 +292,62 @@@ static inline int cmp_mod_hdr_info(stru
        return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
  }
  
 +static struct mod_hdr_tbl *
 +get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
 +{
 +      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 +
 +      return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
 +              &priv->fs.tc.mod_hdr;
 +}
 +
 +static struct mlx5e_mod_hdr_entry *
 +mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
 +{
 +      struct mlx5e_mod_hdr_entry *mh, *found = NULL;
 +
 +      hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
 +              if (!cmp_mod_hdr_info(&mh->key, key)) {
 +                      refcount_inc(&mh->refcnt);
 +                      found = mh;
 +                      break;
 +              }
 +      }
 +
 +      return found;
 +}
 +
 +static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
 +                            struct mlx5e_mod_hdr_entry *mh,
 +                            int namespace)
 +{
 +      struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
 +
 +      if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
 +              return;
 +      hash_del(&mh->mod_hdr_hlist);
 +      mutex_unlock(&tbl->lock);
 +
 +      WARN_ON(!list_empty(&mh->flows));
 +      if (mh->compl_result > 0)
 +              mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
 +
 +      kfree(mh);
 +}
 +
 +static int get_flow_name_space(struct mlx5e_tc_flow *flow)
 +{
 +      return mlx5e_is_eswitch_flow(flow) ?
 +              MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
 +}
 +}
 +
  static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
  {
 -      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int num_actions, actions_size, namespace, err;
        struct mlx5e_mod_hdr_entry *mh;
 +      struct mod_hdr_tbl *tbl;
        struct mod_hdr_key key;
 -      bool found = false;
        u32 hash_key;
  
        num_actions  = parse_attr->num_mod_hdr_actions;
  
        hash_key = hash_mod_hdr_info(&key);
  
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 -              namespace = MLX5_FLOW_NAMESPACE_FDB;
 -              hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
 -                                     mod_hdr_hlist, hash_key) {
 -                      if (!cmp_mod_hdr_info(&mh->key, &key)) {
 -                              found = true;
 -                              break;
 -                      }
 -              }
 -      } else {
 -              namespace = MLX5_FLOW_NAMESPACE_KERNEL;
 -              hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
 -                                     mod_hdr_hlist, hash_key) {
 -                      if (!cmp_mod_hdr_info(&mh->key, &key)) {
 -                              found = true;
 -                              break;
 -                      }
 -              }
 -      }
 +      namespace = get_flow_name_space(flow);
 +      tbl = get_mod_hdr_table(priv, namespace);
 +
 +      mutex_lock(&tbl->lock);
 +      mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
 +      if (mh) {
 +              mutex_unlock(&tbl->lock);
 +              wait_for_completion(&mh->res_ready);
  
 -      if (found)
 +              if (mh->compl_result < 0) {
 +                      err = -EREMOTEIO;
 +                      goto attach_header_err;
 +              }
                goto attach_flow;
 +      }
  
        mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
 -      if (!mh)
 +      if (!mh) {
 +              mutex_unlock(&tbl->lock);
                return -ENOMEM;
 +      }
  
        mh->key.actions = (void *)mh + sizeof(*mh);
        memcpy(mh->key.actions, key.actions, actions_size);
        mh->key.num_actions = num_actions;
 +      spin_lock_init(&mh->flows_lock);
        INIT_LIST_HEAD(&mh->flows);
 +      refcount_set(&mh->refcnt, 1);
 +      init_completion(&mh->res_ready);
 +
 +      hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
 +      mutex_unlock(&tbl->lock);
  
        err = mlx5_modify_header_alloc(priv->mdev, namespace,
                                       mh->key.num_actions,
                                       mh->key.actions,
                                       &mh->mod_hdr_id);
 -      if (err)
 -              goto out_err;
 -
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 -              hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
 -      else
 -              hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
 +      if (err) {
 +              mh->compl_result = err;
 +              goto alloc_header_err;
 +      }
 +      mh->compl_result = 1;
 +      complete_all(&mh->res_ready);
  
  attach_flow:
 +      flow->mh = mh;
 +      spin_lock(&mh->flows_lock);
        list_add(&flow->mod_hdr, &mh->flows);
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 +      spin_unlock(&mh->flows_lock);
 +      if (mlx5e_is_eswitch_flow(flow))
                flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
        else
                flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
  
        return 0;
  
 -out_err:
 -      kfree(mh);
 +alloc_header_err:
 +      complete_all(&mh->res_ready);
 +attach_header_err:
 +      mlx5e_mod_hdr_put(priv, mh, namespace);
        return err;
  }
  
  static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
  {
 -      struct list_head *next = flow->mod_hdr.next;
 +      /* flow wasn't fully initialized */
 +      if (!flow->mh)
 +              return;
  
 +      spin_lock(&flow->mh->flows_lock);
        list_del(&flow->mod_hdr);
 +      spin_unlock(&flow->mh->flows_lock);
  
 -      if (list_empty(next)) {
 -              struct mlx5e_mod_hdr_entry *mh;
 -
 -              mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
 -
 -              mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
 -              hash_del(&mh->mod_hdr_hlist);
 -              kfree(mh);
 -      }
 +      mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
 +      flow->mh = NULL;
  }
  
  static
@@@ -697,35 -555,13 +697,35 @@@ static struct mlx5e_hairpin_entry *mlx5
  
        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
 -              if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
 +              if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
 +                      refcount_inc(&hpe->refcnt);
                        return hpe;
 +              }
        }
  
        return NULL;
  }
  
 +static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
 +                            struct mlx5e_hairpin_entry *hpe)
 +{
 +      /* no more hairpin flows for us, release the hairpin pair */
 +      if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
 +              return;
 +      hash_del(&hpe->hairpin_hlist);
 +      mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 +
 +      if (!IS_ERR_OR_NULL(hpe->hp)) {
 +              netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
 +                         dev_name(hpe->hp->pair->peer_mdev->device));
 +
 +              mlx5e_hairpin_destroy(hpe->hp);
 +      }
 +
 +      WARN_ON(!list_empty(&hpe->flows));
 +      kfree(hpe);
 +}
 +
  #define UNKNOWN_MATCH_PRIO 8
  
  static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
@@@ -791,37 -627,17 +791,37 @@@ static int mlx5e_hairpin_flow_add(struc
                                     extack);
        if (err)
                return err;
 +
 +      mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
 -      if (hpe)
 +      if (hpe) {
 +              mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 +              wait_for_completion(&hpe->res_ready);
 +
 +              if (IS_ERR(hpe->hp)) {
 +                      err = -EREMOTEIO;
 +                      goto out_err;
 +              }
                goto attach_flow;
 +      }
  
        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
 -      if (!hpe)
 +      if (!hpe) {
 +              mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                return -ENOMEM;
 +      }
  
 +      spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
 +      INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
 +      refcount_set(&hpe->refcnt, 1);
 +      init_completion(&hpe->res_ready);
 +
 +      hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
 +               hash_hairpin_info(peer_id, match_prio));
 +      mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
  
        params.log_data_size = 15;
        params.log_data_size = min_t(u8, params.log_data_size,
        params.num_channels = link_speed64;
  
        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
 +      hpe->hp = hp;
 +      complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
 -              goto create_hairpin_err;
 +              goto out_err;
        }
  
        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
  
 -      hpe->hp = hp;
 -      hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
 -               hash_hairpin_info(peer_id, match_prio));
 -
  attach_flow:
        if (hpe->hp->num_channels > 1) {
 -              flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
 +              flow_flag_set(flow, HAIRPIN_RSS);
                flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
        } else {
                flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
        }
 +
 +      flow->hpe = hpe;
 +      spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
 +      spin_unlock(&hpe->flows_lock);
  
        return 0;
  
 -create_hairpin_err:
 -      kfree(hpe);
 +out_err:
 +      mlx5e_hairpin_put(priv, hpe);
        return err;
  }
  
  static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
  {
 -      struct list_head *next = flow->hairpin.next;
 +      /* flow wasn't fully initialized */
 +      if (!flow->hpe)
 +              return;
  
 +      spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
 +      spin_unlock(&flow->hpe->flows_lock);
  
 -      /* no more hairpin flows for us, release the hairpin pair */
 -      if (list_empty(next)) {
 -              struct mlx5e_hairpin_entry *hpe;
 -
 -              hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
 -
 -              netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
 -                         dev_name(hpe->hp->pair->peer_mdev->device));
 -
 -              mlx5e_hairpin_destroy(hpe->hp);
 -              hash_del(&hpe->hairpin_hlist);
 -              kfree(hpe);
 -      }
 +      mlx5e_hairpin_put(priv, flow->hpe);
 +      flow->hpe = NULL;
  }
  
  static int
@@@ -906,17 -727,18 +906,17 @@@ mlx5e_tc_add_nic_flow(struct mlx5e_pri
                .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_fc *counter = NULL;
 -      bool table_created = false;
        int err, dest_ix = 0;
  
        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = attr->flow_tag;
  
 -      if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
 +      if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
 -              if (err) {
 -                      goto err_add_hairpin_flow;
 -              }
 -              if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
 +              if (err)
 +                      return err;
 +
 +              if (flow_flag_test(flow, HAIRPIN_RSS)) {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                        dest[dest_ix].ft = attr->hairpin_ft;
                } else {
  
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
 -              if (IS_ERR(counter)) {
 -                      err = PTR_ERR(counter);
 -                      goto err_fc_create;
 -              }
 +              if (IS_ERR(counter))
 +                      return PTR_ERR(counter);
 +
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(counter);
                dest_ix++;
                flow_act.modify_id = attr->mod_hdr_id;
                kfree(parse_attr->mod_hdr_actions);
                if (err)
 -                      goto err_create_mod_hdr_id;
 +                      return err;
        }
  
 +      mutex_lock(&priv->fs.tc.t_lock);
        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                int tc_grp_size, tc_tbl_size;
                u32 max_flow_counter;
                                                            MLX5E_TC_TABLE_NUM_GROUPS,
                                                            MLX5E_TC_FT_LEVEL, 0);
                if (IS_ERR(priv->fs.tc.t)) {
 +                      mutex_unlock(&priv->fs.tc.t_lock);
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Failed to create tc offload table\n");
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
 -                      err = PTR_ERR(priv->fs.tc.t);
 -                      goto err_create_ft;
 +                      return PTR_ERR(priv->fs.tc.t);
                }
 -
 -              table_created = true;
        }
  
        if (attr->match_level != MLX5_MATCH_NONE)
  
        flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                            &flow_act, dest, dest_ix);
 +      mutex_unlock(&priv->fs.tc.t_lock);
  
 -      if (IS_ERR(flow->rule[0])) {
 -              err = PTR_ERR(flow->rule[0]);
 -              goto err_add_rule;
 -      }
 +      if (IS_ERR(flow->rule[0]))
 +              return PTR_ERR(flow->rule[0]);
  
        return 0;
 -
 -err_add_rule:
 -      if (table_created) {
 -              mlx5_destroy_flow_table(priv->fs.tc.t);
 -              priv->fs.tc.t = NULL;
 -      }
 -err_create_ft:
 -      if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 -              mlx5e_detach_mod_hdr(priv, flow);
 -err_create_mod_hdr_id:
 -      mlx5_fc_destroy(dev, counter);
 -err_fc_create:
 -      if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
 -              mlx5e_hairpin_flow_del(priv, flow);
 -err_add_hairpin_flow:
 -      return err;
  }
  
  static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
        struct mlx5_fc *counter = NULL;
  
        counter = attr->counter;
 -      mlx5_del_flow_rules(flow->rule[0]);
 +      if (!IS_ERR_OR_NULL(flow->rule[0]))
 +              mlx5_del_flow_rules(flow->rule[0]);
        mlx5_fc_destroy(priv->mdev, counter);
  
 -      if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)  && priv->fs.tc.t) {
 +      mutex_lock(&priv->fs.tc.t_lock);
 +      if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
 +      mutex_unlock(&priv->fs.tc.t_lock);
  
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
  
 -      if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
 +      if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);
  }
  
@@@ -1047,6 -885,7 +1047,6 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_
                }
        }
  
 -      flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
        return rule;
  }
  
@@@ -1055,7 -894,7 +1055,7 @@@ mlx5e_tc_unoffload_fdb_rules(struct mlx
                             struct mlx5e_tc_flow *flow,
                           struct mlx5_esw_flow_attr *attr)
  {
 -      flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
 +      flow_flag_clear(flow, OFFLOADED);
  
        if (attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
@@@ -1078,7 -917,7 +1078,7 @@@ mlx5e_tc_offload_to_slow_path(struct ml
  
        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (!IS_ERR(rule))
 -              flow->flags |= MLX5E_TC_FLOW_SLOW;
 +              flow_flag_set(flow, SLOW);
  
        return rule;
  }
@@@ -1093,26 -932,7 +1093,26 @@@ mlx5e_tc_unoffload_from_slow_path(struc
        slow_attr->split_count = 0;
        slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
 -      flow->flags &= ~MLX5E_TC_FLOW_SLOW;
 +      flow_flag_clear(flow, SLOW);
 +}
 +
 +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 + * function.
 + */
 +static void unready_flow_add(struct mlx5e_tc_flow *flow,
 +                           struct list_head *unready_flows)
 +{
 +      flow_flag_set(flow, NOT_READY);
 +      list_add_tail(&flow->unready, unready_flows);
 +}
 +
 +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 + * function.
 + */
 +static void unready_flow_del(struct mlx5e_tc_flow *flow)
 +{
 +      list_del(&flow->unready);
 +      flow_flag_clear(flow, NOT_READY);
  }
  
  static void add_unready_flow(struct mlx5e_tc_flow *flow)
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;
  
 -      flow->flags |= MLX5E_TC_FLOW_NOT_READY;
 -      list_add_tail(&flow->unready, &uplink_priv->unready_flows);
 +      mutex_lock(&uplink_priv->unready_flows_lock);
 +      unready_flow_add(flow, &uplink_priv->unready_flows);
 +      mutex_unlock(&uplink_priv->unready_flows_lock);
  }
  
  static void remove_unready_flow(struct mlx5e_tc_flow *flow)
  {
 -      list_del(&flow->unready);
 -      flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
 +      struct mlx5_rep_uplink_priv *uplink_priv;
 +      struct mlx5e_rep_priv *rpriv;
 +      struct mlx5_eswitch *esw;
 +
 +      esw = flow->priv->mdev->priv.eswitch;
 +      rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 +      uplink_priv = &rpriv->uplink_priv;
 +
 +      mutex_lock(&uplink_priv->unready_flows_lock);
 +      unready_flow_del(flow);
 +      mutex_unlock(&uplink_priv->unready_flows_lock);
  }
  
  static int
@@@ -1170,12 -980,14 +1170,12 @@@ mlx5e_tc_add_fdb_flow(struct mlx5e_pri
  
        if (attr->chain > max_chain) {
                NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
 -              err = -EOPNOTSUPP;
 -              goto err_max_prio_chain;
 +              return -EOPNOTSUPP;
        }
  
        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
 -              err = -EOPNOTSUPP;
 -              goto err_max_prio_chain;
 +              return -EOPNOTSUPP;
        }
  
        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
                                         extack, &encap_dev, &encap_valid);
                if (err)
 -                      goto err_attach_encap;
 +                      return err;
  
                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
  
        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
 -              goto err_add_vlan;
 +              return err;
  
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                kfree(parse_attr->mod_hdr_actions);
                if (err)
 -                      goto err_mod_hdr;
 +                      return err;
        }
  
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(attr->counter_dev, true);
 -              if (IS_ERR(counter)) {
 -                      err = PTR_ERR(counter);
 -                      goto err_create_counter;
 -              }
 +              if (IS_ERR(counter))
 +                      return PTR_ERR(counter);
  
                attr->counter = counter;
        }
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
        }
  
 -      if (IS_ERR(flow->rule[0])) {
 -              err = PTR_ERR(flow->rule[0]);
 -              goto err_add_rule;
 -      }
 +      if (IS_ERR(flow->rule[0]))
 +              return PTR_ERR(flow->rule[0]);
 +      else
 +              flow_flag_set(flow, OFFLOADED);
  
        return 0;
 -
 -err_add_rule:
 -      mlx5_fc_destroy(attr->counter_dev, counter);
 -err_create_counter:
 -      if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 -              mlx5e_detach_mod_hdr(priv, flow);
 -err_mod_hdr:
 -      mlx5_eswitch_del_vlan_action(esw, attr);
 -err_add_vlan:
 -      for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
 -              if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
 -                      mlx5e_detach_encap(priv, flow, out_index);
 -err_attach_encap:
 -err_max_prio_chain:
 -      return err;
  }
  
  static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
@@@ -1259,14 -1088,14 +1259,14 @@@ static void mlx5e_tc_del_fdb_flow(struc
        struct mlx5_esw_flow_attr slow_attr;
        int out_index;
  
 -      if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
 +      if (flow_flag_test(flow, NOT_READY)) {
                remove_unready_flow(flow);
                kvfree(attr->parse_attr);
                return;
        }
  
 -      if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 -              if (flow->flags & MLX5E_TC_FLOW_SLOW)
 +      if (mlx5e_is_offloaded_flow(flow)) {
 +              if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
@@@ -1294,9 -1123,9 +1294,9 @@@ void mlx5e_tc_encap_flows_add(struct ml
  {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr slow_attr, *esw_attr;
 +      struct encap_flow_item *efi, *tmp;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
 -      struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;
        int err;
  
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(priv);
  
 -      list_for_each_entry(efi, &e->flows, list) {
 +      list_for_each_entry_safe(efi, tmp, &e->flows, list) {
                bool all_flow_encaps_valid = true;
                int i;
  
                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 +              if (IS_ERR(mlx5e_flow_get(flow)))
 +                      continue;
 +
                esw_attr = flow->esw_attr;
                spec = &esw_attr->parse_attr->spec;
  
                }
                /* Do not offload flows with unresolved neighbors */
                if (!all_flow_encaps_valid)
 -                      continue;
 +                      goto loop_cont;
                /* update from slow path rule to encap rule */
                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                       err);
 -                      continue;
 +                      goto loop_cont;
                }
  
                mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
 -              flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */
                flow->rule[0] = rule;
 +              /* was unset when slow path rule removed */
 +              flow_flag_set(flow, OFFLOADED);
 +
 +loop_cont:
 +              mlx5e_flow_put(priv, flow);
        }
  }
  
@@@ -1365,17 -1187,14 +1365,17 @@@ void mlx5e_tc_encap_flows_del(struct ml
  {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr slow_attr;
 +      struct encap_flow_item *efi, *tmp;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
 -      struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;
        int err;
  
 -      list_for_each_entry(efi, &e->flows, list) {
 +      list_for_each_entry_safe(efi, tmp, &e->flows, list) {
                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 +              if (IS_ERR(mlx5e_flow_get(flow)))
 +                      continue;
 +
                spec = &flow->esw_attr->parse_attr->spec;
  
                /* update from encap rule to slow path rule */
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
                                       err);
 -                      continue;
 +                      goto loop_cont;
                }
  
                mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
 -              flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */
                flow->rule[0] = rule;
 +              /* was unset when fast path rule removed */
 +              flow_flag_set(flow, OFFLOADED);
 +
 +loop_cont:
 +              mlx5e_flow_put(priv, flow);
        }
  
        /* we know that the encap is valid */
  
  static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
  {
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 +      if (mlx5e_is_eswitch_flow(flow))
                return flow->esw_attr->counter;
        else
                return flow->nic_attr->counter;
@@@ -1433,32 -1248,21 +1433,32 @@@ void mlx5e_tc_update_neigh_used_value(s
                return;
  
        list_for_each_entry(e, &nhe->encap_list, encap_list) {
 -              struct encap_flow_item *efi;
 -              if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
 +              struct encap_flow_item *efi, *tmp;
 +
 +              if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) ||
 +                  !mlx5e_encap_take(e))
                        continue;
 -              list_for_each_entry(efi, &e->flows, list) {
 +
 +              list_for_each_entry_safe(efi, tmp, &e->flows, list) {
                        flow = container_of(efi, struct mlx5e_tc_flow,
                                            encaps[efi->index]);
 -                      if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 +                      if (IS_ERR(mlx5e_flow_get(flow)))
 +                              continue;
 +
 +                      if (mlx5e_is_offloaded_flow(flow)) {
                                counter = mlx5e_tc_get_counter(flow);
                                lastuse = mlx5_fc_query_lastuse(counter);
                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 +                                      mlx5e_flow_put(netdev_priv(e->out_dev), flow);
                                        neigh_used = true;
                                        break;
                                }
                        }
 +
 +                      mlx5e_flow_put(netdev_priv(e->out_dev), flow);
                }
 +
 +              mlx5e_encap_put(netdev_priv(e->out_dev), e);
                if (neigh_used)
                        break;
        }
        }
  }
  
 -static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 -                             struct mlx5e_tc_flow *flow, int out_index)
 +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
  {
 -      struct list_head *next = flow->encaps[out_index].list.next;
 +      WARN_ON(!list_empty(&e->flows));
 +      mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
  
 -      list_del(&flow->encaps[out_index].list);
 -      if (list_empty(next)) {
 -              struct mlx5e_encap_entry *e;
 +      if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 +              mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
  
 -              e = list_entry(next, struct mlx5e_encap_entry, flows);
 -              mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 +      kfree(e->encap_header);
 +      kfree(e);
 +}
  
 -              if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 -                      mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
 +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 +{
 +      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  
 -              hash_del_rcu(&e->encap_hlist);
 -              kfree(e->encap_header);
 -              kfree(e);
 +      if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
 +              return;
 +      hash_del_rcu(&e->encap_hlist);
 +      mutex_unlock(&esw->offloads.encap_tbl_lock);
 +
 +      mlx5e_encap_dealloc(priv, e);
 +}
 +
 +static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 +                             struct mlx5e_tc_flow *flow, int out_index)
 +{
 +      struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
 +      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 +
 +      /* flow wasn't fully initialized */
 +      if (!e)
 +              return;
 +
 +      mutex_lock(&esw->offloads.encap_tbl_lock);
 +      list_del(&flow->encaps[out_index].list);
 +      flow->encaps[out_index].e = NULL;
 +      if (!refcount_dec_and_test(&e->refcnt)) {
 +              mutex_unlock(&esw->offloads.encap_tbl_lock);
 +              return;
        }
 +      hash_del_rcu(&e->encap_hlist);
 +      mutex_unlock(&esw->offloads.encap_tbl_lock);
 +
 +      mlx5e_encap_dealloc(priv, e);
  }
  
  static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
  {
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
  
 -      if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
 -          !(flow->flags & MLX5E_TC_FLOW_DUP))
 +      if (!flow_flag_test(flow, ESWITCH) ||
 +          !flow_flag_test(flow, DUP))
                return;
  
        mutex_lock(&esw->offloads.peer_mutex);
        list_del(&flow->peer);
        mutex_unlock(&esw->offloads.peer_mutex);
  
 -      flow->flags &= ~MLX5E_TC_FLOW_DUP;
 +      flow_flag_clear(flow, DUP);
  
        mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
        kvfree(flow->peer_flow);
@@@ -1561,7 -1339,7 +1561,7 @@@ static void mlx5e_tc_del_fdb_peer_flow(
  static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
  {
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 +      if (mlx5e_is_eswitch_flow(flow)) {
                mlx5e_tc_del_fdb_peer_flow(flow);
                mlx5e_tc_del_fdb_flow(priv, flow);
        } else {
@@@ -1702,7 -1480,7 +1702,7 @@@ static int __parse_cls_flower(struct ml
                              struct mlx5_flow_spec *spec,
                              struct flow_cls_offload *f,
                              struct net_device *filter_dev,
-                             u8 *match_level, u8 *tunnel_match_level)
+                             u8 *inner_match_level, u8 *outer_match_level)
  {
        struct netlink_ext_ack *extack = f->common.extack;
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
        struct flow_dissector *dissector = rule->match.dissector;
        u16 addr_type = 0;
        u8 ip_proto = 0;
+       u8 *match_level;
  
-       *match_level = MLX5_MATCH_NONE;
+       match_level = outer_match_level;
  
        if (dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_META) |
        }
  
        if (mlx5e_get_tc_tun(filter_dev)) {
-               if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
+               if (parse_tunnel_attr(priv, spec, f, filter_dev,
+                                     outer_match_level))
                        return -EOPNOTSUPP;
  
-               /* In decap flow, header pointers should point to the inner
+               /* At this point, header pointers should point to the inner
                 * headers, outer header were already set by parse_tunnel_attr
                 */
+               match_level = inner_match_level;
                headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
                                                       spec);
                headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
@@@ -2053,37 -1834,41 +2056,43 @@@ static int parse_cls_flower(struct mlx5
                            struct flow_cls_offload *f,
                            struct net_device *filter_dev)
  {
+       u8 inner_match_level, outer_match_level, non_tunnel_match_level;
        struct netlink_ext_ack *extack = f->common.extack;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
-       u8 match_level, tunnel_match_level = MLX5_MATCH_NONE;
        struct mlx5_eswitch_rep *rep;
 +      bool is_eswitch_flow;
        int err;
  
-       err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);
+       inner_match_level = MLX5_MATCH_NONE;
+       outer_match_level = MLX5_MATCH_NONE;
+       err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
+                                &outer_match_level);
+       non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
+                                outer_match_level : inner_match_level;
  
 -      if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
 +      is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
 +      if (!err && is_eswitch_flow) {
                rep = rpriv->rep;
                if (rep->vport != MLX5_VPORT_UPLINK &&
                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
-                   esw->offloads.inline_mode < match_level)) {
+                   esw->offloads.inline_mode < non_tunnel_match_level)) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Flow is not offloaded due to min inline setting");
                        netdev_warn(priv->netdev,
                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
-                                   match_level, esw->offloads.inline_mode);
+                                   non_tunnel_match_level, esw->offloads.inline_mode);
                        return -EOPNOTSUPP;
                }
        }
  
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 +      if (is_eswitch_flow) {
-               flow->esw_attr->match_level = match_level;
-               flow->esw_attr->tunnel_match_level = tunnel_match_level;
+               flow->esw_attr->inner_match_level = inner_match_level;
+               flow->esw_attr->outer_match_level = outer_match_level;
        } else {
-               flow->nic_attr->match_level = match_level;
+               flow->nic_attr->match_level = non_tunnel_match_level;
        }
  
        return err;
@@@ -2600,15 -2385,14 +2609,15 @@@ static bool actions_match_supported(str
  {
        u32 actions;
  
 -      if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 +      if (mlx5e_is_eswitch_flow(flow))
                actions = flow->esw_attr->action;
        else
                actions = flow->nic_attr->action;
  
 -      if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
 +      if (flow_flag_test(flow, EGRESS) &&
            !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
 -            (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
 +            (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
 +            (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
                return false;
  
        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@@ -2758,7 -2542,7 +2767,7 @@@ static int parse_tc_nic_actions(struct 
                        if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
                            same_hw_devs(priv, netdev_priv(peer_dev))) {
                                parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
 -                              flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
 +                              flow_flag_set(flow, HAIRPIN);
                                action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                                          MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        } else {
@@@ -2845,31 -2629,6 +2854,31 @@@ static bool is_merged_eswitch_dev(struc
  
  
  
 +bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
 +{
 +      return refcount_inc_not_zero(&e->refcnt);
 +}
 +
 +static struct mlx5e_encap_entry *
 +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
 +              uintptr_t hash_key)
 +{
 +      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 +      struct mlx5e_encap_entry *e;
 +      struct encap_key e_key;
 +
 +      hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 +                                 encap_hlist, hash_key) {
 +              e_key.ip_tun_key = &e->tun_info->key;
 +              e_key.tc_tunnel = e->tunnel;
 +              if (!cmp_encap_info(&e_key, key) &&
 +                  mlx5e_encap_take(e))
 +                      return e;
 +      }
 +
 +      return NULL;
 +}
 +
  static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct net_device *mirred_dev,
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        const struct ip_tunnel_info *tun_info;
 -      struct encap_key key, e_key;
 +      struct encap_key key;
        struct mlx5e_encap_entry *e;
        unsigned short family;
        uintptr_t hash_key;
 -      bool found = false;
        int err = 0;
  
        parse_attr = attr->parse_attr;
  
        hash_key = hash_encap_info(&key);
  
 -      hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 -                                 encap_hlist, hash_key) {
 -              e_key.ip_tun_key = &e->tun_info->key;
 -              e_key.tc_tunnel = e->tunnel;
 -              if (!cmp_encap_info(&e_key, &key)) {
 -                      found = true;
 -                      break;
 -              }
 -      }
 +      mutex_lock(&esw->offloads.encap_tbl_lock);
 +      e = mlx5e_encap_get(priv, &key, hash_key);
  
        /* must verify if encap is valid or not */
 -      if (found)
 +      if (e) {
 +              mutex_unlock(&esw->offloads.encap_tbl_lock);
 +              wait_for_completion(&e->res_ready);
 +
 +              /* Protect against concurrent neigh update. */
 +              mutex_lock(&esw->offloads.encap_tbl_lock);
 +              if (e->compl_result) {
 +                      err = -EREMOTEIO;
 +                      goto out_err;
 +              }
                goto attach_flow;
 +      }
  
        e = kzalloc(sizeof(*e), GFP_KERNEL);
 -      if (!e)
 -              return -ENOMEM;
 +      if (!e) {
 +              err = -ENOMEM;
 +              goto out_err;
 +      }
 +
 +      refcount_set(&e->refcnt, 1);
 +      init_completion(&e->res_ready);
  
        e->tun_info = tun_info;
        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
 -      if (err)
 +      if (err) {
 +              kfree(e);
 +              e = NULL;
                goto out_err;
 +      }
  
        INIT_LIST_HEAD(&e->flows);
 +      hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
 +      mutex_unlock(&esw->offloads.encap_tbl_lock);
  
        if (family == AF_INET)
                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
        else if (family == AF_INET6)
                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
  
 -      if (err)
 +      /* Protect against concurrent neigh update. */
 +      mutex_lock(&esw->offloads.encap_tbl_lock);
 +      complete_all(&e->res_ready);
 +      if (err) {
 +              e->compl_result = err;
                goto out_err;
 -
 -      hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
 +      }
  
  attach_flow:
 +      flow->encaps[out_index].e = e;
        list_add(&flow->encaps[out_index].list, &e->flows);
        flow->encaps[out_index].index = out_index;
        *encap_dev = e->out_dev;
        } else {
                *encap_valid = false;
        }
 +      mutex_unlock(&esw->offloads.encap_tbl_lock);
  
        return err;
  
  out_err:
 -      kfree(e);
 +      mutex_unlock(&esw->offloads.encap_tbl_lock);
 +      if (e)
 +              mlx5e_encap_put(priv, e);
        return err;
  }
  
@@@ -3150,16 -2890,12 +3159,16 @@@ static int parse_tc_fdb_actions(struct 
                        if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
                                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
                                struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
 -                              struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
 +                              struct net_device *uplink_upper;
  
 +                              rcu_read_lock();
 +                              uplink_upper =
 +                                      netdev_master_upper_dev_get_rcu(uplink_dev);
                                if (uplink_upper &&
                                    netif_is_lag_master(uplink_upper) &&
                                    uplink_upper == out_dev)
                                        out_dev = uplink_dev;
 +                              rcu_read_unlock();
  
                                if (is_vlan_dev(out_dev)) {
                                        err = add_vlan_push_action(priv, attr,
        return 0;
  }
  
 -static void get_flags(int flags, u16 *flow_flags)
 +static void get_flags(int flags, unsigned long *flow_flags)
  {
 -      u16 __flow_flags = 0;
 +      unsigned long __flow_flags = 0;
  
 -      if (flags & MLX5E_TC_INGRESS)
 -              __flow_flags |= MLX5E_TC_FLOW_INGRESS;
 -      if (flags & MLX5E_TC_EGRESS)
 -              __flow_flags |= MLX5E_TC_FLOW_EGRESS;
 +      if (flags & MLX5_TC_FLAG(INGRESS))
 +              __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
 +      if (flags & MLX5_TC_FLAG(EGRESS))
 +              __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
  
 -      if (flags & MLX5E_TC_ESW_OFFLOAD)
 -              __flow_flags |= MLX5E_TC_FLOW_ESWITCH;
 -      if (flags & MLX5E_TC_NIC_OFFLOAD)
 -              __flow_flags |= MLX5E_TC_FLOW_NIC;
 +      if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
 +              __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
 +      if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
 +              __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
  
        *flow_flags = __flow_flags;
  }
@@@ -3354,13 -3090,12 +3363,13 @@@ static const struct rhashtable_params t
        .automatic_shrinking = true,
  };
  
 -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
 +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
 +                                  unsigned long flags)
  {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_rep_priv *uplink_rpriv;
  
 -      if (flags & MLX5E_TC_ESW_OFFLOAD) {
 +      if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                return &uplink_rpriv->uplink_priv.tc_ht;
        } else /* NIC offload */
@@@ -3371,7 -3106,7 +3380,7 @@@ static bool is_peer_flow_needed(struct 
  {
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
 -                            flow->flags & MLX5E_TC_FLOW_INGRESS;
 +              flow_flag_test(flow, INGRESS);
        bool act_is_encap = !!(attr->action &
                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
        bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
  
  static int
  mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
 -               struct flow_cls_offload *f, u16 flow_flags,
 +               struct flow_cls_offload *f, unsigned long flow_flags,
                 struct mlx5e_tc_flow_parse_attr **__parse_attr,
                 struct mlx5e_tc_flow **__flow)
  {
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_tc_flow *flow;
 -      int err;
 +      int out_index, err;
  
        flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
        parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
        flow->cookie = f->cookie;
        flow->flags = flow_flags;
        flow->priv = priv;
 +      for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
 +              INIT_LIST_HEAD(&flow->encaps[out_index].list);
 +      INIT_LIST_HEAD(&flow->mod_hdr);
 +      INIT_LIST_HEAD(&flow->hairpin);
 +      refcount_set(&flow->refcnt, 1);
  
        *__flow = flow;
        *__parse_attr = parse_attr;
@@@ -3437,7 -3167,7 +3446,7 @@@ mlx5e_flow_esw_attr_init(struct mlx5_es
  
        esw_attr->parse_attr = parse_attr;
        esw_attr->chain = f->common.chain_index;
-       esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+       esw_attr->prio = f->common.prio;
  
        esw_attr->in_rep = in_rep;
        esw_attr->in_mdev = in_mdev;
  static struct mlx5e_tc_flow *
  __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
                     struct flow_cls_offload *f,
 -                   u16 flow_flags,
 +                   unsigned long flow_flags,
                     struct net_device *filter_dev,
                     struct mlx5_eswitch_rep *in_rep,
                     struct mlx5_core_dev *in_mdev)
        struct mlx5e_tc_flow *flow;
        int attr_size, err;
  
 -      flow_flags |= MLX5E_TC_FLOW_ESWITCH;
 +      flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
        attr_size  = sizeof(struct mlx5_esw_flow_attr);
        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
                               &parse_attr, &flow);
        return flow;
  
  err_free:
 -      kfree(flow);
 -      kvfree(parse_attr);
 +      mlx5e_flow_put(priv, flow);
  out:
        return ERR_PTR(err);
  }
  
  static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
                                      struct mlx5e_tc_flow *flow,
 -                                    u16 flow_flags)
 +                                    unsigned long flow_flags)
  {
        struct mlx5e_priv *priv = flow->priv, *peer_priv;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
        }
  
        flow->peer_flow = peer_flow;
 -      flow->flags |= MLX5E_TC_FLOW_DUP;
 +      flow_flag_set(flow, DUP);
        mutex_lock(&esw->offloads.peer_mutex);
        list_add_tail(&flow->peer, &esw->offloads.peer_flows);
        mutex_unlock(&esw->offloads.peer_mutex);
@@@ -3553,7 -3284,7 +3562,7 @@@ out
  static int
  mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
                   struct flow_cls_offload *f,
 -                 u16 flow_flags,
 +                 unsigned long flow_flags,
                   struct net_device *filter_dev,
                   struct mlx5e_tc_flow **__flow)
  {
@@@ -3587,7 -3318,7 +3596,7 @@@ out
  static int
  mlx5e_add_nic_flow(struct mlx5e_priv *priv,
                   struct flow_cls_offload *f,
 -                 u16 flow_flags,
 +                 unsigned long flow_flags,
                   struct net_device *filter_dev,
                   struct mlx5e_tc_flow **__flow)
  {
        if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
                return -EOPNOTSUPP;
  
 -      flow_flags |= MLX5E_TC_FLOW_NIC;
 +      flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
        attr_size  = sizeof(struct mlx5_nic_flow_attr);
        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
                               &parse_attr, &flow);
        if (err)
                goto err_free;
  
 -      flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
 +      flow_flag_set(flow, OFFLOADED);
        kvfree(parse_attr);
        *__flow = flow;
  
        return 0;
  
  err_free:
 -      kfree(flow);
 +      mlx5e_flow_put(priv, flow);
        kvfree(parse_attr);
  out:
        return err;
  static int
  mlx5e_tc_add_flow(struct mlx5e_priv *priv,
                  struct flow_cls_offload *f,
 -                int flags,
 +                unsigned long flags,
                  struct net_device *filter_dev,
                  struct mlx5e_tc_flow **flow)
  {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 -      u16 flow_flags;
 +      unsigned long flow_flags;
        int err;
  
        get_flags(flags, &flow_flags);
  }
  
  int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 -                         struct flow_cls_offload *f, int flags)
 +                         struct flow_cls_offload *f, unsigned long flags)
  {
        struct netlink_ext_ack *extack = f->common.extack;
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
        struct mlx5e_tc_flow *flow;
        int err = 0;
  
 -      flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 +      rcu_read_lock();
 +      flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 +      rcu_read_unlock();
        if (flow) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "flow cookie already exists, ignoring");
        if (err)
                goto out;
  
 -      err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
 +      err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
        if (err)
                goto err_free;
  
        return 0;
  
  err_free:
 -      mlx5e_tc_del_flow(priv, flow);
 -      kfree(flow);
 +      mlx5e_flow_put(priv, flow);
  out:
        return err;
  }
  
 -#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
 -#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
 -
  static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
  {
 -      if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
 -              return true;
 +      bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
 +      bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
  
 -      return false;
 +      return flow_flag_test(flow, INGRESS) == dir_ingress &&
 +              flow_flag_test(flow, EGRESS) == dir_egress;
  }
  
  int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 -                      struct flow_cls_offload *f, int flags)
 +                      struct flow_cls_offload *f, unsigned long flags)
  {
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
        struct mlx5e_tc_flow *flow;
 +      int err;
  
 +      rcu_read_lock();
        flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 -      if (!flow || !same_flow_direction(flow, flags))
 -              return -EINVAL;
 +      if (!flow || !same_flow_direction(flow, flags)) {
 +              err = -EINVAL;
 +              goto errout;
 +      }
  
 +      /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
 +       * set.
 +       */
 +      if (flow_flag_test_and_set(flow, DELETED)) {
 +              err = -EINVAL;
 +              goto errout;
 +      }
        rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
 +      rcu_read_unlock();
  
 -      mlx5e_tc_del_flow(priv, flow);
 -
 -      kfree(flow);
 +      mlx5e_flow_put(priv, flow);
  
        return 0;
 +
 +errout:
 +      rcu_read_unlock();
 +      return err;
  }
  
  int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 -                     struct flow_cls_offload *f, int flags)
 +                     struct flow_cls_offload *f, unsigned long flags)
  {
        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
        u64 lastuse = 0;
        u64 packets = 0;
        u64 bytes = 0;
 +      int err = 0;
  
 -      flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 -      if (!flow || !same_flow_direction(flow, flags))
 -              return -EINVAL;
 +      rcu_read_lock();
 +      flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
 +                                              tc_ht_params));
 +      rcu_read_unlock();
 +      if (IS_ERR(flow))
 +              return PTR_ERR(flow);
  
 -      if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 +      if (!same_flow_direction(flow, flags)) {
 +              err = -EINVAL;
 +              goto errout;
 +      }
 +
 +      if (mlx5e_is_offloaded_flow(flow)) {
                counter = mlx5e_tc_get_counter(flow);
                if (!counter)
 -                      return 0;
 +                      goto errout;
  
                mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
        }
        if (!peer_esw)
                goto out;
  
 -      if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
 -          (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
 +      if (flow_flag_test(flow, DUP) &&
 +          flow_flag_test(flow->peer_flow, OFFLOADED)) {
                u64 bytes2;
                u64 packets2;
                u64 lastuse2;
@@@ -3800,117 -3509,15 +3809,117 @@@ no_peer_counter
        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
  out:
        flow_stats_update(&f->stats, bytes, packets, lastuse);
 +errout:
 +      mlx5e_flow_put(priv, flow);
 +      return err;
 +}
 +
 +static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
 +                             struct netlink_ext_ack *extack)
 +{
 +      struct mlx5e_rep_priv *rpriv = priv->ppriv;
 +      struct mlx5_eswitch *esw;
 +      u16 vport_num;
 +      u32 rate_mbps;
 +      int err;
 +
 +      esw = priv->mdev->priv.eswitch;
 +      /* rate is given in bytes/sec.
 +       * First convert to bits/sec and then round to the nearest mbit/sec.
 +       * mbit means million bits.
 +       * Moreover, if rate is non-zero we choose to configure to a minimum of
 +       * 1 mbit/sec.
 +       */
 +      rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
 +      vport_num = rpriv->rep->vport;
 +
 +      err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
 +      if (err)
 +              NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
 +
 +      return err;
 +}
 +
 +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
 +                                      struct flow_action *flow_action,
 +                                      struct netlink_ext_ack *extack)
 +{
 +      struct mlx5e_rep_priv *rpriv = priv->ppriv;
 +      const struct flow_action_entry *act;
 +      int err;
 +      int i;
 +
 +      if (!flow_action_has_entries(flow_action)) {
 +              NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
 +              return -EINVAL;
 +      }
 +
 +      if (!flow_offload_has_one_action(flow_action)) {
 +              NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
 +              return -EOPNOTSUPP;
 +      }
 +
 +      flow_action_for_each(i, act, flow_action) {
 +              switch (act->id) {
 +              case FLOW_ACTION_POLICE:
 +                      err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
 +                      if (err)
 +                              return err;
 +
 +                      rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
 +                      break;
 +              default:
 +                      NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
 +                      return -EOPNOTSUPP;
 +              }
 +      }
  
        return 0;
  }
  
 +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
 +                              struct tc_cls_matchall_offload *ma)
 +{
 +      struct netlink_ext_ack *extack = ma->common.extack;
 +      int prio = TC_H_MAJ(ma->common.prio) >> 16;
 +
 +      if (prio != 1) {
 +              NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
 +              return -EINVAL;
 +      }
 +
 +      return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
 +}
 +
 +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
 +                           struct tc_cls_matchall_offload *ma)
 +{
 +      struct netlink_ext_ack *extack = ma->common.extack;
 +
 +      return apply_police_params(priv, 0, extack);
 +}
 +
 +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
 +                           struct tc_cls_matchall_offload *ma)
 +{
 +      struct mlx5e_rep_priv *rpriv = priv->ppriv;
 +      struct rtnl_link_stats64 cur_stats;
 +      u64 dbytes;
 +      u64 dpkts;
 +
 +      cur_stats = priv->stats.vf_vport;
 +      dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
 +      dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
 +      rpriv->prev_vf_vport_stats = cur_stats;
 +      flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
 +}
 +
  static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
                                              struct mlx5e_priv *peer_priv)
  {
        struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
 -      struct mlx5e_hairpin_entry *hpe;
 +      struct mlx5e_hairpin_entry *hpe, *tmp;
 +      LIST_HEAD(init_wait_list);
        u16 peer_vhca_id;
        int bkt;
  
  
        peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
  
 -      hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
 -              if (hpe->peer_vhca_id == peer_vhca_id)
 +      mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
 +      hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
 +              if (refcount_inc_not_zero(&hpe->refcnt))
 +                      list_add(&hpe->dead_peer_wait_list, &init_wait_list);
 +      mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 +
 +      list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
 +              wait_for_completion(&hpe->res_ready);
 +              if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
                        hpe->hp->pair->peer_gone = true;
 +
 +              mlx5e_hairpin_put(priv, hpe);
        }
  }
  
@@@ -3966,10 -3564,7 +3975,10 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri
        struct mlx5e_tc_table *tc = &priv->fs.tc;
        int err;
  
 -      hash_init(tc->mod_hdr_tbl);
 +      mutex_init(&tc->t_lock);
 +      mutex_init(&tc->mod_hdr.lock);
 +      hash_init(tc->mod_hdr.hlist);
 +      mutex_init(&tc->hairpin_tbl_lock);
        hash_init(tc->hairpin_tbl);
  
        err = rhashtable_init(&tc->ht, &tc_ht_params);
@@@ -4001,16 -3596,12 +4010,16 @@@ void mlx5e_tc_nic_cleanup(struct mlx5e_
        if (tc->netdevice_nb.notifier_call)
                unregister_netdevice_notifier(&tc->netdevice_nb);
  
 +      mutex_destroy(&tc->mod_hdr.lock);
 +      mutex_destroy(&tc->hairpin_tbl_lock);
 +
        rhashtable_destroy(&tc->ht);
  
        if (!IS_ERR_OR_NULL(tc->t)) {
                mlx5_destroy_flow_table(tc->t);
                tc->t = NULL;
        }
 +      mutex_destroy(&tc->t_lock);
  }
  
  int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
@@@ -4023,7 -3614,7 +4032,7 @@@ void mlx5e_tc_esw_cleanup(struct rhasht
        rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
  }
  
 -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
 +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
  {
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
  
@@@ -4045,10 -3636,10 +4054,10 @@@ void mlx5e_tc_reoffload_flows_work(stru
                             reoffload_flows_work);
        struct mlx5e_tc_flow *flow, *tmp;
  
 -      rtnl_lock();
 +      mutex_lock(&rpriv->unready_flows_lock);
        list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
                if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
 -                      remove_unready_flow(flow);
 +                      unready_flow_del(flow);
        }
 -      rtnl_unlock();
 +      mutex_unlock(&rpriv->unready_flows_lock);
  }
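
Stand-alone illustration (not part of the patch above): the matchall policing path added in en_tc.c converts the byte rate handed in by TC into a whole number of Mbit/sec, rounding to the nearest megabit and clamping any non-zero rate to at least 1 Mbit/sec before programming the vport. The sketch below is plain user-space C that reproduces only that arithmetic from apply_police_params(); the helper name rate_bytes_to_mbps() and the sample rates are illustrative, not kernel API.

    #include <stdio.h>
    #include <stdint.h>

    /* Same arithmetic as apply_police_params(): rate is bytes/sec,
     * convert to bits/sec, round to the nearest Mbit/sec, and clamp
     * non-zero rates to a minimum of 1 Mbit/sec.  (Like the kernel
     * code, rate * 8 is done in 32 bits.)
     */
    static uint32_t rate_bytes_to_mbps(uint32_t rate)
    {
        uint32_t mbps;

        if (!rate)
            return 0;
        mbps = (rate * 8u + 500000u) / 1000000u;
        return mbps ? mbps : 1;   /* floor of 1 Mbit/sec */
    }

    int main(void)
    {
        printf("%u\n", rate_bytes_to_mbps(125000));    /* 1: exactly 1 Mbit/sec */
        printf("%u\n", rate_bytes_to_mbps(1000));      /* 1: clamped up */
        printf("%u\n", rate_bytes_to_mbps(10000000));  /* 80: 10 MB/sec ~= 80 Mbit/sec */
        return 0;
    }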
index 86db0e9776daec9c4926866c817c9799d9398f89,04685dbb280cfad006007a070ac0d2414a5fd894..aba9e7a6ad3c6d25e4b5531bcdd08ef2addd9424
@@@ -35,7 -35,6 +35,7 @@@
  
  #include <linux/if_ether.h>
  #include <linux/if_link.h>
 +#include <linux/atomic.h>
  #include <net/devlink.h>
  #include <linux/mlx5/device.h>
  #include <linux/mlx5/eswitch.h>
@@@ -102,13 -101,6 +102,13 @@@ struct mlx5_vport_info 
        bool                    trusted;
  };
  
 +/* Vport context events */
 +enum mlx5_eswitch_vport_event {
 +      MLX5_VPORT_UC_ADDR_CHANGE = BIT(0),
 +      MLX5_VPORT_MC_ADDR_CHANGE = BIT(1),
 +      MLX5_VPORT_PROMISC_CHANGE = BIT(3),
 +};
 +
  struct mlx5_vport {
        struct mlx5_core_dev    *dev;
        int                     vport;
        } qos;
  
        bool                    enabled;
 -      u16                     enabled_events;
 +      enum mlx5_eswitch_vport_event enabled_events;
  };
  
  enum offloads_fdb_flags {
@@@ -181,14 -173,13 +181,14 @@@ struct mlx5_esw_offload 
        struct mlx5_eswitch_rep *vport_reps;
        struct list_head peer_flows;
        struct mutex peer_mutex;
 +      struct mutex encap_tbl_lock; /* protects encap_tbl */
        DECLARE_HASHTABLE(encap_tbl, 8);
 -      DECLARE_HASHTABLE(mod_hdr_tbl, 8);
 +      struct mod_hdr_tbl mod_hdr;
        DECLARE_HASHTABLE(termtbl_tbl, 8);
        struct mutex termtbl_mutex; /* protects termtbl hash */
        const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
        u8 inline_mode;
 -      u64 num_flows;
 +      atomic64_t num_flows;
        enum devlink_eswitch_encap_mode encap;
  };
  
@@@ -216,11 -207,8 +216,11 @@@ enum 
  struct mlx5_eswitch {
        struct mlx5_core_dev    *dev;
        struct mlx5_nb          nb;
 +      /* legacy data structures */
        struct mlx5_eswitch_fdb fdb_table;
        struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
 +      struct esw_mc_addr mc_promisc;
 +      /* end of legacy */
        struct workqueue_struct *work_queue;
        struct mlx5_vport       *vports;
        u32 flags;
         * and async SRIOV admin state changes
         */
        struct mutex            state_lock;
 -      struct esw_mc_addr      mc_promisc;
  
        struct {
                bool            enabled;
        struct mlx5_esw_functions esw_funcs;
  };
  
 -void esw_offloads_cleanup(struct mlx5_eswitch *esw);
 -int esw_offloads_init(struct mlx5_eswitch *esw);
 +void esw_offloads_disable(struct mlx5_eswitch *esw);
 +int esw_offloads_enable(struct mlx5_eswitch *esw);
  void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
  int esw_offloads_init_reps(struct mlx5_eswitch *esw);
  void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
@@@ -262,8 -251,6 +262,8 @@@ void esw_vport_disable_ingress_acl(stru
                                   struct mlx5_vport *vport);
  void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
                                               struct mlx5_vport *vport);
 +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 +                             u32 rate_mbps);
  
  /* E-Switch API */
  int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@@ -390,8 -377,8 +390,8 @@@ struct mlx5_esw_flow_attr 
                struct mlx5_termtbl_handle *termtbl;
        } dests[MLX5_MAX_FLOW_FWD_VPORTS];
        u32     mod_hdr_id;
-       u8      match_level;
-       u8      tunnel_match_level;
+       u8      inner_match_level;
+       u8      outer_match_level;
        struct mlx5_fc *counter;
        u32     chain;
        u16     prio;
@@@ -526,11 -513,6 +526,11 @@@ void mlx5e_tc_clean_fdb_peer_flows(stru
             (vport) = &(esw)->vports[i],               \
             (i) < (esw)->total_vports; (i)++)
  
 +#define mlx5_esw_for_all_vports_reverse(esw, i, vport)        \
 +      for ((i) = (esw)->total_vports - 1;             \
 +           (vport) = &(esw)->vports[i],               \
 +           (i) >= MLX5_VPORT_PF; (i)--)
 +
  #define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)       \
        for ((i) = MLX5_VPORT_FIRST_VF;                 \
             (vport) = &(esw)->vports[(i)],             \
@@@ -592,11 -574,6 +592,11 @@@ bool mlx5_eswitch_is_vf_vport(const str
  void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
  int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
  
 +void
 +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
 +                               enum mlx5_eswitch_vport_event enabled_events);
 +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
 +
  #else  /* CONFIG_MLX5_ESWITCH */
  /* eswitch API stubs */
  static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
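
Stand-alone illustration (not part of the patch above): the eswitch.h hunks replace the raw u16 enabled_events with a named bit-flag enum and turn num_flows into an atomic64_t so it can be bumped outside the eswitch state lock. The user-space C11 sketch below shows the same two patterns in isolation; the enum values mirror the ones added above, while the counter uses <stdatomic.h> as a rough analogue of the kernel's atomic64_t, and all names here are illustrative only.

    #include <stdatomic.h>
    #include <stdio.h>

    #define BIT(n) (1u << (n))

    /* Bit-flag event mask, analogous to enum mlx5_eswitch_vport_event. */
    enum vport_event {
        VPORT_UC_ADDR_CHANGE = BIT(0),
        VPORT_MC_ADDR_CHANGE = BIT(1),
        VPORT_PROMISC_CHANGE = BIT(3),
    };

    /* Flow counter touched from several contexts, analogous to moving
     * num_flows from a plain u64 to atomic64_t.
     */
    static atomic_ullong num_flows = 0;

    static void flow_added(void)   { atomic_fetch_add(&num_flows, 1); }
    static void flow_removed(void) { atomic_fetch_sub(&num_flows, 1); }

    int main(void)
    {
        unsigned int enabled_events = VPORT_UC_ADDR_CHANGE;

        flow_added();
        flow_added();
        flow_removed();

        if (enabled_events & VPORT_UC_ADDR_CHANGE)
            printf("UC address change events enabled\n");
        printf("flows: %llu\n", (unsigned long long)atomic_load(&num_flows));
        return 0;
    }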
index 42cc5001255bea31f65cc51b39bb78ac3ccfb022,0323fd078271bce17cff6a8c963cf803b7437fda..7d3582ee66b7b77a76e03a0614a04b7351321e51
@@@ -207,14 -207,10 +207,10 @@@ mlx5_eswitch_add_offloaded_rule(struct 
  
        mlx5_eswitch_set_rule_source_port(esw, spec, attr);
  
-       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
-               if (attr->tunnel_match_level != MLX5_MATCH_NONE)
-                       spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
-               if (attr->match_level != MLX5_MATCH_NONE)
-                       spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
-       } else if (attr->match_level != MLX5_MATCH_NONE) {
+       if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
-       }
+       if (attr->inner_match_level != MLX5_MATCH_NONE)
+               spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
  
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_id = attr->mod_hdr_id;
        if (IS_ERR(rule))
                goto err_add_rule;
        else
 -              esw->offloads.num_flows++;
 +              atomic64_inc(&esw->offloads.num_flows);
  
        return rule;
  
@@@ -290,7 -286,7 +286,7 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e
        mlx5_eswitch_set_rule_source_port(esw, spec, attr);
  
        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
-       if (attr->match_level != MLX5_MATCH_NONE)
+       if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
  
        rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
        if (IS_ERR(rule))
                goto add_err;
  
 -      esw->offloads.num_flows++;
 +      atomic64_inc(&esw->offloads.num_flows);
  
        return rule;
  add_err:
@@@ -326,7 -322,7 +322,7 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw
                        mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
        }
  
 -      esw->offloads.num_flows--;
 +      atomic64_dec(&esw->offloads.num_flows);
  
        if (fwd_rule)  {
                esw_put_prio_table(esw, attr->chain, attr->prio, 1);
@@@ -442,11 -438,9 +438,11 @@@ int mlx5_eswitch_add_vlan_action(struc
        fwd  = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
                   !attr->dest_chain);
  
 +      mutex_lock(&esw->state_lock);
 +
        err = esw_add_vlan_action_check(attr, push, pop, fwd);
        if (err)
 -              return err;
 +              goto unlock;
  
        attr->vlan_handled = false;
  
                        attr->vlan_handled = true;
                }
  
 -              return 0;
 +              goto unlock;
        }
  
        if (!push && !pop)
 -              return 0;
 +              goto unlock;
  
        if (!(offloads->vlan_push_pop_refcount)) {
                /* it's the 1st vlan rule, apply global vlan pop policy */
@@@ -488,8 -482,6 +484,8 @@@ skip_set_push
  out:
        if (!err)
                attr->vlan_handled = true;
 +unlock:
 +      mutex_unlock(&esw->state_lock);
        return err;
  }
  
@@@ -512,8 -504,6 +508,8 @@@ int mlx5_eswitch_del_vlan_action(struc
        pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
        fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
  
 +      mutex_lock(&esw->state_lock);
 +
        vport = esw_vlan_action_get_vport(attr, push, pop);
  
        if (!push && !pop && fwd) {
                if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
                        vport->vlan_refcount--;
  
 -              return 0;
 +              goto out;
        }
  
        if (push) {
  skip_unset_push:
        offloads->vlan_push_pop_refcount--;
        if (offloads->vlan_push_pop_refcount)
 -              return 0;
 +              goto out;
  
        /* no more vlan rules, stop global vlan pop policy */
        err = esw_set_global_vlan_pop(esw, 0);
  
  out:
 +      mutex_unlock(&esw->state_lock);
        return err;
  }
  
@@@ -594,15 -583,38 +590,15 @@@ void mlx5_eswitch_del_send_to_vport_rul
        mlx5_del_flow_rules(rule);
  }
  
 -static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
 +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
  {
        u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
        u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
        u8 fdb_to_vport_reg_c_id;
        int err;
  
 -      err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
 -                                                 out, sizeof(out));
 -      if (err)
 -              return err;
 -
 -      fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
 -                                       esw_vport_context.fdb_to_vport_reg_c_id);
 -
 -      fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
 -      MLX5_SET(modify_esw_vport_context_in, in,
 -               esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
 -
 -      MLX5_SET(modify_esw_vport_context_in, in,
 -               field_select.fdb_to_vport_reg_c_id, 1);
 -
 -      return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
 -                                                   in, sizeof(in));
 -}
 -
 -static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
 -{
 -      u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
 -      u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
 -      u8 fdb_to_vport_reg_c_id;
 -      int err;
 +      if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
 +              return 0;
  
        err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
                                                   out, sizeof(out));
        fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
                                         esw_vport_context.fdb_to_vport_reg_c_id);
  
 -      fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
 +      if (enable)
 +              fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
 +      else
 +              fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
  
        MLX5_SET(modify_esw_vport_context_in, in,
                 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
@@@ -1393,9 -1402,10 +1389,9 @@@ void esw_offloads_cleanup_reps(struct m
  int esw_offloads_init_reps(struct mlx5_eswitch *esw)
  {
        int total_vports = esw->total_vports;
 -      struct mlx5_core_dev *dev = esw->dev;
        struct mlx5_eswitch_rep *rep;
 -      u8 hw_id[ETH_ALEN], rep_type;
        int vport_index;
 +      u8 rep_type;
  
        esw->offloads.vport_reps = kcalloc(total_vports,
                                           sizeof(struct mlx5_eswitch_rep),
        if (!esw->offloads.vport_reps)
                return -ENOMEM;
  
 -      mlx5_query_mac_address(dev, hw_id);
 -
        mlx5_esw_for_all_reps(esw, vport_index, rep) {
                rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
                rep->vport_index = vport_index;
 -              ether_addr_copy(rep->hw_id, hw_id);
  
                for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
                        atomic_set(&rep->rep_data[rep_type].state,
@@@ -2107,7 -2120,7 +2103,7 @@@ int mlx5_esw_funcs_changed_handler(stru
        return NOTIFY_OK;
  }
  
 -int esw_offloads_init(struct mlx5_eswitch *esw)
 +int esw_offloads_enable(struct mlx5_eswitch *esw)
  {
        int err;
  
        if (err)
                return err;
  
 -      if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
 -              err = mlx5_eswitch_enable_passing_vport_metadata(esw);
 -              if (err)
 -                      goto err_vport_metadata;
 -      }
 +      err = esw_set_passing_vport_metadata(esw, true);
 +      if (err)
 +              goto err_vport_metadata;
 +
 +      mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
  
        err = esw_offloads_load_all_reps(esw);
        if (err)
        return 0;
  
  err_reps:
 -      if (mlx5_eswitch_vport_match_metadata_enabled(esw))
 -              mlx5_eswitch_disable_passing_vport_metadata(esw);
 +      mlx5_eswitch_disable_pf_vf_vports(esw);
 +      esw_set_passing_vport_metadata(esw, false);
  err_vport_metadata:
        esw_offloads_steering_cleanup(esw);
        return err;
@@@ -2165,13 -2178,13 +2161,13 @@@ static int esw_offloads_stop(struct mlx
        return err;
  }
  
 -void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 +void esw_offloads_disable(struct mlx5_eswitch *esw)
  {
        mlx5_rdma_disable_roce(esw->dev);
        esw_offloads_devcom_cleanup(esw);
        esw_offloads_unload_all_reps(esw);
 -      if (mlx5_eswitch_vport_match_metadata_enabled(esw))
 -              mlx5_eswitch_disable_passing_vport_metadata(esw);
 +      mlx5_eswitch_disable_pf_vf_vports(esw);
 +      esw_set_passing_vport_metadata(esw, false);
        esw_offloads_steering_cleanup(esw);
        esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
  }
@@@ -2332,7 -2345,7 +2328,7 @@@ int mlx5_devlink_eswitch_inline_mode_se
                break;
        }
  
 -      if (esw->offloads.num_flows > 0) {
 +      if (atomic64_read(&esw->offloads.num_flows) > 0) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can't set inline mode when flows are configured");
                return -EOPNOTSUPP;
@@@ -2442,7 -2455,7 +2438,7 @@@ int mlx5_devlink_eswitch_encap_mode_set
        if (esw->offloads.encap == encap)
                return 0;
  
 -      if (esw->offloads.num_flows > 0) {
 +      if (atomic64_read(&esw->offloads.num_flows) > 0) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can't set encapsulation when flows are configured");
                return -EOPNOTSUPP;
index 1aaab84462708ca43a99ef029db7e8a21c94f1f7,84a87d0593338845ef6d900d9ae865763729ead4..150b3a144b83e8c609a212867ed4870504f504eb
@@@ -239,8 -239,7 +239,8 @@@ mlxsw_sp_acl_block_lookup(struct mlxsw_
  int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_acl_block *block,
                            struct mlxsw_sp_port *mlxsw_sp_port,
 -                          bool ingress)
 +                          bool ingress,
 +                          struct netlink_ext_ack *extack)
  {
        struct mlxsw_sp_acl_block_binding *binding;
        int err;
        if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
                return -EEXIST;
  
 +      if (!ingress && block->egress_blocker_rule_count) {
 +              NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
 +              return -EOPNOTSUPP;
 +      }
 +
        binding = kzalloc(sizeof(*binding), GFP_KERNEL);
        if (!binding)
                return -ENOMEM;
@@@ -477,7 -471,7 +477,7 @@@ int mlxsw_sp_acl_rulei_commit(struct ml
  void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
                                 unsigned int priority)
  {
-       rulei->priority = priority >> 16;
+       rulei->priority = priority;
  }
  
  void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
@@@ -678,7 -672,6 +678,7 @@@ int mlxsw_sp_acl_rule_add(struct mlxsw_
  {
        struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
        const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
 +      struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
        int err;
  
        err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
                 * one, to be directly bound to device. The rest of the
                 * rulesets are bound by "Goto action set".
                 */
 -              err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
 -                                                    ruleset->ht_key.block);
 +              err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
                if (err)
                        goto err_ruleset_block_bind;
        }
  
        list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
 -      ruleset->ht_key.block->rule_count++;
 +      block->rule_count++;
 +      block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker;
        return 0;
  
  err_ruleset_block_bind:
@@@ -719,9 -712,7 +719,9 @@@ void mlxsw_sp_acl_rule_del(struct mlxsw
  {
        struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
        const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
 +      struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
  
 +      block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
        ruleset->ht_key.block->rule_count--;
        list_del(&rule->list);
        if (!ruleset->ht_key.chain_index &&
index 61fe92719982a5faaf64a6e5a9b2bf34fef01fb4,337b0cbfd153ef6bdabcbd8f4e26a1b21bfb2b00..c979f38a2e0cb5e758b15d36b9072002a73a0e27
@@@ -1286,7 -1286,7 +1286,7 @@@ myri10ge_vlan_rx(struct net_device *dev
  {
        u8 *va;
        struct vlan_ethhdr *veh;
 -      struct skb_frag_struct *frag;
 +      skb_frag_t *frag;
        __wsum vsum;
  
        va = addr;
                skb->len -= VLAN_HLEN;
                skb->data_len -= VLAN_HLEN;
                frag = skb_shinfo(skb)->frags;
 -              frag->page_offset += VLAN_HLEN;
 -              skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
 +              skb_frag_off_add(frag, VLAN_HLEN);
 +              skb_frag_size_sub(frag, VLAN_HLEN);
        }
  }
  
@@@ -1318,7 -1318,7 +1318,7 @@@ myri10ge_rx_done(struct myri10ge_slice_
  {
        struct myri10ge_priv *mgp = ss->mgp;
        struct sk_buff *skb;
 -      struct skb_frag_struct *rx_frags;
 +      skb_frag_t *rx_frags;
        struct myri10ge_rx_buf *rx;
        int i, idx, remainder, bytes;
        struct pci_dev *pdev = mgp->pdev;
                return 0;
        }
        rx_frags = skb_shinfo(skb)->frags;
 -      /* Fill skb_frag_struct(s) with data from our receive */
 +      /* Fill skb_frag_t(s) with data from our receive */
        for (i = 0, remainder = len; remainder > 0; i++) {
                myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
                skb_fill_page_desc(skb, i, rx->info[idx].page,
        }
  
        /* remove padding */
 -      rx_frags[0].page_offset += MXGEFW_PAD;
 -      rx_frags[0].size -= MXGEFW_PAD;
 +      skb_frag_off_add(&rx_frags[0], MXGEFW_PAD);
 +      skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD);
        len -= MXGEFW_PAD;
  
        skb->len = len;
@@@ -2628,7 -2628,7 +2628,7 @@@ static netdev_tx_t myri10ge_xmit(struc
        struct myri10ge_slice_state *ss;
        struct mcp_kreq_ether_send *req;
        struct myri10ge_tx_buf *tx;
 -      struct skb_frag_struct *frag;
 +      skb_frag_t *frag;
        struct netdev_queue *netdev_queue;
        dma_addr_t bus;
        u32 low;
@@@ -3037,6 -3037,7 +3037,6 @@@ static int myri10ge_set_mac_address(str
  static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
  {
        struct myri10ge_priv *mgp = netdev_priv(dev);
 -      int error = 0;
  
        netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
        if (mgp->running) {
        } else
                dev->mtu = new_mtu;
  
 -      return error;
 +      return 0;
  }
  
  /*
@@@ -3918,7 -3919,7 +3918,7 @@@ static int myri10ge_probe(struct pci_de
         * setup (if available). */
        status = myri10ge_request_irq(mgp);
        if (status != 0)
-               goto abort_with_firmware;
+               goto abort_with_slices;
        myri10ge_free_irq(mgp);
  
        /* Save configuration space to be restored if the
index 86884c8630130ad9acc41f5cedd1df1eea1a0a1e,e8fce6d715ef069fe2e482b014b4d3f6e4f3e088..0a6cd2f1111fd40c6e79f394a12b5567f05da4b8
@@@ -435,7 -435,7 +435,7 @@@ static u32 init_page_array(void *hdr, u
                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  
                slots_used += fill_pg_buf(skb_frag_page(frag),
 -                                      frag->page_offset,
 +                                      skb_frag_off(frag),
                                        skb_frag_size(frag), &pb[slots_used]);
        }
        return slots_used;
@@@ -449,7 -449,7 +449,7 @@@ static int count_skb_frag_slots(struct 
        for (i = 0; i < frags; i++) {
                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
                unsigned long size = skb_frag_size(frag);
 -              unsigned long offset = frag->page_offset;
 +              unsigned long offset = skb_frag_off(frag);
  
                /* Skip unused frames from start of page */
                offset &= ~PAGE_MASK;
@@@ -1239,12 -1239,15 +1239,15 @@@ static void netvsc_get_stats64(struct n
                               struct rtnl_link_stats64 *t)
  {
        struct net_device_context *ndev_ctx = netdev_priv(net);
-       struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+       struct netvsc_device *nvdev;
        struct netvsc_vf_pcpu_stats vf_tot;
        int i;
  
+       rcu_read_lock();
+       nvdev = rcu_dereference(ndev_ctx->nvdev);
        if (!nvdev)
-               return;
+               goto out;
  
        netdev_stats_to_stats64(t, &net->stats);
  
                t->rx_packets   += packets;
                t->multicast    += multicast;
        }
+ out:
+       rcu_read_unlock();
  }
  
  static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
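
The netvsc hunk above switches from rcu_dereference_rtnl() to an explicit rcu_read_lock()/rcu_dereference() pair, so the RCU-protected device pointer is accessed under a proper read-side critical section instead of relying on RTNL. Below is a minimal sketch of that read-side pattern; the my_* names and fields are hypothetical and not part of this commit.

	#include <linux/netdevice.h>
	#include <linux/rcupdate.h>

	/* Hypothetical private state -- for illustration only. */
	struct my_hw_state {
		u64 rx_packets;
	};

	struct my_priv {
		struct my_hw_state __rcu *hw;
	};

	static void my_get_stats64(struct net_device *dev,
				   struct rtnl_link_stats64 *t)
	{
		struct my_priv *priv = netdev_priv(dev);
		struct my_hw_state *hw;

		rcu_read_lock();
		hw = rcu_dereference(priv->hw);	/* published elsewhere with rcu_assign_pointer() */
		if (!hw)
			goto out;

		/* Reads are safe for the duration of the RCU read-side section. */
		t->rx_packets = hw->rx_packets;
	out:
		rcu_read_unlock();
	}
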
index c217049552f7be3e7cd46021a9d312a4e01a3e9d,bcc40a2366248be1954f4ba7d67184009b58e548..c5b026150bf5140593becd1fed315bd1c7cebd70
  
  #include <linux/debugfs.h>
  #include <linux/device.h>
 +#include <linux/etherdevice.h>
 +#include <linux/inet.h>
 +#include <linux/jiffies.h>
 +#include <linux/kernel.h>
  #include <linux/list.h>
  #include <linux/mutex.h>
  #include <linux/random.h>
  #include <linux/rtnetlink.h>
 +#include <linux/workqueue.h>
  #include <net/devlink.h>
 +#include <net/ip.h>
 +#include <uapi/linux/devlink.h>
 +#include <uapi/linux/ip.h>
 +#include <uapi/linux/udp.h>
  
  #include "netdevsim.h"
  
  static struct dentry *nsim_dev_ddir;
  
 +#define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32)
 +
 +static ssize_t nsim_dev_take_snapshot_write(struct file *file,
 +                                          const char __user *data,
 +                                          size_t count, loff_t *ppos)
 +{
 +      struct nsim_dev *nsim_dev = file->private_data;
 +      void *dummy_data;
 +      int err;
 +      u32 id;
 +
 +      dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
 +      if (!dummy_data)
 +              return -ENOMEM;
 +
 +      get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
 +
 +      id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev));
 +      err = devlink_region_snapshot_create(nsim_dev->dummy_region,
 +                                           dummy_data, id, kfree);
 +      if (err) {
 +              pr_err("Failed to create region snapshot\n");
 +              kfree(dummy_data);
 +              return err;
 +      }
 +
 +      return count;
 +}
 +
 +static const struct file_operations nsim_dev_take_snapshot_fops = {
 +      .open = simple_open,
 +      .write = nsim_dev_take_snapshot_write,
 +      .llseek = generic_file_llseek,
 +};
 +
  static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
  {
        char dev_ddir_name[16];
                return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL;
        debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir,
                            &nsim_dev->fw_update_status);
 +      debugfs_create_u32("max_macs", 0600, nsim_dev->ddir,
 +                         &nsim_dev->max_macs);
 +      debugfs_create_bool("test1", 0600, nsim_dev->ddir,
 +                          &nsim_dev->test1);
 +      debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev,
 +                          &nsim_dev_take_snapshot_fops);
        return 0;
  }
  
@@@ -123,46 -73,47 +123,47 @@@ static void nsim_dev_port_debugfs_exit(
        debugfs_remove_recursive(nsim_dev_port->ddir);
  }
  
+ static struct net *nsim_devlink_net(struct devlink *devlink)
+ {
+       return &init_net;
+ }
  static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
  {
-       struct nsim_dev *nsim_dev = priv;
+       struct net *net = priv;
  
-       return nsim_fib_get_val(nsim_dev->fib_data,
-                               NSIM_RESOURCE_IPV4_FIB, false);
+       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
  }
  
  static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv)
  {
-       struct nsim_dev *nsim_dev = priv;
+       struct net *net = priv;
  
-       return nsim_fib_get_val(nsim_dev->fib_data,
-                               NSIM_RESOURCE_IPV4_FIB_RULES, false);
+       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
  }
  
  static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv)
  {
-       struct nsim_dev *nsim_dev = priv;
+       struct net *net = priv;
  
-       return nsim_fib_get_val(nsim_dev->fib_data,
-                               NSIM_RESOURCE_IPV6_FIB, false);
+       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
  }
  
  static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv)
  {
-       struct nsim_dev *nsim_dev = priv;
+       struct net *net = priv;
  
-       return nsim_fib_get_val(nsim_dev->fib_data,
-                               NSIM_RESOURCE_IPV6_FIB_RULES, false);
+       return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
  }
  
  static int nsim_dev_resources_register(struct devlink *devlink)
  {
-       struct nsim_dev *nsim_dev = devlink_priv(devlink);
        struct devlink_resource_size_params params = {
                .size_max = (u64)-1,
                .size_granularity = 1,
                .unit = DEVLINK_RESOURCE_UNIT_ENTRY
        };
+       struct net *net = nsim_devlink_net(devlink);
        int err;
        u64 n;
  
                goto out;
        }
  
-       n = nsim_fib_get_val(nsim_dev->fib_data,
-                            NSIM_RESOURCE_IPV4_FIB, true);
+       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
        err = devlink_resource_register(devlink, "fib", n,
                                        NSIM_RESOURCE_IPV4_FIB,
                                        NSIM_RESOURCE_IPV4, &params);
                return err;
        }
  
-       n = nsim_fib_get_val(nsim_dev->fib_data,
-                            NSIM_RESOURCE_IPV4_FIB_RULES, true);
+       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
        err = devlink_resource_register(devlink, "fib-rules", n,
                                        NSIM_RESOURCE_IPV4_FIB_RULES,
                                        NSIM_RESOURCE_IPV4, &params);
                goto out;
        }
  
-       n = nsim_fib_get_val(nsim_dev->fib_data,
-                            NSIM_RESOURCE_IPV6_FIB, true);
+       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
        err = devlink_resource_register(devlink, "fib", n,
                                        NSIM_RESOURCE_IPV6_FIB,
                                        NSIM_RESOURCE_IPV6, &params);
                return err;
        }
  
-       n = nsim_fib_get_val(nsim_dev->fib_data,
-                            NSIM_RESOURCE_IPV6_FIB_RULES, true);
+       n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
        err = devlink_resource_register(devlink, "fib-rules", n,
                                        NSIM_RESOURCE_IPV6_FIB_RULES,
                                        NSIM_RESOURCE_IPV6, &params);
        devlink_resource_occ_get_register(devlink,
                                          NSIM_RESOURCE_IPV4_FIB,
                                          nsim_dev_ipv4_fib_resource_occ_get,
-                                         nsim_dev);
+                                         net);
        devlink_resource_occ_get_register(devlink,
                                          NSIM_RESOURCE_IPV4_FIB_RULES,
                                          nsim_dev_ipv4_fib_rules_res_occ_get,
-                                         nsim_dev);
+                                         net);
        devlink_resource_occ_get_register(devlink,
                                          NSIM_RESOURCE_IPV6_FIB,
                                          nsim_dev_ipv6_fib_resource_occ_get,
-                                         nsim_dev);
+                                         net);
        devlink_resource_occ_get_register(devlink,
                                          NSIM_RESOURCE_IPV6_FIB_RULES,
                                          nsim_dev_ipv6_fib_rules_res_occ_get,
-                                         nsim_dev);
+                                         net);
  out:
        return err;
  }
  
 +enum nsim_devlink_param_id {
 +      NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 +      NSIM_DEVLINK_PARAM_ID_TEST1,
 +};
 +
 +static const struct devlink_param nsim_devlink_params[] = {
 +      DEVLINK_PARAM_GENERIC(MAX_MACS,
 +                            BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 +                            NULL, NULL, NULL),
 +      DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1,
 +                           "test1", DEVLINK_PARAM_TYPE_BOOL,
 +                           BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 +                           NULL, NULL, NULL),
 +};
 +
 +static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev,
 +                                              struct devlink *devlink)
 +{
 +      union devlink_param_value value;
 +
 +      value.vu32 = nsim_dev->max_macs;
 +      devlink_param_driverinit_value_set(devlink,
 +                                         DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 +                                         value);
 +      value.vbool = nsim_dev->test1;
 +      devlink_param_driverinit_value_set(devlink,
 +                                         NSIM_DEVLINK_PARAM_ID_TEST1,
 +                                         value);
 +}
 +
 +static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink)
 +{
 +      struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +      union devlink_param_value saved_value;
 +      int err;
 +
 +      err = devlink_param_driverinit_value_get(devlink,
 +                                               DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 +                                               &saved_value);
 +      if (!err)
 +              nsim_dev->max_macs = saved_value.vu32;
 +      err = devlink_param_driverinit_value_get(devlink,
 +                                               NSIM_DEVLINK_PARAM_ID_TEST1,
 +                                               &saved_value);
 +      if (!err)
 +              nsim_dev->test1 = saved_value.vbool;
 +}
 +
 +#define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16
 +
 +static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev,
 +                                    struct devlink *devlink)
 +{
 +      nsim_dev->dummy_region =
 +              devlink_region_create(devlink, "dummy",
 +                                    NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX,
 +                                    NSIM_DEV_DUMMY_REGION_SIZE);
 +      return PTR_ERR_OR_ZERO(nsim_dev->dummy_region);
 +}
 +
 +static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
 +{
 +      devlink_region_destroy(nsim_dev->dummy_region);
 +}
 +
 +struct nsim_trap_item {
 +      void *trap_ctx;
 +      enum devlink_trap_action action;
 +};
 +
 +struct nsim_trap_data {
 +      struct delayed_work trap_report_dw;
 +      struct nsim_trap_item *trap_items_arr;
 +      struct nsim_dev *nsim_dev;
 +      spinlock_t trap_lock;   /* Protects trap_items_arr */
 +};
 +
 +/* All driver-specific traps must be documented in
 + * Documentation/networking/devlink-trap-netdevsim.rst
 + */
 +enum {
 +      NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
 +      NSIM_TRAP_ID_FID_MISS,
 +};
 +
 +#define NSIM_TRAP_NAME_FID_MISS "fid_miss"
 +
 +#define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
 +
 +#define NSIM_TRAP_DROP(_id, _group_id)                                              \
 +      DEVLINK_TRAP_GENERIC(DROP, DROP, _id,                                 \
 +                           DEVLINK_TRAP_GROUP_GENERIC(_group_id),           \
 +                           NSIM_TRAP_METADATA)
 +#define NSIM_TRAP_EXCEPTION(_id, _group_id)                                 \
 +      DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id,                            \
 +                           DEVLINK_TRAP_GROUP_GENERIC(_group_id),           \
 +                           NSIM_TRAP_METADATA)
 +#define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id)                          \
 +      DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id,              \
 +                          NSIM_TRAP_NAME_##_id,                             \
 +                          DEVLINK_TRAP_GROUP_GENERIC(_group_id),            \
 +                          NSIM_TRAP_METADATA)
 +
 +static const struct devlink_trap nsim_traps_arr[] = {
 +      NSIM_TRAP_DROP(SMAC_MC, L2_DROPS),
 +      NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
 +      NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
 +      NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
 +      NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
 +      NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
 +      NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS),
 +      NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
 +      NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
 +      NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS),
 +};
 +
 +#define NSIM_TRAP_L4_DATA_LEN 100
 +
 +static struct sk_buff *nsim_dev_trap_skb_build(void)
 +{
 +      int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN;
 +      struct sk_buff *skb;
 +      struct udphdr *udph;
 +      struct ethhdr *eth;
 +      struct iphdr *iph;
 +
 +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 +      if (!skb)
 +              return NULL;
 +      tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
 +
 +      eth = skb_put(skb, sizeof(struct ethhdr));
 +      eth_random_addr(eth->h_dest);
 +      eth_random_addr(eth->h_source);
 +      eth->h_proto = htons(ETH_P_IP);
 +      skb->protocol = htons(ETH_P_IP);
 +
 +      iph = skb_put(skb, sizeof(struct iphdr));
 +      iph->protocol = IPPROTO_UDP;
 +      iph->saddr = in_aton("192.0.2.1");
 +      iph->daddr = in_aton("198.51.100.1");
 +      iph->version = 0x4;
 +      iph->frag_off = 0;
 +      iph->ihl = 0x5;
 +      iph->tot_len = htons(tot_len);
 +      iph->ttl = 100;
 +      ip_send_check(iph);
 +
 +      udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
 +      get_random_bytes(&udph->source, sizeof(u16));
 +      get_random_bytes(&udph->dest, sizeof(u16));
 +      udph->len = htons(sizeof(struct udphdr) + data_len);
 +
 +      return skb;
 +}
 +
 +static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port)
 +{
 +      struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev;
 +      struct devlink *devlink = priv_to_devlink(nsim_dev);
 +      struct nsim_trap_data *nsim_trap_data;
 +      int i;
 +
 +      nsim_trap_data = nsim_dev->trap_data;
 +
 +      spin_lock(&nsim_trap_data->trap_lock);
 +      for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
 +              struct nsim_trap_item *nsim_trap_item;
 +              struct sk_buff *skb;
 +
 +              nsim_trap_item = &nsim_trap_data->trap_items_arr[i];
 +              if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP)
 +                      continue;
 +
 +              skb = nsim_dev_trap_skb_build();
 +              if (!skb)
 +                      continue;
 +              skb->dev = nsim_dev_port->ns->netdev;
 +
 +              /* Trapped packets are usually passed to devlink in softIRQ,
 +               * but in this case they are generated in a workqueue. Disable
 +               * softIRQs to prevent lockdep from complaining about
 +               * "inconsistent lock state".
 +               */
 +              local_bh_disable();
 +              devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx,
 +                                  &nsim_dev_port->devlink_port);
 +              local_bh_enable();
 +              consume_skb(skb);
 +      }
 +      spin_unlock(&nsim_trap_data->trap_lock);
 +}
 +
 +#define NSIM_TRAP_REPORT_INTERVAL_MS  100
 +
 +static void nsim_dev_trap_report_work(struct work_struct *work)
 +{
 +      struct nsim_trap_data *nsim_trap_data;
 +      struct nsim_dev_port *nsim_dev_port;
 +      struct nsim_dev *nsim_dev;
 +
 +      nsim_trap_data = container_of(work, struct nsim_trap_data,
 +                                    trap_report_dw.work);
 +      nsim_dev = nsim_trap_data->nsim_dev;
 +
 +      /* For each running port and enabled packet trap, generate a UDP
 +       * packet with a random 5-tuple and report it.
 +       */
 +      mutex_lock(&nsim_dev->port_list_lock);
 +      list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
 +              if (!netif_running(nsim_dev_port->ns->netdev))
 +                      continue;
 +
 +              nsim_dev_trap_report(nsim_dev_port);
 +      }
 +      mutex_unlock(&nsim_dev->port_list_lock);
 +
 +      schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
 +                            msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
 +}
 +
 +static int nsim_dev_traps_init(struct devlink *devlink)
 +{
 +      struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +      struct nsim_trap_data *nsim_trap_data;
 +      int err;
 +
 +      nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL);
 +      if (!nsim_trap_data)
 +              return -ENOMEM;
 +
 +      nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr),
 +                                               sizeof(struct nsim_trap_item),
 +                                               GFP_KERNEL);
 +      if (!nsim_trap_data->trap_items_arr) {
 +              err = -ENOMEM;
 +              goto err_trap_data_free;
 +      }
 +
 +      /* The lock is used to protect the action state of the registered
 +       * traps. The value is written by user and read in delayed work when
 +       * iterating over all the traps.
 +       */
 +      spin_lock_init(&nsim_trap_data->trap_lock);
 +      nsim_trap_data->nsim_dev = nsim_dev;
 +      nsim_dev->trap_data = nsim_trap_data;
 +
 +      err = devlink_traps_register(devlink, nsim_traps_arr,
 +                                   ARRAY_SIZE(nsim_traps_arr), NULL);
 +      if (err)
 +              goto err_trap_items_free;
 +
 +      INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw,
 +                        nsim_dev_trap_report_work);
 +      schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
 +                            msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
 +
 +      return 0;
 +
 +err_trap_items_free:
 +      kfree(nsim_trap_data->trap_items_arr);
 +err_trap_data_free:
 +      kfree(nsim_trap_data);
 +      return err;
 +}
 +
 +static void nsim_dev_traps_exit(struct devlink *devlink)
 +{
 +      struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +
 +      cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw);
 +      devlink_traps_unregister(devlink, nsim_traps_arr,
 +                               ARRAY_SIZE(nsim_traps_arr));
 +      kfree(nsim_dev->trap_data->trap_items_arr);
 +      kfree(nsim_dev->trap_data);
 +}
 +
  static int nsim_dev_reload(struct devlink *devlink,
                           struct netlink_ext_ack *extack)
  {
-       struct nsim_dev *nsim_dev = devlink_priv(devlink);
        enum nsim_resource_id res_ids[] = {
                NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
                NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
        };
+       struct net *net = nsim_devlink_net(devlink);
        int i;
  
        for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
  
                err = devlink_resource_size_get(devlink, res_ids[i], &val);
                if (!err) {
-                       err = nsim_fib_set_max(nsim_dev->fib_data,
-                                              res_ids[i], val, extack);
+                       err = nsim_fib_set_max(net, res_ids[i], val, extack);
                        if (err)
                                return err;
                }
        }
 +      nsim_devlink_param_load_driverinit_values(devlink);
  
        return 0;
  }
@@@ -590,66 -258,11 +586,66 @@@ static int nsim_dev_flash_update(struc
        return 0;
  }
  
 +static struct nsim_trap_item *
 +nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id)
 +{
 +      struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data;
 +      int i;
 +
 +      for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
 +              if (nsim_traps_arr[i].id == trap_id)
 +                      return &nsim_trap_data->trap_items_arr[i];
 +      }
 +
 +      return NULL;
 +}
 +
 +static int nsim_dev_devlink_trap_init(struct devlink *devlink,
 +                                    const struct devlink_trap *trap,
 +                                    void *trap_ctx)
 +{
 +      struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +      struct nsim_trap_item *nsim_trap_item;
 +
 +      nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
 +      if (WARN_ON(!nsim_trap_item))
 +              return -ENOENT;
 +
 +      nsim_trap_item->trap_ctx = trap_ctx;
 +      nsim_trap_item->action = trap->init_action;
 +
 +      return 0;
 +}
 +
 +static int
 +nsim_dev_devlink_trap_action_set(struct devlink *devlink,
 +                               const struct devlink_trap *trap,
 +                               enum devlink_trap_action action)
 +{
 +      struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +      struct nsim_trap_item *nsim_trap_item;
 +
 +      nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
 +      if (WARN_ON(!nsim_trap_item))
 +              return -ENOENT;
 +
 +      spin_lock(&nsim_dev->trap_data->trap_lock);
 +      nsim_trap_item->action = action;
 +      spin_unlock(&nsim_dev->trap_data->trap_lock);
 +
 +      return 0;
 +}
 +
  static const struct devlink_ops nsim_dev_devlink_ops = {
        .reload = nsim_dev_reload,
        .flash_update = nsim_dev_flash_update,
 +      .trap_init = nsim_dev_devlink_trap_init,
 +      .trap_action_set = nsim_dev_devlink_trap_action_set,
  };
  
 +#define NSIM_DEV_MAX_MACS_DEFAULT 32
 +#define NSIM_DEV_TEST1_DEFAULT true
 +
  static struct nsim_dev *
  nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
  {
        INIT_LIST_HEAD(&nsim_dev->port_list);
        mutex_init(&nsim_dev->port_list_lock);
        nsim_dev->fw_update_status = true;
 +      nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT;
 +      nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT;
  
-       nsim_dev->fib_data = nsim_fib_create();
-       if (IS_ERR(nsim_dev->fib_data)) {
-               err = PTR_ERR(nsim_dev->fib_data);
-               goto err_devlink_free;
-       }
        err = nsim_dev_resources_register(devlink);
        if (err)
-               goto err_fib_destroy;
+               goto err_devlink_free;
  
        err = devlink_register(devlink, &nsim_bus_dev->dev);
        if (err)
                goto err_resources_unregister;
  
 -      err = nsim_dev_debugfs_init(nsim_dev);
 +      err = devlink_params_register(devlink, nsim_devlink_params,
 +                                    ARRAY_SIZE(nsim_devlink_params));
        if (err)
                goto err_dl_unregister;
 +      nsim_devlink_set_params_init_values(nsim_dev, devlink);
 +
 +      err = nsim_dev_dummy_region_init(nsim_dev, devlink);
 +      if (err)
 +              goto err_params_unregister;
 +
 +      err = nsim_dev_traps_init(devlink);
 +      if (err)
 +              goto err_dummy_region_exit;
 +
 +      err = nsim_dev_debugfs_init(nsim_dev);
 +      if (err)
 +              goto err_traps_exit;
  
        err = nsim_bpf_dev_init(nsim_dev);
        if (err)
                goto err_debugfs_exit;
  
 +      devlink_params_publish(devlink);
        return nsim_dev;
  
  err_debugfs_exit:
        nsim_dev_debugfs_exit(nsim_dev);
 +err_traps_exit:
 +      nsim_dev_traps_exit(devlink);
 +err_dummy_region_exit:
 +      nsim_dev_dummy_region_exit(nsim_dev);
 +err_params_unregister:
 +      devlink_params_unregister(devlink, nsim_devlink_params,
 +                                ARRAY_SIZE(nsim_devlink_params));
  err_dl_unregister:
        devlink_unregister(devlink);
  err_resources_unregister:
        devlink_resources_unregister(devlink, NULL);
- err_fib_destroy:
-       nsim_fib_destroy(nsim_dev->fib_data);
  err_devlink_free:
        devlink_free(devlink);
        return ERR_PTR(err);
@@@ -735,13 -316,8 +723,12 @@@ static void nsim_dev_destroy(struct nsi
  
        nsim_bpf_dev_exit(nsim_dev);
        nsim_dev_debugfs_exit(nsim_dev);
 +      nsim_dev_traps_exit(devlink);
 +      nsim_dev_dummy_region_exit(nsim_dev);
 +      devlink_params_unregister(devlink, nsim_devlink_params,
 +                                ARRAY_SIZE(nsim_devlink_params));
        devlink_unregister(devlink);
        devlink_resources_unregister(devlink, NULL);
-       nsim_fib_destroy(nsim_dev->fib_data);
        mutex_destroy(&nsim_dev->port_list_lock);
        devlink_free(devlink);
  }
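
The netdevsim changes above register driverinit devlink parameters, seed them with driver defaults, and read them back on devlink reload. The following is a condensed sketch of that flow for one generic parameter, using the same devlink API calls as the diff; the my_* names are placeholders and not part of this commit.

	#include <linux/kernel.h>
	#include <net/devlink.h>

	static const struct devlink_param my_params[] = {
		DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
				      NULL, NULL, NULL),
	};

	/* Called once at probe time, after devlink_register(). */
	static int my_params_init(struct devlink *devlink, u32 max_macs_default)
	{
		union devlink_param_value value;
		int err;

		err = devlink_params_register(devlink, my_params,
					      ARRAY_SIZE(my_params));
		if (err)
			return err;

		/* Seed the driverinit value with the driver default ... */
		value.vu32 = max_macs_default;
		devlink_param_driverinit_value_set(devlink,
						   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
						   value);
		devlink_params_publish(devlink);
		return 0;
	}

	/* ... and pick up whatever userspace configured when ->reload() runs. */
	static u32 my_params_reload(struct devlink *devlink, u32 cur_max_macs)
	{
		union devlink_param_value saved;

		if (!devlink_param_driverinit_value_get(devlink,
							DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
							&saved))
			return saved.vu32;
		return cur_max_macs;
	}
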
index 262a6978bbcaf5613a501ca0fb6f2695d65868af,9404637d34b7ab30da70c32492abec2b1c07558b..66bf13765ad0916809fe9c6bc8b480360abb2210
@@@ -145,7 -145,6 +145,7 @@@ struct nsim_dev_port 
  struct nsim_dev {
        struct nsim_bus_dev *nsim_bus_dev;
        struct nsim_fib_data *fib_data;
 +      struct nsim_trap_data *trap_data;
        struct dentry *ddir;
        struct dentry *ports_ddir;
        struct bpf_offload_dev *bpf_dev;
        struct list_head port_list;
        struct mutex port_list_lock; /* protects port list */
        bool fw_update_status;
 +      u32 max_macs;
 +      bool test1;
 +      struct devlink_region *dummy_region;
  };
  
  int nsim_dev_init(void);
@@@ -173,12 -169,10 +173,10 @@@ int nsim_dev_port_add(struct nsim_bus_d
  int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
                      unsigned int port_index);
  
- struct nsim_fib_data *nsim_fib_create(void);
- void nsim_fib_destroy(struct nsim_fib_data *fib_data);
- u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
-                    enum nsim_resource_id res_id, bool max);
- int nsim_fib_set_max(struct nsim_fib_data *fib_data,
-                    enum nsim_resource_id res_id, u64 val,
+ int nsim_fib_init(void);
+ void nsim_fib_exit(void);
+ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
+ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
                     struct netlink_ext_ack *extack);
  
  #if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
diff --combined drivers/net/phy/at803x.c
index d98aa56710a9b0e54f9749acd6c61ec627a55b56,6ad8b1c63c3426c9f7a554eceb6033bab675be62..2aa7b2e600464fb89383d508940d08f0c2935c2a
@@@ -249,40 -249,28 +249,24 @@@ static int at803x_config_init(struct ph
  {
        int ret;
  
 -      ret = genphy_config_init(phydev);
 -      if (ret < 0)
 -              return ret;
 -
        /* The RX and TX delay default is:
         *   after HW reset: RX delay enabled and TX delay disabled
         *   after SW reset: RX delay enabled, while TX delay retains the
         *   value before reset.
-        *
-        * So let's first disable the RX and TX delays in PHY and enable
-        * them based on the mode selected (this also takes care of RGMII
-        * mode where we expect delays to be disabled)
         */
-       ret = at803x_disable_rx_delay(phydev);
-       if (ret < 0)
-               return ret;
-       ret = at803x_disable_tx_delay(phydev);
-       if (ret < 0)
-               return ret;
        if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
-           phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
-               /* If RGMII_ID or RGMII_RXID are specified enable RX delay,
-                * otherwise keep it disabled
-                */
+           phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
                ret = at803x_enable_rx_delay(phydev);
-               if (ret < 0)
-                       return ret;
-       }
+       else
+               ret = at803x_disable_rx_delay(phydev);
+       if (ret < 0)
+               return ret;
  
        if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
-           phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
-               /* If RGMII_ID or RGMII_TXID are specified enable TX delay,
-                * otherwise keep it disabled
-                */
+           phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
                ret = at803x_enable_tx_delay(phydev);
-       }
+       else
+               ret = at803x_disable_tx_delay(phydev);
  
        return ret;
  }
index d5db7604d7c42ec08712fe16327d2bdc98c0dd1c,27ebc2c6c2d0c4eb111f39a5596f50d976ca4a1a..d347ddcac45bd9b188e62825bfa541e5a7f33217
@@@ -1564,20 -1564,24 +1564,20 @@@ EXPORT_SYMBOL(phy_reset_after_clk_enabl
   */
  static int genphy_config_advert(struct phy_device *phydev)
  {
 -      u32 advertise;
 -      int bmsr, adv;
 -      int err, changed = 0;
 +      int err, bmsr, changed = 0;
 +      u32 adv;
  
        /* Only allow advertising what this PHY supports */
        linkmode_and(phydev->advertising, phydev->advertising,
                     phydev->supported);
 -      if (!ethtool_convert_link_mode_to_legacy_u32(&advertise,
 -                                                   phydev->advertising))
 -              phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n",
 -                          __ETHTOOL_LINK_MODE_MASK_NBITS,
 -                          phydev->advertising);
 +
 +      adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
  
        /* Setup standard advertisement */
        err = phy_modify_changed(phydev, MII_ADVERTISE,
                                 ADVERTISE_ALL | ADVERTISE_100BASE4 |
                                 ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
 -                               ethtool_adv_to_mii_adv_t(advertise));
 +                               adv);
        if (err < 0)
                return err;
        if (err > 0)
        if (!(bmsr & BMSR_ESTATEN))
                return changed;
  
 -      /* Configure gigabit if it's supported */
 -      adv = 0;
 -      if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
 -                            phydev->supported) ||
 -          linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
 -                            phydev->supported))
 -              adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
 +      adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
  
        err = phy_modify_changed(phydev, MII_CTRL1000,
                                 ADVERTISE_1000FULL | ADVERTISE_1000HALF,
@@@ -1671,20 -1681,18 +1671,20 @@@ int genphy_restart_aneg(struct phy_devi
  EXPORT_SYMBOL(genphy_restart_aneg);
  
  /**
 - * genphy_config_aneg - restart auto-negotiation or write BMCR
 + * __genphy_config_aneg - restart auto-negotiation or write BMCR
   * @phydev: target phy_device struct
 + * @changed: whether autoneg is requested
   *
   * Description: If auto-negotiation is enabled, we configure the
   *   advertising, and then restart auto-negotiation.  If it is not
   *   enabled, then we write the BMCR.
   */
 -int genphy_config_aneg(struct phy_device *phydev)
 +int __genphy_config_aneg(struct phy_device *phydev, bool changed)
  {
 -      int err, changed;
 +      int err;
  
 -      changed = genphy_config_eee_advert(phydev);
 +      if (genphy_config_eee_advert(phydev))
 +              changed = true;
  
        if (AUTONEG_ENABLE != phydev->autoneg)
                return genphy_setup_forced(phydev);
        err = genphy_config_advert(phydev);
        if (err < 0) /* error */
                return err;
 +      else if (err)
 +              changed = true;
  
 -      changed |= err;
 -
 -      if (changed == 0) {
 +      if (!changed) {
                /* Advertisement hasn't changed, but maybe aneg was never on to
                 * begin with?  Or maybe phy was isolated?
                 */
                        return ctl;
  
                if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
 -                      changed = 1; /* do restart aneg */
 +                      changed = true; /* do restart aneg */
        }
  
        /* Only restart aneg if we are advertising something different
         * than we were before.
         */
 -      if (changed > 0)
 -              return genphy_restart_aneg(phydev);
 -
 -      return 0;
 +      return changed ? genphy_restart_aneg(phydev) : 0;
  }
 -EXPORT_SYMBOL(genphy_config_aneg);
 +EXPORT_SYMBOL(__genphy_config_aneg);
  
  /**
   * genphy_aneg_done - return auto-negotiation status
@@@ -1741,7 -1752,17 +1741,17 @@@ EXPORT_SYMBOL(genphy_aneg_done)
   */
  int genphy_update_link(struct phy_device *phydev)
  {
-       int status;
+       int status = 0, bmcr;
+       bmcr = phy_read(phydev, MII_BMCR);
+       if (bmcr < 0)
+               return bmcr;
+       /* Autoneg is being started, therefore disregard BMSR value and
+        * report link as down.
+        */
+       if (bmcr & BMCR_ANRESTART)
+               goto done;
  
        /* The link state is latched low so that momentary link
         * drops can be detected. Do not double-read the status
@@@ -1784,7 -1805,7 +1794,7 @@@ EXPORT_SYMBOL(genphy_update_link)
   */
  int genphy_read_status(struct phy_device *phydev)
  {
 -      int adv, lpa, lpagb, err, old_link = phydev->link;
 +      int lpa, lpagb, err, old_link = phydev->link;
  
        /* Update the link, but return if there was an error */
        err = genphy_update_link(phydev);
        phydev->pause = 0;
        phydev->asym_pause = 0;
  
 -      linkmode_zero(phydev->lp_advertising);
 -
        if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
                if (phydev->is_gigabit_capable) {
                        lpagb = phy_read(phydev, MII_STAT1000);
                        if (lpagb < 0)
                                return lpagb;
  
 -                      adv = phy_read(phydev, MII_CTRL1000);
 -                      if (adv < 0)
 -                              return adv;
 -
                        if (lpagb & LPA_1000MSFAIL) {
 +                              int adv = phy_read(phydev, MII_CTRL1000);
 +
 +                              if (adv < 0)
 +                                      return adv;
 +
                                if (adv & CTL1000_ENABLE_MASTER)
                                        phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n");
                                else
@@@ -1885,6 -1907,57 +1895,6 @@@ int genphy_soft_reset(struct phy_devic
  }
  EXPORT_SYMBOL(genphy_soft_reset);
  
 -int genphy_config_init(struct phy_device *phydev)
 -{
 -      int val;
 -      __ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, };
 -
 -      linkmode_set_bit_array(phy_basic_ports_array,
 -                             ARRAY_SIZE(phy_basic_ports_array),
 -                             features);
 -      linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features);
 -      linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features);
 -
 -      /* Do we support autonegotiation? */
 -      val = phy_read(phydev, MII_BMSR);
 -      if (val < 0)
 -              return val;
 -
 -      if (val & BMSR_ANEGCAPABLE)
 -              linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features);
 -
 -      if (val & BMSR_100FULL)
 -              linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features);
 -      if (val & BMSR_100HALF)
 -              linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features);
 -      if (val & BMSR_10FULL)
 -              linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features);
 -      if (val & BMSR_10HALF)
 -              linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features);
 -
 -      if (val & BMSR_ESTATEN) {
 -              val = phy_read(phydev, MII_ESTATUS);
 -              if (val < 0)
 -                      return val;
 -
 -              if (val & ESTATUS_1000_TFULL)
 -                      linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
 -                                       features);
 -              if (val & ESTATUS_1000_THALF)
 -                      linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
 -                                       features);
 -              if (val & ESTATUS_1000_XFULL)
 -                      linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
 -                                       features);
 -      }
 -
 -      linkmode_and(phydev->supported, phydev->supported, features);
 -      linkmode_and(phydev->advertising, phydev->advertising, features);
 -
 -      return 0;
 -}
 -EXPORT_SYMBOL(genphy_config_init);
 -
  /**
   * genphy_read_abilities - read PHY abilities from Clause 22 registers
   * @phydev: target phy_device struct
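
The phy_device.c hunks above split genphy_config_aneg() into __genphy_config_aneg(phydev, changed), so a driver that programs vendor-specific advertisement registers can pass changed=true and have auto-negotiation restarted even when the standard registers did not change. A hypothetical driver callback is sketched below; the MY_PHY_* register and bit are made up for illustration and only the helper signatures come from this merge.

	#include <linux/phy.h>
	#include <linux/mii.h>

	/* Hypothetical vendor advertisement register -- illustration only. */
	#define MY_PHY_VEND_ADV		0x10
	#define MY_PHY_VEND_ADV_BIT	BIT(0)

	static int my_phy_config_aneg(struct phy_device *phydev)
	{
		bool changed = false;
		int ret;

		/* phy_modify_changed() returns 1 if the register value changed. */
		ret = phy_modify_changed(phydev, MY_PHY_VEND_ADV,
					 MY_PHY_VEND_ADV_BIT, MY_PHY_VEND_ADV_BIT);
		if (ret < 0)
			return ret;
		if (ret > 0)
			changed = true;

		/* Let the generic helper handle BMCR/ADVERTISE and restart
		 * aneg if either the generic or the vendor advertisement
		 * changed.
		 */
		return __genphy_config_aneg(phydev, changed);
	}
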
index 769bb262fbec9852dcb8000bc03e9f39541c88e0,f033fee225a11cc7c5b977d26f639042f7896524..58f5a219fb65cdc8417ee9b19a1660b82f6f56c4
@@@ -1258,7 -1258,8 +1258,7 @@@ static void lan78xx_status(struct lan78
                return;
        }
  
 -      memcpy(&intdata, urb->transfer_buffer, 4);
 -      le32_to_cpus(&intdata);
 +      intdata = get_unaligned_le32(urb->transfer_buffer);
  
        if (intdata & INT_ENP_PHY_INT) {
                netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
@@@ -2729,7 -2730,6 +2729,7 @@@ static struct sk_buff *lan78xx_tx_prep(
                                       struct sk_buff *skb, gfp_t flags)
  {
        u32 tx_cmd_a, tx_cmd_b;
 +      void *ptr;
  
        if (skb_cow_head(skb, TX_OVERHEAD)) {
                dev_kfree_skb_any(skb);
                tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_;
        }
  
 -      skb_push(skb, 4);
 -      cpu_to_le32s(&tx_cmd_b);
 -      memcpy(skb->data, &tx_cmd_b, 4);
 -
 -      skb_push(skb, 4);
 -      cpu_to_le32s(&tx_cmd_a);
 -      memcpy(skb->data, &tx_cmd_a, 4);
 +      ptr = skb_push(skb, 8);
 +      put_unaligned_le32(tx_cmd_a, ptr);
 +      put_unaligned_le32(tx_cmd_b, ptr + 4);
  
        return skb;
  }
@@@ -3101,13 -3105,16 +3101,13 @@@ static int lan78xx_rx(struct lan78xx_ne
                struct sk_buff *skb2;
                unsigned char *packet;
  
 -              memcpy(&rx_cmd_a, skb->data, sizeof(rx_cmd_a));
 -              le32_to_cpus(&rx_cmd_a);
 +              rx_cmd_a = get_unaligned_le32(skb->data);
                skb_pull(skb, sizeof(rx_cmd_a));
  
 -              memcpy(&rx_cmd_b, skb->data, sizeof(rx_cmd_b));
 -              le32_to_cpus(&rx_cmd_b);
 +              rx_cmd_b = get_unaligned_le32(skb->data);
                skb_pull(skb, sizeof(rx_cmd_b));
  
 -              memcpy(&rx_cmd_c, skb->data, sizeof(rx_cmd_c));
 -              le16_to_cpus(&rx_cmd_c);
 +              rx_cmd_c = get_unaligned_le16(skb->data);
                skb_pull(skb, sizeof(rx_cmd_c));
  
                packet = skb->data;
@@@ -3785,7 -3792,7 +3785,7 @@@ static int lan78xx_probe(struct usb_int
        ret = register_netdev(netdev);
        if (ret != 0) {
                netif_err(dev, probe, netdev, "couldn't register the device\n");
-               goto out3;
+               goto out4;
        }
  
        usb_set_intfdata(intf, dev);
  
        ret = lan78xx_phy_init(dev);
        if (ret < 0)
-               goto out4;
+               goto out5;
  
        return 0;
  
- out4:
+ out5:
        unregister_netdev(netdev);
+ out4:
+       usb_free_urb(dev->urb_intr);
  out3:
        lan78xx_unbind(dev, intf);
  out2:
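
The lan78xx hunks above replace open-coded memcpy() plus le32_to_cpus()/cpu_to_le32s() sequences with the get_unaligned_le32()/put_unaligned_le32() helpers. A small sketch of the idiom, with illustrative function and buffer names:

	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/types.h>
	#include <asm/unaligned.h>

	/* Old style: copy out, then byte-swap in place. */
	static u32 read_cmd_old(const void *buf)
	{
		u32 v;

		memcpy(&v, buf, 4);
		le32_to_cpus(&v);
		return v;
	}

	/* New style: one helper handles both the unaligned access and the
	 * little-endian conversion.
	 */
	static u32 read_cmd_new(const void *buf)
	{
		return get_unaligned_le32(buf);
	}

	static void write_cmds(void *buf, u32 cmd_a, u32 cmd_b)
	{
		put_unaligned_le32(cmd_a, buf);
		put_unaligned_le32(cmd_b, buf + 4);
	}
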
index 4679fcf1a1c4b92cc5a9e3e18ed2d41a519dbb0b,c9262ffeefe4cb3eb8a13dfdcc0b77801adc5876..0020b2e8c279d40059f470a68fb2f0d8d1599cfb
@@@ -136,12 -136,12 +136,12 @@@ static inline struct xenvif_queue *ubuf
  
  static u16 frag_get_pending_idx(skb_frag_t *frag)
  {
 -      return (u16)frag->page_offset;
 +      return (u16)skb_frag_off(frag);
  }
  
  static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
  {
 -      frag->page_offset = pending_idx;
 +      skb_frag_off_set(frag, pending_idx);
  }
  
  static inline pending_ring_idx_t pending_index(unsigned i)
@@@ -925,6 -925,7 +925,7 @@@ static void xenvif_tx_build_gops(struc
                        skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
                        nskb = xenvif_alloc_skb(0);
                        if (unlikely(nskb == NULL)) {
+                               skb_shinfo(skb)->nr_frags = 0;
                                kfree_skb(skb);
                                xenvif_tx_err(queue, &txreq, extra_count, idx);
                                if (net_ratelimit())
  
                        if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
                                /* Failure in xenvif_set_skb_gso is fatal. */
+                               skb_shinfo(skb)->nr_frags = 0;
                                kfree_skb(skb);
                                kfree_skb(nskb);
                                break;
@@@ -1055,7 -1057,7 +1057,7 @@@ static int xenvif_handle_frag_list(stru
                        int j;
                        skb->truesize += skb->data_len;
                        for (j = 0; j < i; j++)
 -                              put_page(frags[j].page.p);
 +                              put_page(skb_frag_page(&frags[j]));
                        return -ENOMEM;
                }
  
                        BUG();
  
                offset += len;
 -              frags[i].page.p = page;
 -              frags[i].page_offset = 0;
 +              __skb_frag_set_page(&frags[i], page);
 +              skb_frag_off_set(&frags[i], 0);
                skb_frag_size_set(&frags[i], len);
        }
  
@@@ -1653,6 -1655,9 +1655,6 @@@ static int __init netback_init(void
  
  #ifdef CONFIG_DEBUG_FS
        xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
 -      if (IS_ERR_OR_NULL(xen_netback_dbg_root))
 -              pr_warn("Init of debugfs returned %ld!\n",
 -                      PTR_ERR(xen_netback_dbg_root));
  #endif /* CONFIG_DEBUG_FS */
  
        return 0;
index 5aa0f1268bca54845610d7eaf3222b3e9868d604,9c3310c4d61d670426bfa5a842b5f2c750119f04..0803070246aab89a6da676d259f04a029bf23a10
@@@ -544,6 -544,7 +544,7 @@@ static struct qeth_reply *qeth_alloc_re
        if (reply) {
                refcount_set(&reply->refcnt, 1);
                init_completion(&reply->received);
+               spin_lock_init(&reply->lock);
        }
        return reply;
  }
@@@ -799,6 -800,13 +800,13 @@@ static void qeth_issue_next_read_cb(str
  
        if (!reply->callback) {
                rc = 0;
+               goto no_callback;
+       }
+       spin_lock_irqsave(&reply->lock, flags);
+       if (reply->rc) {
+               /* Bail out when the requestor has already left: */
+               rc = reply->rc;
        } else {
                if (cmd) {
                        reply->offset = (u16)((char *)cmd - (char *)iob->data);
                        rc = reply->callback(card, reply, (unsigned long)iob);
                }
        }
+       spin_unlock_irqrestore(&reply->lock, flags);
  
+ no_callback:
        if (rc <= 0)
                qeth_notify_reply(reply, rc);
        qeth_put_reply(reply);
@@@ -1749,6 -1759,16 +1759,16 @@@ static int qeth_send_control_data(struc
                rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
  
        qeth_dequeue_reply(card, reply);
+       if (reply_cb) {
+               /* Wait until the callback for a late reply has completed: */
+               spin_lock_irq(&reply->lock);
+               if (rc)
+                       /* Zap any callback that's still pending: */
+                       reply->rc = rc;
+               spin_unlock_irq(&reply->lock);
+       }
        if (!rc)
                rc = reply->rc;
        qeth_put_reply(reply);
@@@ -3515,7 -3535,7 +3535,7 @@@ static int qeth_get_elements_for_frags(
        int cnt, elements = 0;
  
        for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) {
 -              struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[cnt];
 +              skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt];
  
                elements += qeth_get_elements_for_range(
                        (addr_t)skb_frag_address(frag),
index 6fa7726185deef212c345b6055e49b943c70937d,40dd573e73c3d4318ab54911951907f5ecaf9f3d..1d1440d4300285e367fa3d9c5bd766db7470217e
@@@ -284,9 -284,9 +284,9 @@@ static int visor_copy_fragsinfo_from_sk
                for (frag = 0; frag < numfrags; frag++) {
                        count = add_physinfo_entries(page_to_pfn(
                                  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
 -                                skb_shinfo(skb)->frags[frag].page_offset,
 -                                skb_shinfo(skb)->frags[frag].size, count,
 -                                frags_max, frags);
 +                                skb_frag_off(&skb_shinfo(skb)->frags[frag]),
 +                                skb_frag_size(&skb_shinfo(skb)->frags[frag]),
 +                                count, frags_max, frags);
                        /* add_physinfo_entries only returns
                         * zero if the frags array is out of room
                         * That should never happen because we
@@@ -1750,7 -1750,8 +1750,8 @@@ static int visornic_poll(struct napi_st
  }
  
  /* poll_for_irq       - checks the status of the response queue
-  * @v: Void pointer to the visronic devdata struct.
+  * @t: pointer to the 'struct timer_list' from which we can retrieve
+  *     the visornic devdata struct.
   *
   * Main function of the vnic_incoming thread. Periodically check the response
   * queue and drain it if needed.
index da5e7eaed43830b7a76e3f1bf1cea9f462d0211a,b8b570c30b5ed967bd5acc38fe2e3f4a6c7025fa..a66ed0abe40ea72ca9bc375a3ab13ef389aca289
@@@ -1040,21 -1040,6 +1040,21 @@@ enum 
        MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
  };
  
 +#define MLX5_FC_BULK_SIZE_FACTOR 128
 +
 +enum mlx5_fc_bulk_alloc_bitmask {
 +      MLX5_FC_BULK_128   = (1 << 0),
 +      MLX5_FC_BULK_256   = (1 << 1),
 +      MLX5_FC_BULK_512   = (1 << 2),
 +      MLX5_FC_BULK_1024  = (1 << 3),
 +      MLX5_FC_BULK_2048  = (1 << 4),
 +      MLX5_FC_BULK_4096  = (1 << 5),
 +      MLX5_FC_BULK_8192  = (1 << 6),
 +      MLX5_FC_BULK_16384 = (1 << 7),
 +};
 +
 +#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
 +
  struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_0[0x30];
        u8         vhca_id[0x10];
        u8         reserved_at_2e0[0x7];
        u8         max_qp_mcg[0x19];
  
 -      u8         reserved_at_300[0x18];
 +      u8         reserved_at_300[0x10];
 +      u8         flow_counter_bulk_alloc[0x8];
        u8         log_max_mcg[0x8];
  
        u8         reserved_at_320[0x3];
@@@ -2782,7 -2766,7 +2782,7 @@@ struct mlx5_ifc_traffic_counter_bits 
  struct mlx5_ifc_tisc_bits {
        u8         strict_lag_tx_port_affinity[0x1];
        u8         tls_en[0x1];
 -      u8         reserved_at_1[0x2];
 +      u8         reserved_at_2[0x2];
        u8         lag_tx_port_affinity[0x04];
  
        u8         reserved_at_8[0x4];
@@@ -2957,13 -2941,6 +2957,13 @@@ enum 
        SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
  };
  
 +enum {
 +      ELEMENT_TYPE_CAP_MASK_TASR              = 1 << 0,
 +      ELEMENT_TYPE_CAP_MASK_VPORT             = 1 << 1,
 +      ELEMENT_TYPE_CAP_MASK_VPORT_TC          = 1 << 2,
 +      ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC     = 1 << 3,
 +};
 +
  struct mlx5_ifc_scheduling_context_bits {
        u8         element_type[0x8];
        u8         reserved_at_8[0x18];
@@@ -7840,8 -7817,7 +7840,8 @@@ struct mlx5_ifc_alloc_flow_counter_in_b
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
  
 -      u8         reserved_at_40[0x40];
 +      u8         reserved_at_40[0x38];
 +      u8         flow_counter_bulk[0x8];
  };
  
  struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
@@@ -10078,9 -10054,8 +10078,8 @@@ struct mlx5_ifc_tls_static_params_bits 
  };
  
  struct mlx5_ifc_tls_progress_params_bits {
-       u8         valid[0x1];
-       u8         reserved_at_1[0x7];
-       u8         pd[0x18];
+       u8         reserved_at_0[0x8];
+       u8         tisn[0x18];
  
        u8         next_record_tcp_sn[0x20];
  
diff --combined include/linux/skbuff.h
index 7eb28b72d9ba6fe805f8a2228ded0e777726864b,ba5583522d249444361314dc12ef63d5e6d5e79a..77c6dc88e95dde78e32dd5e4c9909f2a088b75ed
@@@ -14,7 -14,6 +14,7 @@@
  #include <linux/compiler.h>
  #include <linux/time.h>
  #include <linux/bug.h>
 +#include <linux/bvec.h>
  #include <linux/cache.h>
  #include <linux/rbtree.h>
  #include <linux/socket.h>
@@@ -309,45 -308,58 +309,45 @@@ extern int sysctl_max_skb_frags
   */
  #define GSO_BY_FRAGS  0xFFFF
  
 -typedef struct skb_frag_struct skb_frag_t;
 -
 -struct skb_frag_struct {
 -      struct {
 -              struct page *p;
 -      } page;
 -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
 -      __u32 page_offset;
 -      __u32 size;
 -#else
 -      __u16 page_offset;
 -      __u16 size;
 -#endif
 -};
 +typedef struct bio_vec skb_frag_t;
  
  /**
 - * skb_frag_size - Returns the size of a skb fragment
 + * skb_frag_size() - Returns the size of a skb fragment
   * @frag: skb fragment
   */
  static inline unsigned int skb_frag_size(const skb_frag_t *frag)
  {
 -      return frag->size;
 +      return frag->bv_len;
  }
  
  /**
 - * skb_frag_size_set - Sets the size of a skb fragment
 + * skb_frag_size_set() - Sets the size of a skb fragment
   * @frag: skb fragment
   * @size: size of fragment
   */
  static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
  {
 -      frag->size = size;
 +      frag->bv_len = size;
  }
  
  /**
 - * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
 + * skb_frag_size_add() - Increments the size of a skb fragment by @delta
   * @frag: skb fragment
   * @delta: value to add
   */
  static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
  {
 -      frag->size += delta;
 +      frag->bv_len += delta;
  }
  
  /**
 - * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
 + * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta
   * @frag: skb fragment
   * @delta: value to subtract
   */
  static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
  {
 -      frag->size -= delta;
 +      frag->bv_len -= delta;
  }
  
  /**
@@@ -367,7 -379,7 +367,7 @@@ static inline bool skb_frag_must_loop(s
   *    skb_frag_foreach_page - loop over pages in a fragment
   *
   *    @f:             skb frag to operate on
 - *    @f_off:         offset from start of f->page.p
 + *    @f_off:         offset from start of f->bv_page
   *    @f_len:         length from f_off to loop over
   *    @p:             (temp var) current page
   *    @p_off:         (temp var) offset from start of current page,
@@@ -1271,7 -1283,7 +1271,7 @@@ static inline int skb_flow_dissector_bp
  
  struct bpf_flow_dissector;
  bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 -                    __be16 proto, int nhoff, int hlen);
 +                    __be16 proto, int nhoff, int hlen, unsigned int flags);
  
  bool __skb_flow_dissect(const struct net *net,
                        const struct sk_buff *skb,
@@@ -1362,6 -1374,14 +1362,14 @@@ static inline void skb_copy_hash(struc
        to->l4_hash = from->l4_hash;
  };
  
+ static inline void skb_copy_decrypted(struct sk_buff *to,
+                                     const struct sk_buff *from)
+ {
+ #ifdef CONFIG_TLS_DEVICE
+       to->decrypted = from->decrypted;
+ #endif
+ }
  #ifdef NET_SKBUFF_DATA_USES_OFFSET
  static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
  {
@@@ -2077,8 -2097,8 +2085,8 @@@ static inline void __skb_fill_page_desc
         * that not all callers have unique ownership of the page but rely
         * on page_is_pfmemalloc doing the right thing(tm).
         */
 -      frag->page.p              = page;
 -      frag->page_offset         = off;
 +      frag->bv_page             = page;
 +      frag->bv_offset           = off;
        skb_frag_size_set(frag, size);
  
        page = compound_head(page);
@@@ -2857,46 -2877,6 +2865,46 @@@ static inline void skb_propagate_pfmema
                skb->pfmemalloc = true;
  }
  
 +/**
 + * skb_frag_off() - Returns the offset of a skb fragment
 + * @frag: the paged fragment
 + */
 +static inline unsigned int skb_frag_off(const skb_frag_t *frag)
 +{
 +      return frag->bv_offset;
 +}
 +
 +/**
 + * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
 + * @frag: skb fragment
 + * @delta: value to add
 + */
 +static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
 +{
 +      frag->bv_offset += delta;
 +}
 +
 +/**
 + * skb_frag_off_set() - Sets the offset of a skb fragment
 + * @frag: skb fragment
 + * @offset: offset of fragment
 + */
 +static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
 +{
 +      frag->bv_offset = offset;
 +}
 +
 +/**
 + * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
 + * @fragto: skb fragment where offset is set
 + * @fragfrom: skb fragment offset is copied from
 + */
 +static inline void skb_frag_off_copy(skb_frag_t *fragto,
 +                                   const skb_frag_t *fragfrom)
 +{
 +      fragto->bv_offset = fragfrom->bv_offset;
 +}
 +
  /**
   * skb_frag_page - retrieve the page referred to by a paged fragment
   * @frag: the paged fragment
   */
  static inline struct page *skb_frag_page(const skb_frag_t *frag)
  {
 -      return frag->page.p;
 +      return frag->bv_page;
  }
  
  /**
@@@ -2963,7 -2943,7 +2971,7 @@@ static inline void skb_frag_unref(struc
   */
  static inline void *skb_frag_address(const skb_frag_t *frag)
  {
 -      return page_address(skb_frag_page(frag)) + frag->page_offset;
 +      return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
  }
  
  /**
@@@ -2979,18 -2959,7 +2987,18 @@@ static inline void *skb_frag_address_sa
        if (unlikely(!ptr))
                return NULL;
  
 -      return ptr + frag->page_offset;
 +      return ptr + skb_frag_off(frag);
 +}
 +
 +/**
 + * skb_frag_page_copy() - sets the page in a fragment from another fragment
 + * @fragto: skb fragment where page is set
 + * @fragfrom: skb fragment page is copied from
 + */
 +static inline void skb_frag_page_copy(skb_frag_t *fragto,
 +                                    const skb_frag_t *fragfrom)
 +{
 +      fragto->bv_page = fragfrom->bv_page;
  }
  
  /**
   */
  static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
  {
 -      frag->page.p = page;
 +      frag->bv_page = page;
  }
  
  /**
@@@ -3038,7 -3007,7 +3046,7 @@@ static inline dma_addr_t skb_frag_dma_m
                                          enum dma_data_direction dir)
  {
        return dma_map_page(dev, skb_frag_page(frag),
 -                          frag->page_offset + offset, size, dir);
 +                          skb_frag_off(frag) + offset, size, dir);
  }
  
  static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
@@@ -3205,10 -3174,10 +3213,10 @@@ static inline bool skb_can_coalesce(str
        if (skb_zcopy(skb))
                return false;
        if (i) {
 -              const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
 +              const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
  
                return page == skb_frag_page(frag) &&
 -                     off == frag->page_offset + skb_frag_size(frag);
 +                     off == skb_frag_off(frag) + skb_frag_size(frag);
        }
        return false;
  }
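The skbuff.h hunks above switch skb_frag_t to the bio_vec-style layout (bv_page/bv_offset) and route all users through the new accessors skb_frag_off(), skb_frag_off_set(), skb_frag_off_add() and skb_frag_page(). As a minimal sketch (not part of the patch; the helper name my_dump_frags is made up), driver code walking an skb's paged fragments after this conversion only goes through the accessors:

#include <linux/skbuff.h>

static void my_dump_frags(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		/* page, offset and size are read only via the accessors,
		 * never via frag->page.p or frag->page_offset
		 */
		pr_debug("frag %d: page=%p off=%u size=%u\n", i,
			 skb_frag_page(frag), skb_frag_off(frag),
			 skb_frag_size(frag));
	}
}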
diff --combined include/net/netfilter/nf_tables.h
index dc301e3d67396cbad037b0931251a95b18f39658,475d6f28ca677a8b40c106fa08af86971226000e..e73d16f8b870353583e8579aa6bfc22ba870f140
@@@ -25,7 -25,6 +25,7 @@@ struct nft_pktinfo 
        struct xt_action_param          xt;
  };
  
 +#if IS_ENABLED(CONFIG_NETFILTER)
  static inline struct net *nft_net(const struct nft_pktinfo *pkt)
  {
        return pkt->xt.state->net;
@@@ -58,7 -57,6 +58,7 @@@ static inline void nft_set_pktinfo(stru
        pkt->skb = skb;
        pkt->xt.state = state;
  }
 +#endif
  
  static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
                                          struct sk_buff *skb)
@@@ -423,8 -421,7 +423,7 @@@ struct nft_set 
        unsigned char                   *udata;
        /* runtime data below here */
        const struct nft_set_ops        *ops ____cacheline_aligned;
-       u16                             flags:13,
-                                       bound:1,
+       u16                             flags:14,
                                        genmask:2;
        u8                              klen;
        u8                              dlen;
@@@ -929,11 -926,9 +928,11 @@@ struct nft_chain_type 
        int                             family;
        struct module                   *owner;
        unsigned int                    hook_mask;
 +#if IS_ENABLED(CONFIG_NETFILTER)
        nf_hookfn                       *hooks[NF_MAX_HOOKS];
        int                             (*ops_register)(struct net *net, const struct nf_hook_ops *ops);
        void                            (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
 +#endif
  };
  
  int nft_chain_validate_dependency(const struct nft_chain *chain,
@@@ -959,9 -954,7 +958,9 @@@ struct nft_stats 
   *    @flow_block: flow block (for hardware offload)
   */
  struct nft_base_chain {
 +#if IS_ENABLED(CONFIG_NETFILTER)
        struct nf_hook_ops              ops;
 +#endif
        const struct nft_chain_type     *type;
        u8                              policy;
        u8                              flags;
@@@ -1158,9 -1151,7 +1157,9 @@@ struct nft_flowtable 
                                        use:30;
        u64                             handle;
        /* runtime data below here */
 +#if IS_ENABLED(CONFIG_NETFILTER)
        struct nf_hook_ops              *ops ____cacheline_aligned;
 +#endif
        struct nf_flowtable             data;
  };
  
@@@ -1215,8 -1206,6 +1214,8 @@@ void nft_trace_notify(struct nft_tracei
  #define MODULE_ALIAS_NFT_OBJ(type) \
        MODULE_ALIAS("nft-obj-" __stringify(type))
  
 +#if IS_ENABLED(CONFIG_NF_TABLES)
 +
  /*
   * The gencursor defines two generations, the currently active and the
   * next one. Objects contain a bitmask of 2 bits specifying the generations
@@@ -1290,8 -1279,6 +1289,8 @@@ static inline void nft_set_elem_change_
        ext->genmask ^= nft_genmask_next(net);
  }
  
 +#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
 +
  /*
   * We use a free bit in the genmask field to indicate the element
   * is busy, meaning it is currently being processed either by
@@@ -1360,12 -1347,15 +1359,15 @@@ struct nft_trans_rule 
  struct nft_trans_set {
        struct nft_set                  *set;
        u32                             set_id;
+       bool                            bound;
  };
  
  #define nft_trans_set(trans)  \
        (((struct nft_trans_set *)trans->data)->set)
  #define nft_trans_set_id(trans)       \
        (((struct nft_trans_set *)trans->data)->set_id)
+ #define nft_trans_set_bound(trans)    \
+       (((struct nft_trans_set *)trans->data)->bound)
  
  struct nft_trans_chain {
        bool                            update;
@@@ -1396,12 -1386,15 +1398,15 @@@ struct nft_trans_table 
  struct nft_trans_elem {
        struct nft_set                  *set;
        struct nft_set_elem             elem;
+       bool                            bound;
  };
  
  #define nft_trans_elem_set(trans)     \
        (((struct nft_trans_elem *)trans->data)->set)
  #define nft_trans_elem(trans) \
        (((struct nft_trans_elem *)trans->data)->elem)
+ #define nft_trans_elem_set_bound(trans)       \
+       (((struct nft_trans_elem *)trans->data)->bound)
  
  struct nft_trans_obj {
        struct nft_object               *obj;
diff --combined include/net/netfilter/nf_tables_offload.h
index 8a5969d9b80b95f8935369e7b0924dce3ac9eff0,c8b9dec376f56599a7403718727b0674681fbcdb..db104665a9e4ef3c69a5498b0f37a9e98fff9654
@@@ -9,7 -9,6 +9,7 @@@ struct nft_offload_reg 
        u32             len;
        u32             base_offset;
        u32             offset;
 +      struct nft_data data;
        struct nft_data mask;
  };
  
@@@ -64,10 -63,6 +64,10 @@@ struct nft_rule
  struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
  void nft_flow_rule_destroy(struct nft_flow_rule *flow);
  int nft_flow_rule_offload_commit(struct net *net);
 +void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
 +                                  flow_indr_block_bind_cb_t *cb,
 +                                  void *cb_priv,
 +                                  enum flow_block_command command);
  
  #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)               \
        (__reg)->base_offset    =                                       \
@@@ -78,4 -73,6 +78,6 @@@
        (__reg)->key            = __key;                                \
        memset(&(__reg)->mask, 0xff, (__reg)->len);
  
+ int nft_chain_offload_priority(struct nft_base_chain *basechain);
  #endif
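The NFT_OFFLOAD_MATCH() macro declared above fills an nft_offload_reg with the flow-dissector key, offsets, length and an all-ones mask. A hedged sketch of how an expression's offload path would use it (the wrapper function is illustrative; the "basic"/"ip_proto" field names follow struct nft_flow_key as used elsewhere in this header):

#include <net/netfilter/nf_tables_offload.h>

static void my_describe_ip_proto_match(struct nft_offload_reg *reg)
{
	/* match on the basic key's ip_proto byte (L4 protocol) */
	NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
			  sizeof(__u8), reg);
}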
diff --combined include/net/pkt_cls.h
index 0790a4ed909cb1e98688c649e9006458307db837,98be18ef1ed3400739120e354ba1c5082bc94770..64999ffcb486851e6058685d9e804e5ba5a6745b
@@@ -70,6 -70,15 +70,6 @@@ static inline struct Qdisc *tcf_block_q
        return block->q;
  }
  
 -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -                              tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -                            tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -void __tc_indr_block_cb_unregister(struct net_device *dev,
 -                                 tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -void tc_indr_block_cb_unregister(struct net_device *dev,
 -                               tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -
  int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                 struct tcf_result *res, bool compat_mode);
  
@@@ -128,6 -137,32 +128,6 @@@ void tc_setup_cb_block_unregister(struc
  {
  }
  
 -static inline
 -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -                              tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -      return 0;
 -}
 -
 -static inline
 -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -                            tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -      return 0;
 -}
 -
 -static inline
 -void __tc_indr_block_cb_unregister(struct net_device *dev,
 -                                 tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -}
 -
 -static inline
 -void tc_indr_block_cb_unregister(struct net_device *dev,
 -                               tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -}
 -
  static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                               struct tcf_result *res, bool compat_mode)
  {
@@@ -611,7 -646,7 +611,7 @@@ tc_cls_common_offload_init(struct flow_
  {
        cls_common->chain_index = tp->chain->index;
        cls_common->protocol = tp->protocol;
-       cls_common->prio = tp->prio;
+       cls_common->prio = tp->prio >> 16;
        if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
                cls_common->extack = extack;
  }
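The one-line change to tc_cls_common_offload_init() accounts for tcf_proto->prio keeping the user-visible filter priority in the upper 16 bits of a u32, so drivers now receive the same number the user configured. A short illustrative note (the values are an example, not taken from the patch):

/* userspace:  tc filter add dev eth0 ingress prio 10 flower ...
 * kernel:     tp->prio          == 10 << 16
 * offload:    cls_common->prio  == tp->prio >> 16 == 10
 */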
diff --combined include/uapi/linux/bpf.h
index 4393bd4b241973c1ad82edb70a40b63517a7f9ce,a5aa7d3ac6a116a7a8d3c08d840c37a70e42e7a4..0e66371bea13fdb93411c87aeb08a88615b5f8bb
@@@ -134,7 -134,6 +134,7 @@@ enum bpf_map_type 
        BPF_MAP_TYPE_QUEUE,
        BPF_MAP_TYPE_STACK,
        BPF_MAP_TYPE_SK_STORAGE,
 +      BPF_MAP_TYPE_DEVMAP_HASH,
  };
  
  /* Note that tracing related programs such as
@@@ -1467,8 -1466,8 +1467,8 @@@ union bpf_attr 
   *            If no cookie has been set yet, generate a new cookie. Once
   *            generated, the socket cookie remains stable for the life of the
   *            socket. This helper can be useful for monitoring per socket
-  *            networking traffic statistics as it provides a unique socket
-  *            identifier per namespace.
+  *            networking traffic statistics as it provides a global socket
+  *            identifier that can be assumed unique.
   *    Return
  *            An 8-byte long non-decreasing number on success, or 0 if the
   *            socket field is missing inside *skb*.
   *            **-EPERM** if no permission to send the *sig*.
   *
   *            **-EAGAIN** if bpf program can try again.
 + *
 + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
 + *    Description
 + *            Try to issue a SYN cookie for the packet with corresponding
 + *            IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
 + *
 + *            *iph* points to the start of the IPv4 or IPv6 header, while
 + *            *iph_len* contains **sizeof**\ (**struct iphdr**) or
 + *            **sizeof**\ (**struct ip6hdr**).
 + *
 + *            *th* points to the start of the TCP header, while *th_len*
 + *            contains the length of the TCP header.
 + *
 + *    Return
 + *            On success, lower 32 bits hold the generated SYN cookie,
 + *            followed by 16 bits which hold the MSS value for that cookie,
 + *            and the top 16 bits are unused.
 + *
 + *            On failure, the returned value is one of the following:
 + *
 + *            **-EINVAL** SYN cookie cannot be issued due to error
 + *
 + *            **-ENOENT** SYN cookie should not be issued (no SYN flood)
 + *
 + *            **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
 + *
 + *            **-EPROTONOSUPPORT** IP packet version is not 4 or 6
   */
  #define __BPF_FUNC_MAPPER(FN)         \
        FN(unspec),                     \
        FN(strtoul),                    \
        FN(sk_storage_get),             \
        FN(sk_storage_delete),          \
 -      FN(send_signal),
 +      FN(send_signal),                \
 +      FN(tcp_gen_syncookie),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@@ -3536,10 -3507,6 +3536,10 @@@ enum bpf_task_fd_type 
        BPF_FD_TYPE_URETPROBE,          /* filename + offset */
  };
  
 +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG           (1U << 0)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL               (1U << 1)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP            (1U << 2)
 +
  struct bpf_flow_keys {
        __u16   nhoff;
        __u16   thoff;
                        __u32   ipv6_dst[4];    /* in6_addr; network order */
                };
        };
 +      __u32   flags;
 +      __be32  flow_label;
  };
  
  struct bpf_func_info {
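The bpf_tcp_gen_syncookie() documentation added above packs the result into a signed 64-bit value: the low 32 bits carry the SYN cookie and the following 16 bits the MSS. A self-contained sketch of decoding that value as described (decode_syncookie is an illustrative name, not a kernel or libbpf API):

static inline int decode_syncookie(long long ret,
				   unsigned int *cookie,
				   unsigned short *mss)
{
	if (ret < 0)
		return (int)ret;	/* -EINVAL, -ENOENT, ... as listed above */

	*cookie = (unsigned int)ret;		/* lower 32 bits */
	*mss    = (unsigned short)(ret >> 32);	/* next 16 bits  */
	return 0;
}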
diff --combined net/ipv4/tcp.c
index f8fa1686f7f3e64f5d4ea8163e7f87538cc0d672,77b485d60b9d0e00edc4e2f0d6c5bb3a9460b23b..051ef10374f69bf57dea4950c4a7fef303786a7b
@@@ -984,6 -984,9 +984,9 @@@ new_segment
                        if (!skb)
                                goto wait_for_memory;
  
+ #ifdef CONFIG_TLS_DEVICE
+                       skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+ #endif
                        skb_entail(sk, skb);
                        copy = size_goal;
                }
@@@ -1162,7 -1165,7 +1165,7 @@@ int tcp_sendmsg_locked(struct sock *sk
        struct sockcm_cookie sockc;
        int flags, err, copied = 0;
        int mss_now = 0, size_goal, copied_syn = 0;
 -      bool process_backlog = false;
 +      int process_backlog = 0;
        bool zc = false;
        long timeo;
  
@@@ -1254,10 -1257,9 +1257,10 @@@ new_segment
                        if (!sk_stream_memory_free(sk))
                                goto wait_for_sndbuf;
  
 -                      if (process_backlog && sk_flush_backlog(sk)) {
 -                              process_backlog = false;
 -                              goto restart;
 +                      if (unlikely(process_backlog >= 16)) {
 +                              process_backlog = 0;
 +                              if (sk_flush_backlog(sk))
 +                                      goto restart;
                        }
                        first_skb = tcp_rtx_and_write_queues_empty(sk);
                        skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
                        if (!skb)
                                goto wait_for_memory;
  
 -                      process_backlog = true;
 +                      process_backlog++;
                        skb->ip_summed = CHECKSUM_PARTIAL;
  
                        skb_entail(sk, skb);
@@@ -1777,21 -1779,19 +1780,21 @@@ static int tcp_zerocopy_receive(struct 
                                break;
                        frags = skb_shinfo(skb)->frags;
                        while (offset) {
 -                              if (frags->size > offset)
 +                              if (skb_frag_size(frags) > offset)
                                        goto out;
 -                              offset -= frags->size;
 +                              offset -= skb_frag_size(frags);
                                frags++;
                        }
                }
 -              if (frags->size != PAGE_SIZE || frags->page_offset) {
 +              if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
                        int remaining = zc->recv_skip_hint;
 +                      int size = skb_frag_size(frags);
  
 -                      while (remaining && (frags->size != PAGE_SIZE ||
 -                                           frags->page_offset)) {
 -                              remaining -= frags->size;
 +                      while (remaining && (size != PAGE_SIZE ||
 +                                           skb_frag_off(frags))) {
 +                              remaining -= size;
                                frags++;
 +                              size = skb_frag_size(frags);
                        }
                        zc->recv_skip_hint -= remaining;
                        break;
@@@ -3784,8 -3784,8 +3787,8 @@@ int tcp_md5_hash_skb_data(struct tcp_md
                return 1;
  
        for (i = 0; i < shi->nr_frags; ++i) {
 -              const struct skb_frag_struct *f = &shi->frags[i];
 -              unsigned int offset = f->page_offset;
 +              const skb_frag_t *f = &shi->frags[i];
 +              unsigned int offset = skb_frag_off(f);
                struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
  
                sg_set_page(&sg, page, skb_frag_size(f),
diff --combined net/ipv4/tcp_output.c
index e6d02e05bb1c9fae3ac05ce0ab70a8308e800589,979520e46e33c16ba2237cb36ef7b4dab94f6546..5c46bc4c7e8d26397c304c9dc69d9e559cd72337
@@@ -1320,6 -1320,7 +1320,7 @@@ int tcp_fragment(struct sock *sk, enum 
        buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
        if (!buff)
                return -ENOMEM; /* We'll just try again later. */
+       skb_copy_decrypted(buff, skb);
  
        sk->sk_wmem_queued += buff->truesize;
        sk_mem_charge(sk, buff->truesize);
@@@ -1402,7 -1403,7 +1403,7 @@@ static int __pskb_trim_head(struct sk_b
                } else {
                        shinfo->frags[k] = shinfo->frags[i];
                        if (eat) {
 -                              shinfo->frags[k].page_offset += eat;
 +                              skb_frag_off_add(&shinfo->frags[k], eat);
                                skb_frag_size_sub(&shinfo->frags[k], eat);
                                eat = 0;
                        }
@@@ -1874,6 -1875,7 +1875,7 @@@ static int tso_fragment(struct sock *sk
        buff = sk_stream_alloc_skb(sk, 0, gfp, true);
        if (unlikely(!buff))
                return -ENOMEM;
+       skb_copy_decrypted(buff, skb);
  
        sk->sk_wmem_queued += buff->truesize;
        sk_mem_charge(sk, buff->truesize);
@@@ -2143,6 -2145,7 +2145,7 @@@ static int tcp_mtu_probe(struct sock *s
        sk_mem_charge(sk, nskb->truesize);
  
        skb = tcp_send_head(sk);
+       skb_copy_decrypted(nskb, skb);
  
        TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
        TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
diff --combined net/netfilter/nf_tables_api.c
index fe3b7b0c6c662df5f5b4ec858a33c275b1e281e9,d47469f824a10a8628c165cedef587a49528c725..6d00bef023c40a3131228a4f61667d8a3cc3a1b2
@@@ -138,9 -138,14 +138,14 @@@ static void nft_set_trans_bind(const st
                return;
  
        list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
-               if (trans->msg_type == NFT_MSG_NEWSET &&
-                   nft_trans_set(trans) == set) {
-                       set->bound = true;
+               switch (trans->msg_type) {
+               case NFT_MSG_NEWSET:
+                       if (nft_trans_set(trans) == set)
+                               nft_trans_set_bound(trans) = true;
+                       break;
+               case NFT_MSG_NEWSETELEM:
+                       if (nft_trans_elem_set(trans) == set)
+                               nft_trans_elem_set_bound(trans) = true;
                        break;
                }
        }
@@@ -1662,6 -1667,10 +1667,10 @@@ static int nf_tables_addchain(struct nf
  
                chain->flags |= NFT_BASE_CHAIN | flags;
                basechain->policy = NF_ACCEPT;
+               if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+                   nft_chain_offload_priority(basechain) < 0)
+                       return -EOPNOTSUPP;
                flow_block_init(&basechain->flow_block);
        } else {
                chain = kzalloc(sizeof(*chain), GFP_KERNEL);
@@@ -6906,7 -6915,7 +6915,7 @@@ static int __nf_tables_abort(struct ne
                        break;
                case NFT_MSG_NEWSET:
                        trans->ctx.table->use--;
-                       if (nft_trans_set(trans)->bound) {
+                       if (nft_trans_set_bound(trans)) {
                                nft_trans_destroy(trans);
                                break;
                        }
                        nft_trans_destroy(trans);
                        break;
                case NFT_MSG_NEWSETELEM:
-                       if (nft_trans_elem_set(trans)->bound) {
+                       if (nft_trans_elem_set_bound(trans)) {
                                nft_trans_destroy(trans);
                                break;
                        }
@@@ -7593,11 -7602,6 +7602,11 @@@ static struct pernet_operations nf_tabl
        .exit   = nf_tables_exit_net,
  };
  
 +static struct flow_indr_block_ing_entry block_ing_entry = {
 +      .cb = nft_indr_block_get_and_ing_cmd,
 +      .list = LIST_HEAD_INIT(block_ing_entry.list),
 +};
 +
  static int __init nf_tables_module_init(void)
  {
        int err;
                goto err5;
  
        nft_chain_route_init();
 +      flow_indr_add_block_ing_cb(&block_ing_entry);
        return err;
  err5:
        rhltable_destroy(&nft_objname_ht);
@@@ -7646,7 -7649,6 +7655,7 @@@ err1
  
  static void __exit nf_tables_module_exit(void)
  {
 +      flow_indr_del_block_ing_cb(&block_ing_entry);
        nfnetlink_subsys_unregister(&nf_tables_subsys);
        unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
        nft_chain_filter_fini();
diff --combined net/netfilter/nf_tables_offload.c
index d3c4c9c88bc8ed44db13727d8422805256876a57,c0d18c1d77ac05e0c9b1f98a33f2e18616bc3add..3c2725ade61b208170b0d897d7f003046fe52a7f
@@@ -103,10 -103,11 +103,11 @@@ void nft_offload_update_dependency(stru
  }
  
  static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
-                                        __be16 proto,
-                                       struct netlink_ext_ack *extack)
+                                        __be16 proto, int priority,
+                                        struct netlink_ext_ack *extack)
  {
        common->protocol = proto;
+       common->prio = priority;
        common->extack = extack;
  }
  
@@@ -124,6 -125,15 +125,15 @@@ static int nft_setup_cb_call(struct nft
        return 0;
  }
  
+ int nft_chain_offload_priority(struct nft_base_chain *basechain)
+ {
+       if (basechain->ops.priority <= 0 ||
+           basechain->ops.priority > USHRT_MAX)
+               return -1;
+       return 0;
+ }
  static int nft_flow_offload_rule(struct nft_trans *trans,
                                 enum flow_cls_command command)
  {
        if (flow)
                proto = flow->proto;
  
-       nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+       nft_flow_offload_common_init(&cls_flow.common, proto,
+                                    basechain->ops.priority, &extack);
        cls_flow.command = command;
        cls_flow.cookie = (unsigned long) rule;
        if (flow)
@@@ -171,110 -182,24 +182,110 @@@ static int nft_flow_offload_unbind(stru
        return 0;
  }
  
 +static int nft_block_setup(struct nft_base_chain *basechain,
 +                         struct flow_block_offload *bo,
 +                         enum flow_block_command cmd)
 +{
 +      int err;
 +
 +      switch (cmd) {
 +      case FLOW_BLOCK_BIND:
 +              err = nft_flow_offload_bind(bo, basechain);
 +              break;
 +      case FLOW_BLOCK_UNBIND:
 +              err = nft_flow_offload_unbind(bo, basechain);
 +              break;
 +      default:
 +              WARN_ON_ONCE(1);
 +              err = -EOPNOTSUPP;
 +      }
 +
 +      return err;
 +}
 +
 +static int nft_block_offload_cmd(struct nft_base_chain *chain,
 +                               struct net_device *dev,
 +                               enum flow_block_command cmd)
 +{
 +      struct netlink_ext_ack extack = {};
 +      struct flow_block_offload bo = {};
 +      int err;
 +
 +      bo.net = dev_net(dev);
 +      bo.block = &chain->flow_block;
 +      bo.command = cmd;
 +      bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +      bo.extack = &extack;
 +      INIT_LIST_HEAD(&bo.cb_list);
 +
 +      err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 +      if (err < 0)
 +              return err;
 +
 +      return nft_block_setup(chain, &bo, cmd);
 +}
 +
 +static void nft_indr_block_ing_cmd(struct net_device *dev,
 +                                 struct nft_base_chain *chain,
 +                                 flow_indr_block_bind_cb_t *cb,
 +                                 void *cb_priv,
 +                                 enum flow_block_command cmd)
 +{
 +      struct netlink_ext_ack extack = {};
 +      struct flow_block_offload bo = {};
 +
 +      if (!chain)
 +              return;
 +
 +      bo.net = dev_net(dev);
 +      bo.block = &chain->flow_block;
 +      bo.command = cmd;
 +      bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +      bo.extack = &extack;
 +      INIT_LIST_HEAD(&bo.cb_list);
 +
 +      cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
 +
 +      nft_block_setup(chain, &bo, cmd);
 +}
 +
 +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
 +                                    struct net_device *dev,
 +                                    enum flow_block_command cmd)
 +{
 +      struct flow_block_offload bo = {};
 +      struct netlink_ext_ack extack = {};
 +
 +      bo.net = dev_net(dev);
 +      bo.block = &chain->flow_block;
 +      bo.command = cmd;
 +      bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +      bo.extack = &extack;
 +      INIT_LIST_HEAD(&bo.cb_list);
 +
 +      flow_indr_block_call(dev, &bo, cmd);
 +
 +      if (list_empty(&bo.cb_list))
 +              return -EOPNOTSUPP;
 +
 +      return nft_block_setup(chain, &bo, cmd);
 +}
 +
  #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
  
  static int nft_flow_offload_chain(struct nft_trans *trans,
                                  enum flow_block_command cmd)
  {
        struct nft_chain *chain = trans->ctx.chain;
 -      struct netlink_ext_ack extack = {};
 -      struct flow_block_offload bo = {};
        struct nft_base_chain *basechain;
        struct net_device *dev;
 -      int err;
  
        if (!nft_is_base_chain(chain))
                return -EOPNOTSUPP;
  
        basechain = nft_base_chain(chain);
        dev = basechain->ops.dev;
 -      if (!dev || !dev->netdev_ops->ndo_setup_tc)
 +      if (!dev)
                return -EOPNOTSUPP;
  
        /* Only default policy to accept is supported for now. */
            nft_trans_chain_policy(trans) != NF_ACCEPT)
                return -EOPNOTSUPP;
  
 -      bo.command = cmd;
 -      bo.block = &basechain->flow_block;
 -      bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 -      bo.extack = &extack;
 -      INIT_LIST_HEAD(&bo.cb_list);
 -
 -      err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
 -      if (err < 0)
 -              return err;
 -
 -      switch (cmd) {
 -      case FLOW_BLOCK_BIND:
 -              err = nft_flow_offload_bind(&bo, basechain);
 -              break;
 -      case FLOW_BLOCK_UNBIND:
 -              err = nft_flow_offload_unbind(&bo, basechain);
 -              break;
 -      }
 -
 -      return err;
 +      if (dev->netdev_ops->ndo_setup_tc)
 +              return nft_block_offload_cmd(basechain, dev, cmd);
 +      else
 +              return nft_indr_block_offload_cmd(basechain, dev, cmd);
  }
  
  int nft_flow_rule_offload_commit(struct net *net)
  
        return err;
  }
 +
 +void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
 +                                  flow_indr_block_bind_cb_t *cb,
 +                                  void *cb_priv,
 +                                  enum flow_block_command command)
 +{
 +      struct net *net = dev_net(dev);
 +      const struct nft_table *table;
 +      const struct nft_chain *chain;
 +
 +      list_for_each_entry_rcu(table, &net->nft.tables, list) {
 +              if (table->family != NFPROTO_NETDEV)
 +                      continue;
 +
 +              list_for_each_entry_rcu(chain, &table->chains, list) {
 +                      if (nft_is_base_chain(chain)) {
 +                              struct nft_base_chain *basechain;
 +
 +                              basechain = nft_base_chain(chain);
 +                              if (!strncmp(basechain->dev_name, dev->name,
 +                                           IFNAMSIZ)) {
 +                                      nft_indr_block_ing_cmd(dev, basechain,
 +                                                             cb, cb_priv,
 +                                                             command);
 +                                      return;
 +                              }
 +                      }
 +              }
 +      }
 +}
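The new code above hooks nf_tables into the indirect flow block machinery: a flow_indr_block_ing_entry is registered with flow_indr_add_block_ing_cb() at module init, removed with flow_indr_del_block_ing_cb() on exit, and its callback replays block BIND/UNBIND commands for a given device. A hedged sketch of the same registration pattern from another, hypothetical user, mirroring the struct and helper names used in the patch:

#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/flow_offload.h>

static void my_get_and_ing_cmd(struct net_device *dev,
			       flow_indr_block_bind_cb_t *cb,
			       void *cb_priv,
			       enum flow_block_command command)
{
	/* look up this subsystem's state for @dev and replay the
	 * block bind/unbind through cb(), as nf_tables does above
	 */
}

static struct flow_indr_block_ing_entry my_entry = {
	.cb   = my_get_and_ing_cmd,
	.list = LIST_HEAD_INIT(my_entry.list),
};

static int __init my_init(void)
{
	flow_indr_add_block_ing_cb(&my_entry);
	return 0;
}

static void __exit my_exit(void)
{
	flow_indr_del_block_ing_cb(&my_entry);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");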
diff --combined net/rxrpc/ar-internal.h
index 63b26baa108a612b8937b94169cb93567c70915f,145335611af66b1e6fcd261cc532be0a72aaa6ae..fa5b030acaa85f09dd1fd4035b7775c7ada3d488
@@@ -226,9 -226,6 +226,9 @@@ struct rxrpc_security 
        int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
                             unsigned int, unsigned int, rxrpc_seq_t, u16);
  
 +      /* Free crypto request on a call */
 +      void (*free_call_crypto)(struct rxrpc_call *);
 +
        /* Locate the data in a received packet that has been verified. */
        void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
                            unsigned int *, unsigned int *);
   */
  struct rxrpc_local {
        struct rcu_head         rcu;
-       atomic_t                usage;
+       atomic_t                active_users;   /* Number of users of the local endpoint */
+       atomic_t                usage;          /* Number of references to the structure */
        struct rxrpc_net        *rxnet;         /* The network ns in which this resides */
        struct list_head        link;
        struct socket           *socket;        /* my UDP socket */
@@@ -560,7 -558,6 +561,7 @@@ struct rxrpc_call 
        unsigned long           expect_term_by; /* When we expect call termination by */
        u32                     next_rx_timo;   /* Timeout for next Rx packet (jif) */
        u32                     next_req_timo;  /* Timeout for next Rx request packet (jif) */
 +      struct skcipher_request *cipher_req;    /* Packet cipher request buffer */
        struct timer_list       timer;          /* Combined event timer */
        struct work_struct      processor;      /* Event processor */
        rxrpc_notify_rx_t       notify_rx;      /* kernel service Rx notification function */
  
        /* receive-phase ACK management */
        u8                      ackr_reason;    /* reason to ACK */
-       u16                     ackr_skew;      /* skew on packet being ACK'd */
        rxrpc_serial_t          ackr_serial;    /* serial of packet being ACK'd */
        rxrpc_serial_t          ackr_first_seq; /* first sequence number received */
        rxrpc_seq_t             ackr_prev_seq;  /* previous sequence number received */
@@@ -747,7 -743,7 +747,7 @@@ int rxrpc_reject_call(struct rxrpc_soc
  /*
   * call_event.c
   */
- void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
                       enum rxrpc_propose_ack_trace);
  void rxrpc_process_call(struct work_struct *);
  
@@@ -1006,6 -1002,8 +1006,8 @@@ struct rxrpc_local *rxrpc_lookup_local(
  struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
  struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
  void rxrpc_put_local(struct rxrpc_local *);
+ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
+ void rxrpc_unuse_local(struct rxrpc_local *);
  void rxrpc_queue_local(struct rxrpc_local *);
  void rxrpc_destroy_all_locals(struct rxrpc_net *);
  
diff --combined net/sched/sch_taprio.c
index 046fd2c102b4ce9e890a7e8772d415f7cc40f6cf,e25d414ae12fdd9a7c4b8ea1903293e36e4ae12a..540bde009ea51b9a02fe96fdc03f5fdd266a7ef8
@@@ -677,6 -677,10 +677,6 @@@ static const struct nla_policy entry_po
        [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
  };
  
 -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
 -      [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
 -};
 -
  static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
        [TCA_TAPRIO_ATTR_PRIOMAP]              = {
                .len = sizeof(struct tc_mqprio_qopt)
@@@ -1191,7 -1195,8 +1191,8 @@@ unlock
        spin_unlock_bh(qdisc_lock(sch));
  
  free_sched:
-       kfree(new_admin);
+       if (new_admin)
+               call_rcu(&new_admin->rcu, taprio_free_sched_cb);
  
        return err;
  }
diff --combined net/tipc/link.c
index 289e848084ac23810091639365c0bacd58dbb50e,c2c5c53cad22e26e156bfe13b70fa369a32ee620..6cc75ffd9e2c2741c7734c173609bc0f30ce679e
@@@ -106,8 -106,6 +106,6 @@@ struct tipc_stats 
   * @transmitq: queue for sent, non-acked messages
   * @backlogq: queue for messages waiting to be sent
   * @snt_nxt: next sequence number to use for outbound messages
-  * @prev_from: sequence number of most previous retransmission request
-  * @stale_limit: time when repeated identical retransmits must force link reset
   * @ackers: # of peers that needs to ack each packet before it can be released
   * @acked: # last packet acked by a certain peer. Used for broadcast.
   * @rcv_nxt: next sequence number to expect for inbound messages
@@@ -164,9 -162,7 +162,7 @@@ struct tipc_link 
                u16 limit;
        } backlog[5];
        u16 snd_nxt;
-       u16 prev_from;
        u16 window;
-       unsigned long stale_limit;
  
        /* Reception */
        u16 rcv_nxt;
  
        /* Fragmentation/reassembly */
        struct sk_buff *reasm_buf;
 +      struct sk_buff *reasm_tnlmsg;
  
        /* Broadcast */
        u16 ackers;
@@@ -854,31 -849,18 +850,31 @@@ static int link_schedule_user(struct ti
   */
  static void link_prepare_wakeup(struct tipc_link *l)
  {
 +      struct sk_buff_head *wakeupq = &l->wakeupq;
 +      struct sk_buff_head *inputq = l->inputq;
        struct sk_buff *skb, *tmp;
 -      int imp, i = 0;
 +      struct sk_buff_head tmpq;
 +      int avail[5] = {0,};
 +      int imp = 0;
 +
 +      __skb_queue_head_init(&tmpq);
 +
 +      for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
 +              avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
  
 -      skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
 +      skb_queue_walk_safe(wakeupq, skb, tmp) {
                imp = TIPC_SKB_CB(skb)->chain_imp;
 -              if (l->backlog[imp].len < l->backlog[imp].limit) {
 -                      skb_unlink(skb, &l->wakeupq);
 -                      skb_queue_tail(l->inputq, skb);
 -              } else if (i++ > 10) {
 -                      break;
 -              }
 +              if (avail[imp] <= 0)
 +                      continue;
 +              avail[imp]--;
 +              __skb_unlink(skb, wakeupq);
 +              __skb_queue_tail(&tmpq, skb);
        }
 +
 +      spin_lock_bh(&inputq->lock);
 +      skb_queue_splice_tail(&tmpq, inputq);
 +      spin_unlock_bh(&inputq->lock);
 +
  }
  
  void tipc_link_reset(struct tipc_link *l)
        l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
        l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
        kfree_skb(l->reasm_buf);
 +      kfree_skb(l->reasm_tnlmsg);
        kfree_skb(l->failover_reasm_skb);
        l->reasm_buf = NULL;
 +      l->reasm_tnlmsg = NULL;
        l->failover_reasm_skb = NULL;
        l->rcv_unacked = 0;
        l->snd_nxt = 1;
@@@ -956,10 -936,7 +952,10 @@@ int tipc_link_xmit(struct tipc_link *l
        int rc = 0;
  
        if (unlikely(msg_size(hdr) > mtu)) {
 -              skb_queue_purge(list);
 +              pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
 +                      skb_queue_len(list), msg_user(hdr),
 +                      msg_type(hdr), msg_size(hdr), mtu);
 +              __skb_queue_purge(list);
                return -EMSGSIZE;
        }
  
                if (likely(skb_queue_len(transmq) < maxwin)) {
                        _skb = skb_clone(skb, GFP_ATOMIC);
                        if (!_skb) {
 -                              skb_queue_purge(list);
 +                              __skb_queue_purge(list);
                                return -ENOBUFS;
                        }
                        __skb_dequeue(list);
@@@ -1063,47 -1040,53 +1059,53 @@@ static void tipc_link_advance_backlog(s
   * link_retransmit_failure() - Detect repeated retransmit failures
   * @l: tipc link sender
   * @r: tipc link receiver (= l in case of unicast)
-  * @from: seqno of the 1st packet in retransmit request
   * @rc: returned code
   *
   * Return: true if the repeated retransmit failures happens, otherwise
   * false
   */
  static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
-                                   u16 from, int *rc)
+                                   int *rc)
  {
        struct sk_buff *skb = skb_peek(&l->transmq);
        struct tipc_msg *hdr;
  
        if (!skb)
                return false;
-       hdr = buf_msg(skb);
  
-       /* Detect repeated retransmit failures on same packet */
-       if (r->prev_from != from) {
-               r->prev_from = from;
-               r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
-       } else if (time_after(jiffies, r->stale_limit)) {
-               pr_warn("Retransmission failure on link <%s>\n", l->name);
-               link_print(l, "State of link ");
-               pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
-                       msg_user(hdr), msg_type(hdr), msg_size(hdr),
-                       msg_errcode(hdr));
-               pr_info("sqno %u, prev: %x, src: %x\n",
-                       msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
-               trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
-               trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
-               trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+       if (!TIPC_SKB_CB(skb)->retr_cnt)
+               return false;
  
-               if (link_is_bc_sndlink(l))
-                       *rc = TIPC_LINK_DOWN_EVT;
+       if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
+                       msecs_to_jiffies(r->tolerance)))
+               return false;
+       hdr = buf_msg(skb);
+       if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
+               return false;
  
+       pr_warn("Retransmission failure on link <%s>\n", l->name);
+       link_print(l, "State of link ");
+       pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+               msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+       pr_info("sqno %u, prev: %x, dest: %x\n",
+               msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
+       pr_info("retr_stamp %d, retr_cnt %d\n",
+               jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
+               TIPC_SKB_CB(skb)->retr_cnt);
+       trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+       trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+       trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+       if (link_is_bc_sndlink(l)) {
+               r->state = LINK_RESET;
+               *rc = TIPC_LINK_DOWN_EVT;
+       } else {
                *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
-               return true;
        }
  
-       return false;
+       return true;
  }
  
  /* tipc_link_bc_retrans() - retransmit zero or more packets
@@@ -1129,7 -1112,7 +1131,7 @@@ static int tipc_link_bc_retrans(struct 
  
        trace_tipc_link_retrans(r, from, to, &l->transmq);
  
-       if (link_retransmit_failure(l, r, from, &rc))
+       if (link_retransmit_failure(l, r, &rc))
                return rc;
  
        skb_queue_walk(&l->transmq, skb) {
                        continue;
                if (more(msg_seqno(hdr), to))
                        break;
-               if (link_is_bc_sndlink(l)) {
-                       if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
-                               continue;
-                       TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
-               }
+               if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+                       continue;
+               TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
                _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
                if (!_skb)
                        return 0;
                _skb->priority = TC_PRIO_CONTROL;
                __skb_queue_tail(xmitq, _skb);
                l->stats.retransmitted++;
+               /* Increase actual retrans counter & mark first time */
+               if (!TIPC_SKB_CB(skb)->retr_cnt++)
+                       TIPC_SKB_CB(skb)->retr_stamp = jiffies;
        }
        return 0;
  }
@@@ -1252,7 -1238,6 +1257,7 @@@ static int tipc_link_tnl_rcv(struct tip
                             struct sk_buff_head *inputq)
  {
        struct sk_buff **reasm_skb = &l->failover_reasm_skb;
 +      struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg;
        struct sk_buff_head *fdefq = &l->failover_deferdq;
        struct tipc_msg *hdr = buf_msg(skb);
        struct sk_buff *iskb;
        int rc = 0;
        u16 seqno;
  
 -      /* SYNCH_MSG */
 -      if (msg_type(hdr) == SYNCH_MSG)
 -              goto drop;
 +      if (msg_type(hdr) == SYNCH_MSG) {
 +              kfree_skb(skb);
 +              return 0;
 +      }
  
 -      /* FAILOVER_MSG */
 -      if (!tipc_msg_extract(skb, &iskb, &ipos)) {
 -              pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n",
 -                                  skb_queue_len(fdefq));
 -              return rc;
 +      /* Not a fragment? */
 +      if (likely(!msg_nof_fragms(hdr))) {
 +              if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) {
 +                      pr_warn_ratelimited("Unable to extract msg, defq: %d\n",
 +                                          skb_queue_len(fdefq));
 +                      return 0;
 +              }
 +              kfree_skb(skb);
 +      } else {
 +              /* Set fragment type for buf_append */
 +              if (msg_fragm_no(hdr) == 1)
 +                      msg_set_type(hdr, FIRST_FRAGMENT);
 +              else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr))
 +                      msg_set_type(hdr, FRAGMENT);
 +              else
 +                      msg_set_type(hdr, LAST_FRAGMENT);
 +
 +              if (!tipc_buf_append(reasm_tnlmsg, &skb)) {
 +                      /* Successful but non-complete reassembly? */
 +                      if (*reasm_tnlmsg || link_is_bc_rcvlink(l))
 +                              return 0;
 +                      pr_warn_ratelimited("Unable to reassemble tunnel msg\n");
 +                      return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 +              }
 +              iskb = skb;
        }
  
        do {
                seqno = buf_seqno(iskb);
 -
                if (unlikely(less(seqno, l->drop_point))) {
                        kfree_skb(iskb);
                        continue;
                }
 -
                if (unlikely(seqno != l->drop_point)) {
                        __tipc_skb_queue_sorted(fdefq, seqno, iskb);
                        continue;
                }
  
                l->drop_point++;
 -
                if (!tipc_data_input(l, iskb, inputq))
                        rc |= tipc_link_input(l, iskb, inputq, reasm_skb);
                if (unlikely(rc))
                        break;
        } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
  
 -drop:
 -      kfree_skb(skb);
        return rc;
  }
  
@@@ -1393,12 -1362,10 +1398,10 @@@ static int tipc_link_advance_transmq(st
        struct tipc_msg *hdr;
        u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
        u16 ack = l->rcv_nxt - 1;
+       bool passed = false;
        u16 seqno, n = 0;
        int rc = 0;
  
-       if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
-               return rc;
        skb_queue_walk_safe(&l->transmq, skb, tmp) {
                seqno = buf_seqno(skb);
  
@@@ -1408,12 -1375,17 +1411,17 @@@ next_gap_ack
                        __skb_unlink(skb, &l->transmq);
                        kfree_skb(skb);
                } else if (less_eq(seqno, acked + gap)) {
-                       /* retransmit skb */
+                       /* First, check if repeated retrans failures occurs? */
+                       if (!passed && link_retransmit_failure(l, l, &rc))
+                               return rc;
+                       passed = true;
+                       /* retransmit skb if unrestricted*/
                        if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
                                continue;
                        TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
-                       _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+                       _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
+                                          GFP_ATOMIC);
                        if (!_skb)
                                continue;
                        hdr = buf_msg(_skb);
                        _skb->priority = TC_PRIO_CONTROL;
                        __skb_queue_tail(xmitq, _skb);
                        l->stats.retransmitted++;
+                       /* Increase actual retrans counter & mark first time */
+                       if (!TIPC_SKB_CB(skb)->retr_cnt++)
+                               TIPC_SKB_CB(skb)->retr_stamp = jiffies;
                } else {
                        /* retry with Gap ACK blocks if any */
                        if (!ga || n >= ga->gack_cnt)
@@@ -1668,7 -1644,7 +1680,7 @@@ void tipc_link_create_dummy_tnl_msg(str
        struct sk_buff *skb;
        u32 dnode = l->addr;
  
 -      skb_queue_head_init(&tnlq);
 +      __skb_queue_head_init(&tnlq);
        skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
                              INT_H_SIZE, BASIC_H_SIZE,
                              dnode, onode, 0, 0, 0);
@@@ -1699,18 -1675,14 +1711,18 @@@ void tipc_link_tnl_prepare(struct tipc_
        struct sk_buff *skb, *tnlskb;
        struct tipc_msg *hdr, tnlhdr;
        struct sk_buff_head *queue = &l->transmq;
 -      struct sk_buff_head tmpxq, tnlq;
 +      struct sk_buff_head tmpxq, tnlq, frags;
        u16 pktlen, pktcnt, seqno = l->snd_nxt;
 +      bool pktcnt_need_update = false;
 +      u16 syncpt;
 +      int rc;
  
        if (!tnl)
                return;
  
 -      skb_queue_head_init(&tnlq);
 -      skb_queue_head_init(&tmpxq);
 +      __skb_queue_head_init(&tnlq);
 +      __skb_queue_head_init(&tmpxq);
 +      __skb_queue_head_init(&frags);
  
        /* At least one packet required for safe algorithm => add dummy */
        skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
                pr_warn("%sunable to create tunnel packet\n", link_co_err);
                return;
        }
 -      skb_queue_tail(&tnlq, skb);
 +      __skb_queue_tail(&tnlq, skb);
        tipc_link_xmit(l, &tnlq, &tmpxq);
        __skb_queue_purge(&tmpxq);
  
 +      /* Link Synching:
 +       * From now on, send only one single ("dummy") SYNCH message
 +       * to peer. The SYNCH message does not contain any data, just
 +       * a header conveying the synch point to the peer.
 +       */
 +      if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
 +              tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG,
 +                                       INT_H_SIZE, 0, l->addr,
 +                                       tipc_own_addr(l->net),
 +                                       0, 0, 0);
 +              if (!tnlskb) {
 +                      pr_warn("%sunable to create dummy SYNCH_MSG\n",
 +                              link_co_err);
 +                      return;
 +              }
 +
 +              hdr = buf_msg(tnlskb);
 +              syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
 +              msg_set_syncpt(hdr, syncpt);
 +              msg_set_bearer_id(hdr, l->peer_bearer_id);
 +              __skb_queue_tail(&tnlq, tnlskb);
 +              tipc_link_xmit(tnl, &tnlq, xmitq);
 +              return;
 +      }
 +
        /* Initialize reusable tunnel packet header */
        tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
                      mtyp, INT_H_SIZE, l->addr);
                if (queue == &l->backlogq)
                        msg_set_seqno(hdr, seqno++);
                pktlen = msg_size(hdr);
 +
 +              /* Tunnel link MTU is not large enough? This could be
 +               * due to:
 +               * 1) Link MTU has just changed or set differently;
 +               * 2) Or FAILOVER on the top of a SYNCH message
 +               *
 +               * The 2nd case should not happen if peer supports
 +               * TIPC_TUNNEL_ENHANCED
 +               */
 +              if (pktlen > tnl->mtu - INT_H_SIZE) {
 +                      if (mtyp == FAILOVER_MSG &&
 +                          (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
 +                              rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu,
 +                                                     &frags);
 +                              if (rc) {
 +                                      pr_warn("%sunable to frag msg: rc %d\n",
 +                                              link_co_err, rc);
 +                                      return;
 +                              }
 +                              pktcnt += skb_queue_len(&frags) - 1;
 +                              pktcnt_need_update = true;
 +                              skb_queue_splice_tail_init(&frags, &tnlq);
 +                              continue;
 +                      }
 +                      /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED
 +                       * => Just warn it and return!
 +                       */
 +                      pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n",
 +                                          link_co_err, msg_user(hdr),
 +                                          msg_type(hdr), msg_size(hdr));
 +                      return;
 +              }
 +
                msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
                tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
                if (!tnlskb) {
                goto tnl;
        }
  
 +      if (pktcnt_need_update)
 +              skb_queue_walk(&tnlq, skb) {
 +                      hdr = buf_msg(skb);
 +                      msg_set_msgcnt(hdr, pktcnt);
 +              }
 +
        tipc_link_xmit(tnl, &tnlq, xmitq);
  
        if (mtyp == FAILOVER_MSG) {
@@@ -2681,7 -2589,7 +2693,7 @@@ int tipc_link_dump(struct tipc_link *l
        i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
        i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
        i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
-       i += scnprintf(buf + i, sz - i, " %u", l->prev_from);
+       i += scnprintf(buf + i, sz - i, " %u", 0);
        i += scnprintf(buf + i, sz - i, " %u", 0);
        i += scnprintf(buf + i, sz - i, " %u", l->acked);
  
diff --combined net/tipc/msg.h
index 1c8c8dd32a4e98c6326041e210dd14c216c398aa,d7ebc9e955f6a35fe6b6ac058a48c30c6218fbdc..0daa6f04ca812cfc378b211909a48af77f9739c1
@@@ -102,13 -102,15 +102,15 @@@ struct plist
  #define TIPC_MEDIA_INFO_OFFSET        5
  
  struct tipc_skb_cb {
-       u32 bytes_read;
-       u32 orig_member;
        struct sk_buff *tail;
        unsigned long nxt_retr;
-       bool validated;
+       unsigned long retr_stamp;
+       u32 bytes_read;
+       u32 orig_member;
        u16 chain_imp;
        u16 ackers;
+       u16 retr_cnt;
+       bool validated;
  };
  
  #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@@ -721,26 -723,12 +723,26 @@@ static inline void msg_set_last_bcast(s
        msg_set_bits(m, 4, 16, 0xffff, n);
  }
  
 +static inline u32 msg_nof_fragms(struct tipc_msg *m)
 +{
 +      return msg_bits(m, 4, 0, 0xffff);
 +}
 +
 +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n)
 +{
 +      msg_set_bits(m, 4, 0, 0xffff, n);
 +}
 +
 +static inline u32 msg_fragm_no(struct tipc_msg *m)
 +{
 +      return msg_bits(m, 4, 16, 0xffff);
 +}
 +
  static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
  {
        msg_set_bits(m, 4, 16, 0xffff, n);
  }
  
 -
  static inline u16 msg_next_sent(struct tipc_msg *m)
  {
        return msg_bits(m, 4, 0, 0xffff);
@@@ -891,16 -879,6 +893,16 @@@ static inline void msg_set_msgcnt(struc
        msg_set_bits(m, 9, 16, 0xffff, n);
  }
  
 +static inline u16 msg_syncpt(struct tipc_msg *m)
 +{
 +      return msg_bits(m, 9, 16, 0xffff);
 +}
 +
 +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n)
 +{
 +      msg_set_bits(m, 9, 16, 0xffff, n);
 +}
 +
  static inline u32 msg_conn_ack(struct tipc_msg *m)
  {
        return msg_bits(m, 9, 16, 0xffff);
@@@ -1059,8 -1037,6 +1061,8 @@@ bool tipc_msg_bundle(struct sk_buff *sk
  bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
                          u32 mtu, u32 dnode);
  bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
 +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
 +                    int pktmax, struct sk_buff_head *frags);
  int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
                   int offset, int dsz, int mtu, struct sk_buff_head *list);
  bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
diff --combined net/tls/tls_device.c
index d184230665eb649305f39f7a08779503cc57e585,43922d86e5109f9b19dab68be4c950e7440ad7e8..a470df7ffcf9a028f43c8e5f0ce482a42ce99f7f
@@@ -243,14 -243,14 +243,14 @@@ static void tls_append_frag(struct tls_
        skb_frag_t *frag;
  
        frag = &record->frags[record->num_frags - 1];
 -      if (frag->page.p == pfrag->page &&
 -          frag->page_offset + frag->size == pfrag->offset) {
 -              frag->size += size;
 +      if (skb_frag_page(frag) == pfrag->page &&
 +          skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) {
 +              skb_frag_size_add(frag, size);
        } else {
                ++frag;
 -              frag->page.p = pfrag->page;
 -              frag->page_offset = pfrag->offset;
 -              frag->size = size;
 +              __skb_frag_set_page(frag, pfrag->page);
 +              skb_frag_off_set(frag, pfrag->offset);
 +              skb_frag_size_set(frag, size);
                ++record->num_frags;
                get_page(pfrag->page);
        }
@@@ -301,8 -301,8 +301,8 @@@ static int tls_push_record(struct sock 
                frag = &record->frags[i];
                sg_unmark_end(&offload_ctx->sg_tx_data[i]);
                sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
 -                          frag->size, frag->page_offset);
 -              sk_mem_charge(sk, frag->size);
 +                          skb_frag_size(frag), skb_frag_off(frag));
 +              sk_mem_charge(sk, skb_frag_size(frag));
                get_page(skb_frag_page(frag));
        }
        sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
@@@ -324,7 -324,7 +324,7 @@@ static int tls_create_new_record(struc
  
        frag = &record->frags[0];
        __skb_frag_set_page(frag, pfrag->page);
 -      frag->page_offset = pfrag->offset;
 +      skb_frag_off_set(frag, pfrag->offset);
        skb_frag_size_set(frag, prepend_size);
  
        get_page(pfrag->page);
@@@ -373,9 -373,9 +373,9 @@@ static int tls_push_data(struct sock *s
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_prot_info *prot = &tls_ctx->prot_info;
        struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
-       int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
        int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
        struct tls_record_info *record = ctx->open_record;
+       int tls_push_record_flags;
        struct page_frag *pfrag;
        size_t orig_size = size;
        u32 max_open_record_len;
        if (sk->sk_err)
                return -sk->sk_err;
  
+       flags |= MSG_SENDPAGE_DECRYPTED;
+       tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
        timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
        if (tls_is_partially_sent_record(tls_ctx)) {
                rc = tls_push_partial_record(sk, tls_ctx, flags);
@@@ -576,7 -579,9 +579,9 @@@ void tls_device_write_space(struct soc
                gfp_t sk_allocation = sk->sk_allocation;
  
                sk->sk_allocation = GFP_ATOMIC;
-               tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
+               tls_push_partial_record(sk, ctx,
+                                       MSG_DONTWAIT | MSG_NOSIGNAL |
+                                       MSG_SENDPAGE_DECRYPTED);
                sk->sk_allocation = sk_allocation;
        }
  }
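The tls_device.c hunks above replace direct writes to frag->page.p, frag->page_offset and frag->size with the accessor helpers. A minimal sketch of filling a fragment after this conversion (the wrapper name is made up):

#include <linux/skbuff.h>

static void my_fill_frag(skb_frag_t *frag, struct page *page,
			 unsigned int off, unsigned int size)
{
	__skb_frag_set_page(frag, page);	/* was: frag->page.p = page     */
	skb_frag_off_set(frag, off);		/* was: frag->page_offset = off */
	skb_frag_size_set(frag, size);		/* was: frag->size = size       */
}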
diff --combined tools/include/uapi/linux/bpf.h
index 4393bd4b241973c1ad82edb70a40b63517a7f9ce,a5aa7d3ac6a116a7a8d3c08d840c37a70e42e7a4..0e66371bea13fdb93411c87aeb08a88615b5f8bb
@@@ -134,7 -134,6 +134,7 @@@ enum bpf_map_type 
        BPF_MAP_TYPE_QUEUE,
        BPF_MAP_TYPE_STACK,
        BPF_MAP_TYPE_SK_STORAGE,
 +      BPF_MAP_TYPE_DEVMAP_HASH,
  };
  
  /* Note that tracing related programs such as
@@@ -1467,8 -1466,8 +1467,8 @@@ union bpf_attr 
   *            If no cookie has been set yet, generate a new cookie. Once
   *            generated, the socket cookie remains stable for the life of the
   *            socket. This helper can be useful for monitoring per socket
-  *            networking traffic statistics as it provides a unique socket
-  *            identifier per namespace.
+  *            networking traffic statistics as it provides a global socket
+  *            identifier that can be assumed unique.
   *    Return
  *            An 8-byte long non-decreasing number on success, or 0 if the
   *            socket field is missing inside *skb*.
   *            **-EPERM** if no permission to send the *sig*.
   *
   *            **-EAGAIN** if bpf program can try again.
 + *
 + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
 + *    Description
 + *            Try to issue a SYN cookie for the packet with corresponding
 + *            IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
 + *
 + *            *iph* points to the start of the IPv4 or IPv6 header, while
 + *            *iph_len* contains **sizeof**\ (**struct iphdr**) or
 + *            **sizeof**\ (**struct ip6hdr**).
 + *
 + *            *th* points to the start of the TCP header, while *th_len*
 + *            contains the length of the TCP header.
 + *
 + *    Return
 + *            On success, the lower 32 bits hold the generated SYN cookie,
 + *            followed by 16 bits which hold the MSS value for that cookie,
 + *            and the top 16 bits are unused.
 + *
 + *            On failure, the returned value is one of the following:
 + *
 + *            **-EINVAL** SYN cookie cannot be issued due to error
 + *
 + *            **-ENOENT** SYN cookie should not be issued (no SYN flood)
 + *
 + *            **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
 + *
 + *            **-EPROTONOSUPPORT** IP packet version is not 4 or 6
   */
  #define __BPF_FUNC_MAPPER(FN)         \
        FN(unspec),                     \
        FN(strtoul),                    \
        FN(sk_storage_get),             \
        FN(sk_storage_delete),          \
 -      FN(send_signal),
 +      FN(send_signal),                \
 +      FN(tcp_gen_syncookie),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
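
For the bpf_tcp_gen_syncookie() helper documented above, a hedged usage fragment (not part of this merge) from an XDP-style program could look as follows; header parsing and bounds checks are elided, and sk, iph and th are assumed to be validated pointers:

	/* Sketch only: generate a SYN cookie for a validated IPv4 SYN. */
	__s64 ret = bpf_tcp_gen_syncookie(sk, iph, sizeof(struct iphdr),
					  th, th->doff * 4);
	if (ret >= 0) {
		__u32 cookie = (__u32)ret;            /* low 32 bits: cookie */
		__u16 mss = (ret >> 32) & 0xffff;     /* next 16 bits: MSS   */
		/* ... build the SYN-ACK using cookie and mss ... */
	}
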
@@@ -3536,10 -3507,6 +3536,10 @@@ enum bpf_task_fd_type 
        BPF_FD_TYPE_URETPROBE,          /* filename + offset */
  };
  
 +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG           (1U << 0)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL               (1U << 1)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP            (1U << 2)
 +
  struct bpf_flow_keys {
        __u16   nhoff;
        __u16   thoff;
                        __u32   ipv6_dst[4];    /* in6_addr; network order */
                };
        };
 +      __u32   flags;
 +      __be32  flow_label;
  };
  
  struct bpf_func_info {
diff --combined tools/lib/bpf/libbpf.c
index 2233f919dd88fe4d18302594ae2605b749e91eaa,2b57d7ea78363a7c36da5540d67ff985a5e40963..e0276520171b952888c3b2c41c23e0d1a7804edf
@@@ -39,7 -39,6 +39,7 @@@
  #include <sys/stat.h>
  #include <sys/types.h>
  #include <sys/vfs.h>
 +#include <sys/utsname.h>
  #include <tools/libc_compat.h>
  #include <libelf.h>
  #include <gelf.h>
@@@ -49,7 -48,6 +49,7 @@@
  #include "btf.h"
  #include "str_error.h"
  #include "libbpf_internal.h"
 +#include "hashmap.h"
  
  #ifndef EM_BPF
  #define EM_BPF 247
@@@ -77,12 -75,9 +77,12 @@@ static int __base_pr(enum libbpf_print_
  
  static libbpf_print_fn_t __libbpf_pr = __base_pr;
  
 -void libbpf_set_print(libbpf_print_fn_t fn)
 +libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
  {
 +      libbpf_print_fn_t old_print_fn = __libbpf_pr;
 +
        __libbpf_pr = fn;
 +      return old_print_fn;
  }
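
With the libbpf_set_print() change above returning the previous callback, a caller can swap the print handler temporarily and restore it afterwards; a minimal sketch (assuming a NULL callback silences output):

	libbpf_print_fn_t old_print;

	old_print = libbpf_set_print(NULL);	/* silence libbpf temporarily */
	/* ... perform noisy probing ... */
	libbpf_set_print(old_print);		/* restore previous handler */
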
  
  __printf(2, 3)
@@@ -187,7 -182,6 +187,6 @@@ struct bpf_program 
        bpf_program_clear_priv_t clear_priv;
  
        enum bpf_attach_type expected_attach_type;
-       int btf_fd;
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;
@@@ -318,7 -312,6 +317,6 @@@ void bpf_program__unload(struct bpf_pro
        prog->instances.nr = -1;
        zfree(&prog->instances.fds);
  
-       zclose(prog->btf_fd);
        zfree(&prog->func_info);
        zfree(&prog->line_info);
  }
@@@ -397,7 -390,6 +395,6 @@@ bpf_program__init(void *data, size_t si
        prog->instances.fds = NULL;
        prog->instances.nr = -1;
        prog->type = BPF_PROG_TYPE_UNSPEC;
-       prog->btf_fd = -1;
  
        return 0;
  errout:
@@@ -1018,21 -1010,23 +1015,21 @@@ static int bpf_object__init_user_maps(s
        return 0;
  }
  
 -static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
 -                                                   __u32 id)
 +static const struct btf_type *
 +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
  {
        const struct btf_type *t = btf__type_by_id(btf, id);
  
 -      while (true) {
 -              switch (BTF_INFO_KIND(t->info)) {
 -              case BTF_KIND_VOLATILE:
 -              case BTF_KIND_CONST:
 -              case BTF_KIND_RESTRICT:
 -              case BTF_KIND_TYPEDEF:
 -                      t = btf__type_by_id(btf, t->type);
 -                      break;
 -              default:
 -                      return t;
 -              }
 +      if (res_id)
 +              *res_id = id;
 +
 +      while (btf_is_mod(t) || btf_is_typedef(t)) {
 +              if (res_id)
 +                      *res_id = t->type;
 +              t = btf__type_by_id(btf, t->type);
        }
 +
 +      return t;
  }
  
  /*
  static bool get_map_field_int(const char *map_name, const struct btf *btf,
                              const struct btf_type *def,
                              const struct btf_member *m, __u32 *res) {
 -      const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
 +      const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
        const char *name = btf__name_by_offset(btf, m->name_off);
        const struct btf_array *arr_info;
        const struct btf_type *arr_t;
  
 -      if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
 +      if (!btf_is_ptr(t)) {
                pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
 -                         map_name, name, BTF_INFO_KIND(t->info));
 +                         map_name, name, btf_kind(t));
                return false;
        }
  
                           map_name, name, t->type);
                return false;
        }
 -      if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) {
 +      if (!btf_is_array(arr_t)) {
                pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
 -                         map_name, name, BTF_INFO_KIND(arr_t->info));
 +                         map_name, name, btf_kind(arr_t));
                return false;
        }
 -      arr_info = (const void *)(arr_t + 1);
 +      arr_info = btf_array(arr_t);
        *res = arr_info->nelems;
        return true;
  }
@@@ -1085,11 -1079,11 +1082,11 @@@ static int bpf_object__init_user_btf_ma
        struct bpf_map *map;
        int vlen, i;
  
 -      vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
 +      vi = btf_var_secinfos(sec) + var_idx;
        var = btf__type_by_id(obj->btf, vi->type);
 -      var_extra = (const void *)(var + 1);
 +      var_extra = btf_var(var);
        map_name = btf__name_by_offset(obj->btf, var->name_off);
 -      vlen = BTF_INFO_VLEN(var->info);
 +      vlen = btf_vlen(var);
  
        if (map_name == NULL || map_name[0] == '\0') {
                pr_warning("map #%d: empty name.\n", var_idx);
                pr_warning("map '%s' BTF data is corrupted.\n", map_name);
                return -EINVAL;
        }
 -      if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
 +      if (!btf_is_var(var)) {
                pr_warning("map '%s': unexpected var kind %u.\n",
 -                         map_name, BTF_INFO_KIND(var->info));
 +                         map_name, btf_kind(var));
                return -EINVAL;
        }
        if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
                return -EOPNOTSUPP;
        }
  
 -      def = skip_mods_and_typedefs(obj->btf, var->type);
 -      if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
 +      def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 +      if (!btf_is_struct(def)) {
                pr_warning("map '%s': unexpected def kind %u.\n",
 -                         map_name, BTF_INFO_KIND(var->info));
 +                         map_name, btf_kind(var));
                return -EINVAL;
        }
        if (def->size > vi->size) {
        pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
                 map_name, map->sec_idx, map->sec_offset);
  
 -      vlen = BTF_INFO_VLEN(def->info);
 -      m = (const void *)(def + 1);
 +      vlen = btf_vlen(def);
 +      m = btf_members(def);
        for (i = 0; i < vlen; i++, m++) {
                const char *name = btf__name_by_offset(obj->btf, m->name_off);
  
                                           map_name, m->type);
                                return -EINVAL;
                        }
 -                      if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
 +                      if (!btf_is_ptr(t)) {
                                pr_warning("map '%s': key spec is not PTR: %u.\n",
 -                                         map_name, BTF_INFO_KIND(t->info));
 +                                         map_name, btf_kind(t));
                                return -EINVAL;
                        }
                        sz = btf__resolve_size(obj->btf, t->type);
                                           map_name, m->type);
                                return -EINVAL;
                        }
 -                      if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
 +                      if (!btf_is_ptr(t)) {
                                pr_warning("map '%s': value spec is not PTR: %u.\n",
 -                                         map_name, BTF_INFO_KIND(t->info));
 +                                         map_name, btf_kind(t));
                                return -EINVAL;
                        }
                        sz = btf__resolve_size(obj->btf, t->type);
@@@ -1294,7 -1288,7 +1291,7 @@@ static int bpf_object__init_user_btf_ma
        nr_types = btf__get_nr_types(obj->btf);
        for (i = 1; i <= nr_types; i++) {
                t = btf__type_by_id(obj->btf, i);
 -              if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
 +              if (!btf_is_datasec(t))
                        continue;
                name = btf__name_by_offset(obj->btf, t->name_off);
                if (strcmp(name, MAPS_ELF_SEC) == 0) {
                return -ENOENT;
        }
  
 -      vlen = BTF_INFO_VLEN(sec->info);
 +      vlen = btf_vlen(sec);
        for (i = 0; i < vlen; i++) {
                err = bpf_object__init_user_btf_map(obj, sec, i,
                                                    obj->efile.btf_maps_shndx,
@@@ -1369,14 -1363,16 +1366,14 @@@ static void bpf_object__sanitize_btf(st
        struct btf *btf = obj->btf;
        struct btf_type *t;
        int i, j, vlen;
 -      __u16 kind;
  
        if (!obj->btf || (has_func && has_datasec))
                return;
  
        for (i = 1; i <= btf__get_nr_types(btf); i++) {
                t = (struct btf_type *)btf__type_by_id(btf, i);
 -              kind = BTF_INFO_KIND(t->info);
  
 -              if (!has_datasec && kind == BTF_KIND_VAR) {
 +              if (!has_datasec && btf_is_var(t)) {
                        /* replace VAR with INT */
                        t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
                        /*
                         * original variable took less than 4 bytes
                         */
                        t->size = 1;
 -                      *(int *)(t+1) = BTF_INT_ENC(0, 0, 8);
 -              } else if (!has_datasec && kind == BTF_KIND_DATASEC) {
 +                      *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
 +              } else if (!has_datasec && btf_is_datasec(t)) {
                        /* replace DATASEC with STRUCT */
 -                      struct btf_var_secinfo *v = (void *)(t + 1);
 -                      struct btf_member *m = (void *)(t + 1);
 +                      const struct btf_var_secinfo *v = btf_var_secinfos(t);
 +                      struct btf_member *m = btf_members(t);
                        struct btf_type *vt;
                        char *name;
  
                                name++;
                        }
  
 -                      vlen = BTF_INFO_VLEN(t->info);
 +                      vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
                        for (j = 0; j < vlen; j++, v++, m++) {
                                /* order of field assignments is important */
                                vt = (void *)btf__type_by_id(btf, v->type);
                                m->name_off = vt->name_off;
                        }
 -              } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) {
 +              } else if (!has_func && btf_is_func_proto(t)) {
                        /* replace FUNC_PROTO with ENUM */
 -                      vlen = BTF_INFO_VLEN(t->info);
 +                      vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
                        t->size = sizeof(__u32); /* kernel enforced */
 -              } else if (!has_func && kind == BTF_KIND_FUNC) {
 +              } else if (!has_func && btf_is_func(t)) {
                        /* replace FUNC with TYPEDEF */
                        t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
                }
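
The conversions in the hunks above replace open-coded BTF_INFO_*() macro checks with the btf.h kind and member accessors. The pattern, sketched for a struct btf_type pointer t assumed to be valid:

	/* was: BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT */
	if (btf_is_struct(t)) {
		int vlen = btf_vlen(t);	/* was: BTF_INFO_VLEN(t->info) */
		const struct btf_member *m = btf_members(t); /* was: (void *)(t + 1) */

		/* ... iterate over vlen members via m ... */
	}
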
@@@ -1773,22 -1769,15 +1770,22 @@@ bpf_program__collect_reloc(struct bpf_p
                         (long long) sym.st_value, sym.st_name, name);
  
                shdr_idx = sym.st_shndx;
 +              insn_idx = rel.r_offset / sizeof(struct bpf_insn);
 +              pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n",
 +                       insn_idx, shdr_idx);
 +
 +              if (shdr_idx >= SHN_LORESERVE) {
 +                      pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n",
 +                                 name, shdr_idx, insn_idx,
 +                                 insns[insn_idx].code);
 +                      return -LIBBPF_ERRNO__RELOC;
 +              }
                if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
                        pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
                                   prog->section_name, shdr_idx);
                        return -LIBBPF_ERRNO__RELOC;
                }
  
 -              insn_idx = rel.r_offset / sizeof(struct bpf_insn);
 -              pr_debug("relocation: insn_idx=%u\n", insn_idx);
 -
                if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
                        if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
                                pr_warning("incorrect bpf_call opcode\n");
@@@ -2296,900 -2285,9 +2293,897 @@@ bpf_program_reloc_btf_ext(struct bpf_pr
                prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
        }
  
-       if (!insn_offset)
-               prog->btf_fd = btf__fd(obj->btf);
        return 0;
  }
  
 +#define BPF_CORE_SPEC_MAX_LEN 64
 +
 +/* represents BPF CO-RE field or array element accessor */
 +struct bpf_core_accessor {
 +      __u32 type_id;          /* struct/union type or array element type */
 +      __u32 idx;              /* field index or array index */
 +      const char *name;       /* field name or NULL for array accessor */
 +};
 +
 +struct bpf_core_spec {
 +      const struct btf *btf;
 +      /* high-level spec: named fields and array indices only */
 +      struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
 +      /* high-level spec length */
 +      int len;
 +      /* raw, low-level spec: 1-to-1 with accessor spec string */
 +      int raw_spec[BPF_CORE_SPEC_MAX_LEN];
 +      /* raw spec length */
 +      int raw_len;
 +      /* field byte offset represented by spec */
 +      __u32 offset;
 +};
 +
 +static bool str_is_empty(const char *s)
 +{
 +      return !s || !s[0];
 +}
 +
 +/*
 + * Turn bpf_offset_reloc into a low- and high-level spec representation,
 + * validating correctness along the way, as well as calculating resulting
 + * field offset (in bytes), specified by accessor string. Low-level spec
 + * captures every single level of nestedness, including traversing anonymous
 + * struct/union members. High-level one only captures semantically meaningful
 + * "turning points": named fields and array indicies.
 + * E.g., for this case:
 + *
 + *   struct sample {
 + *       int __unimportant;
 + *       struct {
 + *           int __1;
 + *           int __2;
 + *           int a[7];
 + *       };
 + *   };
 + *
 + *   struct sample *s = ...;
 + *
 + *   int x = &s->a[3]; // access string = '0:1:2:3'
 + *
 + * Low-level spec has 1:1 mapping with each element of access string (it's
 + * just a parsed access string representation): [0, 1, 2, 3].
 + *
 + * High-level spec will capture only 3 points:
 + *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
 + *   - field 'a' access (corresponds to '2' in low-level spec);
 + *   - array element #3 access (corresponds to '3' in low-level spec).
 + *
 + */
 +static int bpf_core_spec_parse(const struct btf *btf,
 +                             __u32 type_id,
 +                             const char *spec_str,
 +                             struct bpf_core_spec *spec)
 +{
 +      int access_idx, parsed_len, i;
 +      const struct btf_type *t;
 +      const char *name;
 +      __u32 id;
 +      __s64 sz;
 +
 +      if (str_is_empty(spec_str) || *spec_str == ':')
 +              return -EINVAL;
 +
 +      memset(spec, 0, sizeof(*spec));
 +      spec->btf = btf;
 +
 +      /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
 +      while (*spec_str) {
 +              if (*spec_str == ':')
 +                      ++spec_str;
 +              if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
 +                      return -EINVAL;
 +              if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 +                      return -E2BIG;
 +              spec_str += parsed_len;
 +              spec->raw_spec[spec->raw_len++] = access_idx;
 +      }
 +
 +      if (spec->raw_len == 0)
 +              return -EINVAL;
 +
 +      /* first spec value is always reloc type array index */
 +      t = skip_mods_and_typedefs(btf, type_id, &id);
 +      if (!t)
 +              return -EINVAL;
 +
 +      access_idx = spec->raw_spec[0];
 +      spec->spec[0].type_id = id;
 +      spec->spec[0].idx = access_idx;
 +      spec->len++;
 +
 +      sz = btf__resolve_size(btf, id);
 +      if (sz < 0)
 +              return sz;
 +      spec->offset = access_idx * sz;
 +
 +      for (i = 1; i < spec->raw_len; i++) {
 +              t = skip_mods_and_typedefs(btf, id, &id);
 +              if (!t)
 +                      return -EINVAL;
 +
 +              access_idx = spec->raw_spec[i];
 +
 +              if (btf_is_composite(t)) {
 +                      const struct btf_member *m;
 +                      __u32 offset;
 +
 +                      if (access_idx >= btf_vlen(t))
 +                              return -EINVAL;
 +                      if (btf_member_bitfield_size(t, access_idx))
 +                              return -EINVAL;
 +
 +                      offset = btf_member_bit_offset(t, access_idx);
 +                      if (offset % 8)
 +                              return -EINVAL;
 +                      spec->offset += offset / 8;
 +
 +                      m = btf_members(t) + access_idx;
 +                      if (m->name_off) {
 +                              name = btf__name_by_offset(btf, m->name_off);
 +                              if (str_is_empty(name))
 +                                      return -EINVAL;
 +
 +                              spec->spec[spec->len].type_id = id;
 +                              spec->spec[spec->len].idx = access_idx;
 +                              spec->spec[spec->len].name = name;
 +                              spec->len++;
 +                      }
 +
 +                      id = m->type;
 +              } else if (btf_is_array(t)) {
 +                      const struct btf_array *a = btf_array(t);
 +
 +                      t = skip_mods_and_typedefs(btf, a->type, &id);
 +                      if (!t || access_idx >= a->nelems)
 +                              return -EINVAL;
 +
 +                      spec->spec[spec->len].type_id = id;
 +                      spec->spec[spec->len].idx = access_idx;
 +                      spec->len++;
 +
 +                      sz = btf__resolve_size(btf, id);
 +                      if (sz < 0)
 +                              return sz;
 +                      spec->offset += access_idx * sz;
 +              } else {
 +                      pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
 +                                 type_id, spec_str, i, id, btf_kind(t));
 +                      return -EINVAL;
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +static bool bpf_core_is_flavor_sep(const char *s)
 +{
 +      /* check X___Y name pattern, where X and Y are not underscores */
 +      return s[0] != '_' &&                                 /* X */
 +             s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
 +             s[4] != '_';                                   /* Y */
 +}
 +
 +/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 + * before last triple underscore. Struct name part after last triple
 + * underscore is ignored by BPF CO-RE relocation during relocation matching.
 + */
 +static size_t bpf_core_essential_name_len(const char *name)
 +{
 +      size_t n = strlen(name);
 +      int i;
 +
 +      for (i = n - 5; i >= 0; i--) {
 +              if (bpf_core_is_flavor_sep(name + i))
 +                      return i + 1;
 +      }
 +      return n;
 +}
 +
 +/* dynamically sized list of type IDs */
 +struct ids_vec {
 +      __u32 *data;
 +      int len;
 +};
 +
 +static void bpf_core_free_cands(struct ids_vec *cand_ids)
 +{
 +      free(cand_ids->data);
 +      free(cand_ids);
 +}
 +
 +static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 +                                         __u32 local_type_id,
 +                                         const struct btf *targ_btf)
 +{
 +      size_t local_essent_len, targ_essent_len;
 +      const char *local_name, *targ_name;
 +      const struct btf_type *t;
 +      struct ids_vec *cand_ids;
 +      __u32 *new_ids;
 +      int i, err, n;
 +
 +      t = btf__type_by_id(local_btf, local_type_id);
 +      if (!t)
 +              return ERR_PTR(-EINVAL);
 +
 +      local_name = btf__name_by_offset(local_btf, t->name_off);
 +      if (str_is_empty(local_name))
 +              return ERR_PTR(-EINVAL);
 +      local_essent_len = bpf_core_essential_name_len(local_name);
 +
 +      cand_ids = calloc(1, sizeof(*cand_ids));
 +      if (!cand_ids)
 +              return ERR_PTR(-ENOMEM);
 +
 +      n = btf__get_nr_types(targ_btf);
 +      for (i = 1; i <= n; i++) {
 +              t = btf__type_by_id(targ_btf, i);
 +              targ_name = btf__name_by_offset(targ_btf, t->name_off);
 +              if (str_is_empty(targ_name))
 +                      continue;
 +
 +              targ_essent_len = bpf_core_essential_name_len(targ_name);
 +              if (targ_essent_len != local_essent_len)
 +                      continue;
 +
 +              if (strncmp(local_name, targ_name, local_essent_len) == 0) {
 +                      pr_debug("[%d] %s: found candidate [%d] %s\n",
 +                               local_type_id, local_name, i, targ_name);
 +                      new_ids = realloc(cand_ids->data,
 +                                        sizeof(*new_ids) * (cand_ids->len + 1));
 +                      if (!new_ids) {
 +                              err = -ENOMEM;
 +                              goto err_out;
 +                      }
 +                      cand_ids->data = new_ids;
 +                      cand_ids->data[cand_ids->len++] = i;
 +              }
 +      }
 +      return cand_ids;
 +err_out:
 +      bpf_core_free_cands(cand_ids);
 +      return ERR_PTR(err);
 +}
 +
 +/* Check two types for compatibility, skipping const/volatile/restrict and
 + * typedefs, to ensure we are relocating offset to the compatible entities:
 + *   - any two STRUCTs/UNIONs are compatible and can be mixed;
 + *   - any two FWDs are compatible;
 + *   - any two PTRs are always compatible;
 + *   - for ENUMs, check sizes, names are ignored;
 + *   - for INT, size and bitness should match, signedness is ignored;
 + *   - for ARRAY, dimensionality is ignored, element types are checked for
 + *     compatibility recursively;
 + *   - everything else shouldn't be ever a target of relocation.
 + * These rules are not set in stone and probably will be adjusted as we get
 + * more experience with using BPF CO-RE relocations.
 + */
 +static int bpf_core_fields_are_compat(const struct btf *local_btf,
 +                                    __u32 local_id,
 +                                    const struct btf *targ_btf,
 +                                    __u32 targ_id)
 +{
 +      const struct btf_type *local_type, *targ_type;
 +
 +recur:
 +      local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
 +      targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
 +      if (!local_type || !targ_type)
 +              return -EINVAL;
 +
 +      if (btf_is_composite(local_type) && btf_is_composite(targ_type))
 +              return 1;
 +      if (btf_kind(local_type) != btf_kind(targ_type))
 +              return 0;
 +
 +      switch (btf_kind(local_type)) {
 +      case BTF_KIND_FWD:
 +      case BTF_KIND_PTR:
 +              return 1;
 +      case BTF_KIND_ENUM:
 +              return local_type->size == targ_type->size;
 +      case BTF_KIND_INT:
 +              return btf_int_offset(local_type) == 0 &&
 +                     btf_int_offset(targ_type) == 0 &&
 +                     local_type->size == targ_type->size &&
 +                     btf_int_bits(local_type) == btf_int_bits(targ_type);
 +      case BTF_KIND_ARRAY:
 +              local_id = btf_array(local_type)->type;
 +              targ_id = btf_array(targ_type)->type;
 +              goto recur;
 +      default:
 +              pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n",
 +                         btf_kind(local_type), local_id, targ_id);
 +              return 0;
 +      }
 +}
 +
 +/*
 + * Given single high-level named field accessor in local type, find
 + * corresponding high-level accessor for a target type. Along the way,
 + * maintain low-level spec for target as well. Also keep updating target
 + * offset.
 + *
 + * Searching is performed through recursive exhaustive enumeration of all
 + * fields of a struct/union. If there are any anonymous (embedded)
 + * structs/unions, they are recursively searched as well. If field with
 + * desired name is found, check compatibility between local and target types,
 + * before returning result.
 + *
 + * 1 is returned, if field is found.
 + * 0 is returned if no compatible field is found.
 + * <0 is returned on error.
 + */
 +static int bpf_core_match_member(const struct btf *local_btf,
 +                               const struct bpf_core_accessor *local_acc,
 +                               const struct btf *targ_btf,
 +                               __u32 targ_id,
 +                               struct bpf_core_spec *spec,
 +                               __u32 *next_targ_id)
 +{
 +      const struct btf_type *local_type, *targ_type;
 +      const struct btf_member *local_member, *m;
 +      const char *local_name, *targ_name;
 +      __u32 local_id;
 +      int i, n, found;
 +
 +      targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
 +      if (!targ_type)
 +              return -EINVAL;
 +      if (!btf_is_composite(targ_type))
 +              return 0;
 +
 +      local_id = local_acc->type_id;
 +      local_type = btf__type_by_id(local_btf, local_id);
 +      local_member = btf_members(local_type) + local_acc->idx;
 +      local_name = btf__name_by_offset(local_btf, local_member->name_off);
 +
 +      n = btf_vlen(targ_type);
 +      m = btf_members(targ_type);
 +      for (i = 0; i < n; i++, m++) {
 +              __u32 offset;
 +
 +              /* bitfield relocations not supported */
 +              if (btf_member_bitfield_size(targ_type, i))
 +                      continue;
 +              offset = btf_member_bit_offset(targ_type, i);
 +              if (offset % 8)
 +                      continue;
 +
 +              /* too deep struct/union/array nesting */
 +              if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 +                      return -E2BIG;
 +
 +              /* speculate this member will be the good one */
 +              spec->offset += offset / 8;
 +              spec->raw_spec[spec->raw_len++] = i;
 +
 +              targ_name = btf__name_by_offset(targ_btf, m->name_off);
 +              if (str_is_empty(targ_name)) {
 +                      /* embedded struct/union, we need to go deeper */
 +                      found = bpf_core_match_member(local_btf, local_acc,
 +                                                    targ_btf, m->type,
 +                                                    spec, next_targ_id);
 +                      if (found) /* either found or error */
 +                              return found;
 +              } else if (strcmp(local_name, targ_name) == 0) {
 +                      /* matching named field */
 +                      struct bpf_core_accessor *targ_acc;
 +
 +                      targ_acc = &spec->spec[spec->len++];
 +                      targ_acc->type_id = targ_id;
 +                      targ_acc->idx = i;
 +                      targ_acc->name = targ_name;
 +
 +                      *next_targ_id = m->type;
 +                      found = bpf_core_fields_are_compat(local_btf,
 +                                                         local_member->type,
 +                                                         targ_btf, m->type);
 +                      if (!found)
 +                              spec->len--; /* pop accessor */
 +                      return found;
 +              }
 +              /* member turned out not to be what we looked for */
 +              spec->offset -= offset / 8;
 +              spec->raw_len--;
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Try to match local spec to a target type and, if successful, produce full
 + * target spec (high-level, low-level + offset).
 + */
 +static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 +                             const struct btf *targ_btf, __u32 targ_id,
 +                             struct bpf_core_spec *targ_spec)
 +{
 +      const struct btf_type *targ_type;
 +      const struct bpf_core_accessor *local_acc;
 +      struct bpf_core_accessor *targ_acc;
 +      int i, sz, matched;
 +
 +      memset(targ_spec, 0, sizeof(*targ_spec));
 +      targ_spec->btf = targ_btf;
 +
 +      local_acc = &local_spec->spec[0];
 +      targ_acc = &targ_spec->spec[0];
 +
 +      for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
 +              targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
 +                                                 &targ_id);
 +              if (!targ_type)
 +                      return -EINVAL;
 +
 +              if (local_acc->name) {
 +                      matched = bpf_core_match_member(local_spec->btf,
 +                                                      local_acc,
 +                                                      targ_btf, targ_id,
 +                                                      targ_spec, &targ_id);
 +                      if (matched <= 0)
 +                              return matched;
 +              } else {
 +                      /* for i=0, targ_id is already treated as array element
 +                       * type (because it's the original struct), for others
 +                       * we should find array element type first
 +                       */
 +                      if (i > 0) {
 +                              const struct btf_array *a;
 +
 +                              if (!btf_is_array(targ_type))
 +                                      return 0;
 +
 +                              a = btf_array(targ_type);
 +                              if (local_acc->idx >= a->nelems)
 +                                      return 0;
 +                              if (!skip_mods_and_typedefs(targ_btf, a->type,
 +                                                          &targ_id))
 +                                      return -EINVAL;
 +                      }
 +
 +                      /* too deep struct/union/array nesting */
 +                      if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 +                              return -E2BIG;
 +
 +                      targ_acc->type_id = targ_id;
 +                      targ_acc->idx = local_acc->idx;
 +                      targ_acc->name = NULL;
 +                      targ_spec->len++;
 +                      targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
 +                      targ_spec->raw_len++;
 +
 +                      sz = btf__resolve_size(targ_btf, targ_id);
 +                      if (sz < 0)
 +                              return sz;
 +                      targ_spec->offset += local_acc->idx * sz;
 +              }
 +      }
 +
 +      return 1;
 +}
 +
 +/*
 + * Patch relocatable BPF instruction.
 + * Expected insn->imm value is provided for validation, as well as the new
 + * relocated value.
 + *
 + * Currently three kinds of BPF instructions are supported:
 + * 1. rX = <imm> (assignment with immediate operand);
 + * 2. rX += <imm> (arithmetic operations with immediate operand);
 + * 3. *(rX) = <imm> (indirect memory assignment with immediate operand).
 + *
 + * If actual insn->imm value is wrong, bail out.
 + */
 +static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,
 +                             __u32 orig_off, __u32 new_off)
 +{
 +      struct bpf_insn *insn;
 +      int insn_idx;
 +      __u8 class;
 +
 +      if (insn_off % sizeof(struct bpf_insn))
 +              return -EINVAL;
 +      insn_idx = insn_off / sizeof(struct bpf_insn);
 +
 +      insn = &prog->insns[insn_idx];
 +      class = BPF_CLASS(insn->code);
 +
 +      if (class == BPF_ALU || class == BPF_ALU64) {
 +              if (BPF_SRC(insn->code) != BPF_K)
 +                      return -EINVAL;
 +              if (insn->imm != orig_off)
 +                      return -EINVAL;
 +              insn->imm = new_off;
 +              pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n",
 +                       bpf_program__title(prog, false),
 +                       insn_idx, orig_off, new_off);
 +      } else {
 +              pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
 +                         bpf_program__title(prog, false),
 +                         insn_idx, insn->code, insn->src_reg, insn->dst_reg,
 +                         insn->off, insn->imm);
 +              return -EINVAL;
 +      }
 +      return 0;
 +}
 +
 +static struct btf *btf_load_raw(const char *path)
 +{
 +      struct btf *btf;
 +      size_t read_cnt;
 +      struct stat st;
 +      void *data;
 +      FILE *f;
 +
 +      if (stat(path, &st))
 +              return ERR_PTR(-errno);
 +
 +      data = malloc(st.st_size);
 +      if (!data)
 +              return ERR_PTR(-ENOMEM);
 +
 +      f = fopen(path, "rb");
 +      if (!f) {
 +              btf = ERR_PTR(-errno);
 +              goto cleanup;
 +      }
 +
 +      read_cnt = fread(data, 1, st.st_size, f);
 +      fclose(f);
 +      if (read_cnt < st.st_size) {
 +              btf = ERR_PTR(-EBADF);
 +              goto cleanup;
 +      }
 +
 +      btf = btf__new(data, read_cnt);
 +
 +cleanup:
 +      free(data);
 +      return btf;
 +}
 +
 +/*
 + * Probe a few well-known locations for the vmlinux kernel image and try to load BTF
 + * data out of it to use for target BTF.
 + */
 +static struct btf *bpf_core_find_kernel_btf(void)
 +{
 +      struct {
 +              const char *path_fmt;
 +              bool raw_btf;
 +      } locations[] = {
 +              /* try canonical vmlinux BTF through sysfs first */
 +              { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
 +              /* fall back to trying to find vmlinux ELF on disk otherwise */
 +              { "/boot/vmlinux-%1$s" },
 +              { "/lib/modules/%1$s/vmlinux-%1$s" },
 +              { "/lib/modules/%1$s/build/vmlinux" },
 +              { "/usr/lib/modules/%1$s/kernel/vmlinux" },
 +              { "/usr/lib/debug/boot/vmlinux-%1$s" },
 +              { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
 +              { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
 +      };
 +      char path[PATH_MAX + 1];
 +      struct utsname buf;
 +      struct btf *btf;
 +      int i;
 +
 +      uname(&buf);
 +
 +      for (i = 0; i < ARRAY_SIZE(locations); i++) {
 +              snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
 +
 +              if (access(path, R_OK))
 +                      continue;
 +
 +              if (locations[i].raw_btf)
 +                      btf = btf_load_raw(path);
 +              else
 +                      btf = btf__parse_elf(path, NULL);
 +
 +              pr_debug("loading kernel BTF '%s': %ld\n",
 +                       path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
 +              if (IS_ERR(btf))
 +                      continue;
 +
 +              return btf;
 +      }
 +
 +      pr_warning("failed to find valid kernel BTF\n");
 +      return ERR_PTR(-ESRCH);
 +}
 +
 +/* Output spec definition in the format:
 + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
 + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
 + */
 +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
 +{
 +      const struct btf_type *t;
 +      const char *s;
 +      __u32 type_id;
 +      int i;
 +
 +      type_id = spec->spec[0].type_id;
 +      t = btf__type_by_id(spec->btf, type_id);
 +      s = btf__name_by_offset(spec->btf, t->name_off);
 +      libbpf_print(level, "[%u] %s + ", type_id, s);
 +
 +      for (i = 0; i < spec->raw_len; i++)
 +              libbpf_print(level, "%d%s", spec->raw_spec[i],
 +                           i == spec->raw_len - 1 ? " => " : ":");
 +
 +      libbpf_print(level, "%u @ &x", spec->offset);
 +
 +      for (i = 0; i < spec->len; i++) {
 +              if (spec->spec[i].name)
 +                      libbpf_print(level, ".%s", spec->spec[i].name);
 +              else
 +                      libbpf_print(level, "[%u]", spec->spec[i].idx);
 +      }
 +
 +}
 +
 +static size_t bpf_core_hash_fn(const void *key, void *ctx)
 +{
 +      return (size_t)key;
 +}
 +
 +static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
 +{
 +      return k1 == k2;
 +}
 +
 +static void *u32_as_hash_key(__u32 x)
 +{
 +      return (void *)(uintptr_t)x;
 +}
 +
 +/*
 + * CO-RE relocate single instruction.
 + *
 + * The outline and important points of the algorithm:
 + * 1. For given local type, find corresponding candidate target types.
 + *    Candidate type is a type with the same "essential" name, ignoring
 + *    everything after last triple underscore (___). E.g., `sample`,
 + *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
 + *    for each other. Names with triple underscore are referred to as
 + *    "flavors" and are useful, among other things, to allow to
 + *    specify/support incompatible variations of the same kernel struct, which
 + *    might differ between different kernel versions and/or build
 + *    configurations.
 + *
 + *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
 + *    converter, when deduplicated BTF of a kernel still contains more than
 + *    one different type with the same name. In that case, ___2, ___3, etc.
 + *    are appended starting from the second name conflict. But such flavors are
 + *    also useful to define "locally", in a BPF program, to extract the same
 + *    data across incompatible changes between different kernel
 + *    versions/configurations. For instance, to handle field renames between
 + *    kernel versions, one can use two flavors of the struct name with the
 + *    same common name and use conditional relocations to extract that field,
 + *    depending on target kernel version.
 + * 2. For each candidate type, try to match local specification to this
 + *    candidate target type. Matching involves finding corresponding
 + *    high-level spec accessors, meaning that all named fields should match,
 + *    as well as all array accesses should be within the actual bounds. Also,
 + *    types should be compatible (see bpf_core_fields_are_compat for details).
 + * 3. It is supported and expected that there might be multiple flavors
 + *    matching the spec. As long as all the specs resolve to the same set of
 + *    offsets across all candidates, there is no error. If there is any
 + *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
 + *    imperfection of BTF deduplication, which can cause slight duplication of
 + *    the same BTF type, if some directly or indirectly referenced (by
 + *    pointer) type gets resolved to different actual types in different
 + *    object files. If such situation occurs, deduplicated BTF will end up
 + *    with two (or more) structurally identical types, which differ only in
 + *    types they refer to through pointer. This should be OK in most cases and
 + *    is not an error.
 + * 4. Candidate types search is performed by linearly scanning through all
 + *    types in target BTF. It is anticipated that this is overall more
 + *    efficient memory-wise and not significantly worse (if not better)
 + *    CPU-wise compared to prebuilding a map from all local type names to
 + *    a list of candidate type names. It's also sped up by caching resolved
 + *    list of matching candidates per each local "root" type ID, that has at
 + *    least one bpf_offset_reloc associated with it. This list is shared
 + *    between multiple relocations for the same type ID and is updated as some
 + *    of the candidates are pruned due to structural incompatibility.
 + *    of the candidates are pruned due to structural incompatibility.
 + */
 +static int bpf_core_reloc_offset(struct bpf_program *prog,
 +                               const struct bpf_offset_reloc *relo,
 +                               int relo_idx,
 +                               const struct btf *local_btf,
 +                               const struct btf *targ_btf,
 +                               struct hashmap *cand_cache)
 +{
 +      const char *prog_name = bpf_program__title(prog, false);
 +      struct bpf_core_spec local_spec, cand_spec, targ_spec;
 +      const void *type_key = u32_as_hash_key(relo->type_id);
 +      const struct btf_type *local_type, *cand_type;
 +      const char *local_name, *cand_name;
 +      struct ids_vec *cand_ids;
 +      __u32 local_id, cand_id;
 +      const char *spec_str;
 +      int i, j, err;
 +
 +      local_id = relo->type_id;
 +      local_type = btf__type_by_id(local_btf, local_id);
 +      if (!local_type)
 +              return -EINVAL;
 +
 +      local_name = btf__name_by_offset(local_btf, local_type->name_off);
 +      if (str_is_empty(local_name))
 +              return -EINVAL;
 +
 +      spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
 +      if (str_is_empty(spec_str))
 +              return -EINVAL;
 +
 +      err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
 +      if (err) {
 +              pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
 +                         prog_name, relo_idx, local_id, local_name, spec_str,
 +                         err);
 +              return -EINVAL;
 +      }
 +
 +      pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx);
 +      bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
 +      libbpf_print(LIBBPF_DEBUG, "\n");
 +
 +      if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
 +              cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
 +              if (IS_ERR(cand_ids)) {
 +                      pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
 +                                 prog_name, relo_idx, local_id, local_name,
 +                                 PTR_ERR(cand_ids));
 +                      return PTR_ERR(cand_ids);
 +              }
 +              err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
 +              if (err) {
 +                      bpf_core_free_cands(cand_ids);
 +                      return err;
 +              }
 +      }
 +
 +      for (i = 0, j = 0; i < cand_ids->len; i++) {
 +              cand_id = cand_ids->data[i];
 +              cand_type = btf__type_by_id(targ_btf, cand_id);
 +              cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
 +
 +              err = bpf_core_spec_match(&local_spec, targ_btf,
 +                                        cand_id, &cand_spec);
 +              pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
 +                       prog_name, relo_idx, i, cand_name);
 +              bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
 +              libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
 +              if (err < 0) {
 +                      pr_warning("prog '%s': relo #%d: matching error: %d\n",
 +                                 prog_name, relo_idx, err);
 +                      return err;
 +              }
 +              if (err == 0)
 +                      continue;
 +
 +              if (j == 0) {
 +                      targ_spec = cand_spec;
 +              } else if (cand_spec.offset != targ_spec.offset) {
 +                      /* if there are many candidates, they should all
 +                       * resolve to the same offset
 +                       */
 +                      pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
 +                                 prog_name, relo_idx, cand_spec.offset,
 +                                 targ_spec.offset);
 +                      return -EINVAL;
 +              }
 +
 +              cand_ids->data[j++] = cand_spec.spec[0].type_id;
 +      }
 +
 +      cand_ids->len = j;
 +      if (cand_ids->len == 0) {
 +              pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
 +                         prog_name, relo_idx, local_id, local_name, spec_str);
 +              return -ESRCH;
 +      }
 +
 +      err = bpf_core_reloc_insn(prog, relo->insn_off,
 +                                local_spec.offset, targ_spec.offset);
 +      if (err) {
 +              pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
 +                         prog_name, relo_idx, relo->insn_off, err);
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
 +static int
 +bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
 +{
 +      const struct btf_ext_info_sec *sec;
 +      const struct bpf_offset_reloc *rec;
 +      const struct btf_ext_info *seg;
 +      struct hashmap_entry *entry;
 +      struct hashmap *cand_cache = NULL;
 +      struct bpf_program *prog;
 +      struct btf *targ_btf;
 +      const char *sec_name;
 +      int i, err = 0;
 +
 +      if (targ_btf_path)
 +              targ_btf = btf__parse_elf(targ_btf_path, NULL);
 +      else
 +              targ_btf = bpf_core_find_kernel_btf();
 +      if (IS_ERR(targ_btf)) {
 +              pr_warning("failed to get target BTF: %ld\n",
 +                         PTR_ERR(targ_btf));
 +              return PTR_ERR(targ_btf);
 +      }
 +
 +      cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 +      if (IS_ERR(cand_cache)) {
 +              err = PTR_ERR(cand_cache);
 +              goto out;
 +      }
 +
 +      seg = &obj->btf_ext->offset_reloc_info;
 +      for_each_btf_ext_sec(seg, sec) {
 +              sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 +              if (str_is_empty(sec_name)) {
 +                      err = -EINVAL;
 +                      goto out;
 +              }
 +              prog = bpf_object__find_program_by_title(obj, sec_name);
 +              if (!prog) {
 +                      pr_warning("failed to find program '%s' for CO-RE offset relocation\n",
 +                                 sec_name);
 +                      err = -EINVAL;
 +                      goto out;
 +              }
 +
 +              pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
 +                       sec_name, sec->num_info);
 +
 +              for_each_btf_ext_rec(seg, sec, i, rec) {
 +                      err = bpf_core_reloc_offset(prog, rec, i, obj->btf,
 +                                                  targ_btf, cand_cache);
 +                      if (err) {
 +                              pr_warning("prog '%s': relo #%d: failed to relocate: %d\n",
 +                                         sec_name, i, err);
 +                              goto out;
 +                      }
 +              }
 +      }
 +
 +out:
 +      btf__free(targ_btf);
 +      if (!IS_ERR_OR_NULL(cand_cache)) {
 +              hashmap__for_each_entry(cand_cache, entry, i) {
 +                      bpf_core_free_cands(entry->value);
 +              }
 +              hashmap__free(cand_cache);
 +      }
 +      return err;
 +}
 +
 +static int
 +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 +{
 +      int err = 0;
 +
 +      if (obj->btf_ext->offset_reloc_info.len)
 +              err = bpf_core_reloc_offsets(obj, targ_btf_path);
 +
 +      return err;
 +}
 +
  static int
  bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
                        struct reloc_desc *relo)
@@@ -3297,21 -2395,14 +3291,21 @@@ bpf_program__relocate(struct bpf_progra
        return 0;
  }
  
 -
  static int
 -bpf_object__relocate(struct bpf_object *obj)
 +bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
  {
        struct bpf_program *prog;
        size_t i;
        int err;
  
 +      if (obj->btf_ext) {
 +              err = bpf_object__relocate_core(obj, targ_btf_path);
 +              if (err) {
 +                      pr_warning("failed to perform CO-RE relocations: %d\n",
 +                                 err);
 +                      return err;
 +              }
 +      }
        for (i = 0; i < obj->nr_programs; i++) {
                prog = &obj->programs[i];
  
@@@ -3366,7 -2457,7 +3360,7 @@@ load_program(struct bpf_program *prog, 
        char *cp, errmsg[STRERR_BUFSIZE];
        int log_buf_size = BPF_LOG_BUF_SIZE;
        char *log_buf;
-       int ret;
+       int btf_fd, ret;
  
        if (!insns || !insns_cnt)
                return -EINVAL;
        load_attr.license = license;
        load_attr.kern_version = kern_version;
        load_attr.prog_ifindex = prog->prog_ifindex;
-       load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
+       /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
+       if (prog->obj->btf_ext)
+               btf_fd = bpf_object__btf_fd(prog->obj);
+       else
+               btf_fd = -1;
+       load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
        load_attr.func_info = prog->func_info;
        load_attr.func_info_rec_size = prog->func_info_rec_size;
        load_attr.func_info_cnt = prog->func_info_cnt;
@@@ -3712,7 -2808,7 +3711,7 @@@ int bpf_object__load_xattr(struct bpf_o
        obj->loaded = true;
  
        CHECK_ERR(bpf_object__create_maps(obj), err, out);
 -      CHECK_ERR(bpf_object__relocate(obj), err, out);
 +      CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
        CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
  
        return 0;
@@@ -5903,13 -4999,15 +5902,15 @@@ int libbpf_num_possible_cpus(void
        static const char *fcpu = "/sys/devices/system/cpu/possible";
        int len = 0, n = 0, il = 0, ir = 0;
        unsigned int start = 0, end = 0;
+       int tmp_cpus = 0;
        static int cpus;
        char buf[128];
        int error = 0;
        int fd = -1;
  
-       if (cpus > 0)
-               return cpus;
+       tmp_cpus = READ_ONCE(cpus);
+       if (tmp_cpus > 0)
+               return tmp_cpus;
  
        fd = open(fcpu, O_RDONLY);
        if (fd < 0) {
        }
        buf[len] = '\0';
  
-       for (ir = 0, cpus = 0; ir <= len; ir++) {
+       for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
                /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
                if (buf[ir] == ',' || buf[ir] == '\0') {
                        buf[ir] = '\0';
                        } else if (n == 1) {
                                end = start;
                        }
-                       cpus += end - start + 1;
+                       tmp_cpus += end - start + 1;
                        il = ir + 1;
                }
        }
-       if (cpus <= 0) {
-               pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu);
+       if (tmp_cpus <= 0) {
+               pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
                return -EINVAL;
        }
-       return cpus;
+       WRITE_ONCE(cpus, tmp_cpus);
+       return tmp_cpus;
  }
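
The libbpf_num_possible_cpus() change above computes the CPU count into a local variable and only publishes it with WRITE_ONCE() once it is valid, so concurrent callers never observe a partially computed cache. The same pattern in isolation, assuming a hypothetical compute_value() helper:

static int cached;

int get_cached_value(void)
{
	int tmp = READ_ONCE(cached);	/* fast path: already computed */

	if (tmp > 0)
		return tmp;

	tmp = compute_value();		/* hypothetical; may run on several threads */
	if (tmp > 0)
		WRITE_ONCE(cached, tmp);	/* publish only a valid result */
	return tmp;
}
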