From: David S. Miller <davem@davemloft.net>
Date: Mon, 19 Aug 2019 18:54:03 +0000 (-0700)
Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
X-Git-Tag: v5.4-rc1~131^2~210
X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=commitdiff_plain;h=446bf64b613c4433dac4b15f4eaf326beaad3c8e;hp=-c;p=linux.git

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Merge conflict of mlx5 resolved using instructions in merge
commit 9566e650bf7fdf58384bb06df634f7531ca3a97e.

Signed-off-by: David S. Miller <davem@davemloft.net>
---

446bf64b613c4433dac4b15f4eaf326beaad3c8e
diff --combined MAINTAINERS
index 96d3e60697f5,08176d64eed5..a406947b369e
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -183,7 -183,7 +183,7 @@@ M:	Realtek linux nic maintainers <nic_s
  M:	Heiner Kallweit <hkallweit1@gmail.com>
  L:	netdev@vger.kernel.org
  S:	Maintained
- F:	drivers/net/ethernet/realtek/r8169.c
+ F:	drivers/net/ethernet/realtek/r8169*
  
  8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
  M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@@ -938,14 -938,6 +938,14 @@@ S:	Supporte
  F:	drivers/mux/adgs1408.c
  F:	Documentation/devicetree/bindings/mux/adi,adgs1408.txt
  
 +ANALOG DEVICES INC ADIN DRIVER
 +M:	Alexandru Ardelean <alexandru.ardelean@analog.com>
 +L:	netdev@vger.kernel.org
 +W:	http://ez.analog.com/community/linux-device-drivers
 +S:	Supported
 +F:	drivers/net/phy/adin.c
 +F:	Documentation/devicetree/bindings/net/adi,adin.yaml
 +
  ANALOG DEVICES INC ADIS DRIVER LIBRARY
  M:	Alexandru Ardelean <alexandru.ardelean@analog.com>
  S:	Supported
@@@ -3643,12 -3635,9 +3643,12 @@@ S:	Maintaine
  F:	Documentation/devicetree/bindings/net/can/
  F:	drivers/net/can/
  F:	include/linux/can/dev.h
 +F:	include/linux/can/led.h
 +F:	include/linux/can/rx-offload.h
  F:	include/linux/can/platform/
  F:	include/uapi/linux/can/error.h
  F:	include/uapi/linux/can/netlink.h
 +F:	include/uapi/linux/can/vxcan.h
  
  CAN NETWORK LAYER
  M:	Oliver Hartkopp <socketcan@hartkopp.net>
@@@ -3661,8 -3650,6 +3661,8 @@@ S:	Maintaine
  F:	Documentation/networking/can.rst
  F:	net/can/
  F:	include/linux/can/core.h
 +F:	include/linux/can/skb.h
 +F:	include/net/netns/can.h
  F:	include/uapi/linux/can.h
  F:	include/uapi/linux/can/bcm.h
  F:	include/uapi/linux/can/raw.h
@@@ -6078,7 -6065,7 +6078,7 @@@ M:	Florian Fainelli <f.fainelli@gmail.c
  M:	Heiner Kallweit <hkallweit1@gmail.com>
  L:	netdev@vger.kernel.org
  S:	Maintained
- F:	Documentation/ABI/testing/sysfs-bus-mdio
+ F:	Documentation/ABI/testing/sysfs-class-net-phydev
  F:	Documentation/devicetree/bindings/net/ethernet-phy.yaml
  F:	Documentation/devicetree/bindings/net/mdio*
  F:	Documentation/networking/phy.rst
@@@ -6357,7 -6344,7 +6357,7 @@@ FPGA MANAGER FRAMEWOR
  M:	Moritz Fischer <mdf@kernel.org>
  L:	linux-fpga@vger.kernel.org
  S:	Maintained
- T:	git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git
+ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git
  Q:	http://patchwork.kernel.org/project/linux-fpga/list/
  F:	Documentation/fpga/
  F:	Documentation/driver-api/fpga/
@@@ -6390,7 -6377,7 +6390,7 @@@ FRAMEBUFFER LAYE
  M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:	dri-devel@lists.freedesktop.org
  L:	linux-fbdev@vger.kernel.org
- T:	git git://github.com/bzolnier/linux.git
+ T:	git git://anongit.freedesktop.org/drm/drm-misc
  Q:	http://patchwork.kernel.org/project/linux-fbdev/list/
  S:	Maintained
  F:	Documentation/fb/
@@@ -6454,6 -6441,14 +6454,14 @@@ S:	Maintaine
  F:	drivers/perf/fsl_imx8_ddr_perf.c
  F:	Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
  
+ FREESCALE IMX I2C DRIVER
+ M:	Oleksij Rempel <o.rempel@pengutronix.de>
+ R:	Pengutronix Kernel Team <kernel@pengutronix.de>
+ L:	linux-i2c@vger.kernel.org
+ S:	Maintained
+ F:	drivers/i2c/busses/i2c-imx.c
+ F:	Documentation/devicetree/bindings/i2c/i2c-imx.txt
+ 
  FREESCALE IMX LPI2C DRIVER
  M:	Dong Aisheng <aisheng.dong@nxp.com>
  L:	linux-i2c@vger.kernel.org
@@@ -7465,7 -7460,7 +7473,7 @@@ F:	drivers/net/hyperv
  F:	drivers/scsi/storvsc_drv.c
  F:	drivers/uio/uio_hv_generic.c
  F:	drivers/video/fbdev/hyperv_fb.c
- F:	drivers/iommu/hyperv_iommu.c
+ F:	drivers/iommu/hyperv-iommu.c
  F:	net/vmw_vsock/hyperv_transport.c
  F:	include/clocksource/hyperv_timer.h
  F:	include/linux/hyperv.h
@@@ -8055,6 -8050,7 +8063,7 @@@ S:	Maintaine
  F:	drivers/video/fbdev/i810/
  
  INTEL ASoC DRIVERS
+ M:	Cezary Rojewski <cezary.rojewski@intel.com>
  M:	Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
  M:	Liam Girdwood <liam.r.girdwood@linux.intel.com>
  M:	Jie Yang <yang.jie@linux.intel.com>
@@@ -8076,6 -8072,13 +8085,13 @@@ T:	git git://git.code.sf.net/p/intel-sa
  S:	Supported
  F:	drivers/scsi/isci/
  
+ INTEL CPU family model numbers
+ M:	Tony Luck <tony.luck@intel.com>
+ M:	x86@kernel.org
+ L:	linux-kernel@vger.kernel.org
+ S:	Supported
+ F:	arch/x86/include/asm/intel-family.h
+ 
  INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
  M:	Jani Nikula <jani.nikula@linux.intel.com>
  M:	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
@@@ -8427,7 -8430,6 +8443,6 @@@ L:	linux-xfs@vger.kernel.or
  L:	linux-fsdevel@vger.kernel.org
  T:	git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
  S:	Supported
- F:	fs/iomap.c
  F:	fs/iomap/
  F:	include/linux/iomap.h
  
@@@ -11156,7 -11158,6 +11171,7 @@@ S:	Maintaine
  W:	https://fedorahosted.org/dropwatch/
  F:	net/core/drop_monitor.c
  F:	include/uapi/linux/net_dropmon.h
 +F:	include/net/drop_monitor.h
  
  NETWORKING DRIVERS
  M:	"David S. Miller" <davem@davemloft.net>
@@@ -11336,6 -11337,7 +11351,6 @@@ F:	include/net/nfc
  F:	include/uapi/linux/nfc.h
  F:	drivers/nfc/
  F:	include/linux/platform_data/nfcmrvl.h
 -F:	include/linux/platform_data/nxp-nci.h
  F:	Documentation/devicetree/bindings/net/nfc/
  
  NFS, SUNRPC, AND LOCKD CLIENTS
@@@ -13230,7 -13232,7 +13245,7 @@@ M:	Manish Chopra <manishc@marvell.com
  M:	GR-Linux-NIC-Dev@marvell.com
  L:	netdev@vger.kernel.org
  S:	Supported
 -F:	drivers/net/ethernet/qlogic/qlge/
 +F:	drivers/staging/qlge/
  
  QM1D1B0004 MEDIA DRIVER
  M:	Akihiro Tsukada <tskd08@gmail.com>
@@@ -16097,7 -16099,7 +16112,7 @@@ S:	Maintaine
  F:	drivers/net/ethernet/ti/netcp*
  
  TI PCM3060 ASoC CODEC DRIVER
- M:	Kirill Marinushkin <kmarinushkin@birdec.tech>
+ M:	Kirill Marinushkin <kmarinushkin@birdec.com>
  L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
  S:	Maintained
  F:	Documentation/devicetree/bindings/sound/pcm3060.txt
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 94be97b7952c,8dce4069472b..4c790ffa1a73
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@@ -116,9 -116,6 +116,9 @@@ enum board_idx 
  	BCM57508,
  	BCM57504,
  	BCM57502,
 +	BCM57508_NPAR,
 +	BCM57504_NPAR,
 +	BCM57502_NPAR,
  	BCM58802,
  	BCM58804,
  	BCM58808,
@@@ -164,9 -161,6 +164,9 @@@ static const struct 
  	[BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
  	[BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
  	[BCM57502] = { "Broadcom BCM57502 NetXtreme-E 10Gb/25Gb/50Gb Ethernet" },
 +	[BCM57508_NPAR] = { "Broadcom BCM57508 NetXtreme-E Ethernet Partition" },
 +	[BCM57504_NPAR] = { "Broadcom BCM57504 NetXtreme-E Ethernet Partition" },
 +	[BCM57502_NPAR] = { "Broadcom BCM57502 NetXtreme-E Ethernet Partition" },
  	[BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
  	[BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
  	[BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
@@@ -215,12 -209,6 +215,12 @@@ static const struct pci_device_id bnxt_
  	{ PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 },
  	{ PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 },
  	{ PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 },
 +	{ PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR },
 +	{ PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR },
 +	{ PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR },
 +	{ PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR },
 +	{ PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR },
 +	{ PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR },
  	{ PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
  	{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
  #ifdef CONFIG_BNXT_SRIOV
@@@ -840,41 -828,16 +840,41 @@@ static inline int bnxt_alloc_rx_page(st
  	return 0;
  }
  
 -static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 cp_cons,
 -				   u32 agg_bufs)
 +static struct rx_agg_cmp *bnxt_get_agg(struct bnxt *bp,
 +				       struct bnxt_cp_ring_info *cpr,
 +				       u16 cp_cons, u16 curr)
 +{
 +	struct rx_agg_cmp *agg;
 +
 +	cp_cons = RING_CMP(ADV_RAW_CMP(cp_cons, curr));
 +	agg = (struct rx_agg_cmp *)
 +		&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 +	return agg;
 +}
 +
 +static struct rx_agg_cmp *bnxt_get_tpa_agg_p5(struct bnxt *bp,
 +					      struct bnxt_rx_ring_info *rxr,
 +					      u16 agg_id, u16 curr)
 +{
 +	struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[agg_id];
 +
 +	return &tpa_info->agg_arr[curr];
 +}
 +
 +static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
 +				   u16 start, u32 agg_bufs, bool tpa)
  {
  	struct bnxt_napi *bnapi = cpr->bnapi;
  	struct bnxt *bp = bnapi->bp;
  	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
  	u16 prod = rxr->rx_agg_prod;
  	u16 sw_prod = rxr->rx_sw_agg_prod;
 +	bool p5_tpa = false;
  	u32 i;
  
 +	if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
 +		p5_tpa = true;
 +
  	for (i = 0; i < agg_bufs; i++) {
  		u16 cons;
  		struct rx_agg_cmp *agg;
@@@ -882,10 -845,8 +882,10 @@@
  		struct rx_bd *prod_bd;
  		struct page *page;
  
 -		agg = (struct rx_agg_cmp *)
 -			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 +		if (p5_tpa)
 +			agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i);
 +		else
 +			agg = bnxt_get_agg(bp, cpr, idx, start + i);
  		cons = agg->rx_agg_cmp_opaque;
  		__clear_bit(cons, rxr->rx_agg_bmap);
  
@@@ -913,6 -874,7 +913,6 @@@
  
  		prod = NEXT_RX_AGG(prod);
  		sw_prod = NEXT_RX_AGG(sw_prod);
 -		cp_cons = NEXT_CMP(cp_cons);
  	}
  	rxr->rx_agg_prod = prod;
  	rxr->rx_sw_agg_prod = sw_prod;
@@@ -926,7 -888,7 +926,7 @@@ static struct sk_buff *bnxt_rx_page_skb
  {
  	unsigned int payload = offset_and_len >> 16;
  	unsigned int len = offset_and_len & 0xffff;
 -	struct skb_frag_struct *frag;
 +	skb_frag_t *frag;
  	struct page *page = data;
  	u16 prod = rxr->rx_prod;
  	struct sk_buff *skb;
@@@ -957,7 -919,7 +957,7 @@@
  
  	frag = &skb_shinfo(skb)->frags[0];
  	skb_frag_size_sub(frag, payload);
 -	frag->page_offset += payload;
 +	skb_frag_off_add(frag, payload);
  	skb->data_len -= payload;
  	skb->tail += payload;
  
@@@ -995,19 -957,15 +995,19 @@@ static struct sk_buff *bnxt_rx_skb(stru
  
  static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
  				     struct bnxt_cp_ring_info *cpr,
 -				     struct sk_buff *skb, u16 cp_cons,
 -				     u32 agg_bufs)
 +				     struct sk_buff *skb, u16 idx,
 +				     u32 agg_bufs, bool tpa)
  {
  	struct bnxt_napi *bnapi = cpr->bnapi;
  	struct pci_dev *pdev = bp->pdev;
  	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
  	u16 prod = rxr->rx_agg_prod;
 +	bool p5_tpa = false;
  	u32 i;
  
 +	if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
 +		p5_tpa = true;
 +
  	for (i = 0; i < agg_bufs; i++) {
  		u16 cons, frag_len;
  		struct rx_agg_cmp *agg;
@@@ -1015,10 -973,8 +1015,10 @@@
  		struct page *page;
  		dma_addr_t mapping;
  
 -		agg = (struct rx_agg_cmp *)
 -			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 +		if (p5_tpa)
 +			agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i);
 +		else
 +			agg = bnxt_get_agg(bp, cpr, idx, i);
  		cons = agg->rx_agg_cmp_opaque;
  		frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) &
  			    RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
@@@ -1052,7 -1008,7 +1052,7 @@@
  			 * allocated already.
  			 */
  			rxr->rx_agg_prod = prod;
 -			bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs - i);
 +			bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa);
  			return NULL;
  		}
  
@@@ -1065,6 -1021,7 +1065,6 @@@
  		skb->truesize += PAGE_SIZE;
  
  		prod = NEXT_RX_AGG(prod);
 -		cp_cons = NEXT_CMP(cp_cons);
  	}
  	rxr->rx_agg_prod = prod;
  	return skb;
@@@ -1124,10 -1081,9 +1124,10 @@@ static int bnxt_discard_rx(struct bnxt 
  	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
  		struct rx_tpa_end_cmp *tpa_end = cmp;
  
 -		agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
 -			    RX_TPA_END_CMP_AGG_BUFS) >>
 -			   RX_TPA_END_CMP_AGG_BUFS_SHIFT;
 +		if (bp->flags & BNXT_FLAG_CHIP_P5)
 +			return 0;
 +
 +		agg_bufs = TPA_END_AGG_BUFS(tpa_end);
  	}
  
  	if (agg_bufs) {
@@@ -1164,60 -1120,26 +1164,60 @@@ static void bnxt_sched_reset(struct bnx
  	rxr->rx_next_cons = 0xffff;
  }
  
 +static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
 +{
 +	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 +	u16 idx = agg_id & MAX_TPA_P5_MASK;
 +
 +	if (test_bit(idx, map->agg_idx_bmap))
 +		idx = find_first_zero_bit(map->agg_idx_bmap,
 +					  BNXT_AGG_IDX_BMAP_SIZE);
 +	__set_bit(idx, map->agg_idx_bmap);
 +	map->agg_id_tbl[agg_id] = idx;
 +	return idx;
 +}
 +
 +static void bnxt_free_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx)
 +{
 +	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 +
 +	__clear_bit(idx, map->agg_idx_bmap);
 +}
 +
 +static u16 bnxt_lookup_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
 +{
 +	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
 +
 +	return map->agg_id_tbl[agg_id];
 +}
 +
  static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
  			   struct rx_tpa_start_cmp *tpa_start,
  			   struct rx_tpa_start_cmp_ext *tpa_start1)
  {
 -	u8 agg_id = TPA_START_AGG_ID(tpa_start);
 -	u16 cons, prod;
 -	struct bnxt_tpa_info *tpa_info;
  	struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
 +	struct bnxt_tpa_info *tpa_info;
 +	u16 cons, prod, agg_id;
  	struct rx_bd *prod_bd;
  	dma_addr_t mapping;
  
 +	if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +		agg_id = TPA_START_AGG_ID_P5(tpa_start);
 +		agg_id = bnxt_alloc_agg_idx(rxr, agg_id);
 +	} else {
 +		agg_id = TPA_START_AGG_ID(tpa_start);
 +	}
  	cons = tpa_start->rx_tpa_start_cmp_opaque;
  	prod = rxr->rx_prod;
  	cons_rx_buf = &rxr->rx_buf_ring[cons];
  	prod_rx_buf = &rxr->rx_buf_ring[prod];
  	tpa_info = &rxr->rx_tpa[agg_id];
  
 -	if (unlikely(cons != rxr->rx_next_cons)) {
 -		netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n",
 -			    cons, rxr->rx_next_cons);
 +	if (unlikely(cons != rxr->rx_next_cons ||
 +		     TPA_START_ERROR(tpa_start))) {
 +		netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
 +			    cons, rxr->rx_next_cons,
 +			    TPA_START_ERROR_CODE(tpa_start1));
  		bnxt_sched_reset(bp, rxr);
  		return;
  	}
@@@ -1262,7 -1184,6 +1262,7 @@@
  	tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2);
  	tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata);
  	tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info);
 +	tpa_info->agg_count = 0;
  
  	rxr->rx_prod = NEXT_RX(prod);
  	cons = NEXT_RX(cons);
@@@ -1274,37 -1195,13 +1274,37 @@@
  	cons_rx_buf->data = NULL;
  }
  
 -static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 cp_cons,
 -			   u32 agg_bufs)
 +static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 idx, u32 agg_bufs)
  {
  	if (agg_bufs)
 -		bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
 +		bnxt_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true);
  }
  
 +#ifdef CONFIG_INET
 +static void bnxt_gro_tunnel(struct sk_buff *skb, __be16 ip_proto)
 +{
 +	struct udphdr *uh = NULL;
 +
 +	if (ip_proto == htons(ETH_P_IP)) {
 +		struct iphdr *iph = (struct iphdr *)skb->data;
 +
 +		if (iph->protocol == IPPROTO_UDP)
 +			uh = (struct udphdr *)(iph + 1);
 +	} else {
 +		struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 +
 +		if (iph->nexthdr == IPPROTO_UDP)
 +			uh = (struct udphdr *)(iph + 1);
 +	}
 +	if (uh) {
 +		if (uh->check)
 +			skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
 +		else
 +			skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
 +	}
 +}
 +#endif
 +
  static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info,
  					   int payload_off, int tcp_ts,
  					   struct sk_buff *skb)
@@@ -1362,39 -1259,28 +1362,39 @@@
  	}
  
  	if (inner_mac_off) { /* tunnel */
 -		struct udphdr *uh = NULL;
  		__be16 proto = *((__be16 *)(skb->data + outer_ip_off -
  					    ETH_HLEN - 2));
  
 -		if (proto == htons(ETH_P_IP)) {
 -			struct iphdr *iph = (struct iphdr *)skb->data;
 +		bnxt_gro_tunnel(skb, proto);
 +	}
 +#endif
 +	return skb;
 +}
  
 -			if (iph->protocol == IPPROTO_UDP)
 -				uh = (struct udphdr *)(iph + 1);
 -		} else {
 -			struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 +static struct sk_buff *bnxt_gro_func_5750x(struct bnxt_tpa_info *tpa_info,
 +					   int payload_off, int tcp_ts,
 +					   struct sk_buff *skb)
 +{
 +#ifdef CONFIG_INET
 +	u16 outer_ip_off, inner_ip_off, inner_mac_off;
 +	u32 hdr_info = tpa_info->hdr_info;
 +	int iphdr_len, nw_off;
  
 -			if (iph->nexthdr == IPPROTO_UDP)
 -				uh = (struct udphdr *)(iph + 1);
 -		}
 -		if (uh) {
 -			if (uh->check)
 -				skb_shinfo(skb)->gso_type |=
 -					SKB_GSO_UDP_TUNNEL_CSUM;
 -			else
 -				skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
 -		}
 +	inner_ip_off = BNXT_TPA_INNER_L3_OFF(hdr_info);
 +	inner_mac_off = BNXT_TPA_INNER_L2_OFF(hdr_info);
 +	outer_ip_off = BNXT_TPA_OUTER_L3_OFF(hdr_info);
 +
 +	nw_off = inner_ip_off - ETH_HLEN;
 +	skb_set_network_header(skb, nw_off);
 +	iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ?
 +		     sizeof(struct ipv6hdr) : sizeof(struct iphdr);
 +	skb_set_transport_header(skb, nw_off + iphdr_len);
 +
 +	if (inner_mac_off) { /* tunnel */
 +		__be16 proto = *((__be16 *)(skb->data + outer_ip_off -
 +					    ETH_HLEN - 2));
 +
 +		bnxt_gro_tunnel(skb, proto);
  	}
  #endif
  	return skb;
@@@ -1441,8 -1327,28 +1441,8 @@@ static struct sk_buff *bnxt_gro_func_57
  		return NULL;
  	}
  
 -	if (nw_off) { /* tunnel */
 -		struct udphdr *uh = NULL;
 -
 -		if (skb->protocol == htons(ETH_P_IP)) {
 -			struct iphdr *iph = (struct iphdr *)skb->data;
 -
 -			if (iph->protocol == IPPROTO_UDP)
 -				uh = (struct udphdr *)(iph + 1);
 -		} else {
 -			struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 -
 -			if (iph->nexthdr == IPPROTO_UDP)
 -				uh = (struct udphdr *)(iph + 1);
 -		}
 -		if (uh) {
 -			if (uh->check)
 -				skb_shinfo(skb)->gso_type |=
 -					SKB_GSO_UDP_TUNNEL_CSUM;
 -			else
 -				skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
 -		}
 -	}
 +	if (nw_off) /* tunnel */
 +		bnxt_gro_tunnel(skb, skb->protocol);
  #endif
  	return skb;
  }
@@@ -1465,10 -1371,9 +1465,10 @@@ static inline struct sk_buff *bnxt_gro_
  	skb_shinfo(skb)->gso_size =
  		le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
  	skb_shinfo(skb)->gso_type = tpa_info->gso_type;
 -	payload_off = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
 -		       RX_TPA_END_CMP_PAYLOAD_OFFSET) >>
 -		      RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT;
 +	if (bp->flags & BNXT_FLAG_CHIP_P5)
 +		payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1);
 +	else
 +		payload_off = TPA_END_PAYLOAD_OFF(tpa_end);
  	skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb);
  	if (likely(skb))
  		tcp_gro_complete(skb);
@@@ -1496,14 -1401,14 +1496,14 @@@ static inline struct sk_buff *bnxt_tpa_
  {
  	struct bnxt_napi *bnapi = cpr->bnapi;
  	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 -	u8 agg_id = TPA_END_AGG_ID(tpa_end);
  	u8 *data_ptr, agg_bufs;
 -	u16 cp_cons = RING_CMP(*raw_cons);
  	unsigned int len;
  	struct bnxt_tpa_info *tpa_info;
  	dma_addr_t mapping;
  	struct sk_buff *skb;
 +	u16 idx = 0, agg_id;
  	void *data;
 +	bool gro;
  
  	if (unlikely(bnapi->in_reset)) {
  		int rc = bnxt_discard_rx(bp, cpr, raw_cons, tpa_end);
@@@ -1513,43 -1418,26 +1513,43 @@@
  		return NULL;
  	}
  
 -	tpa_info = &rxr->rx_tpa[agg_id];
 +	if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +		agg_id = TPA_END_AGG_ID_P5(tpa_end);
 +		agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
 +		agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1);
 +		tpa_info = &rxr->rx_tpa[agg_id];
 +		if (unlikely(agg_bufs != tpa_info->agg_count)) {
 +			netdev_warn(bp->dev, "TPA end agg_buf %d != expected agg_bufs %d\n",
 +				    agg_bufs, tpa_info->agg_count);
 +			agg_bufs = tpa_info->agg_count;
 +		}
 +		tpa_info->agg_count = 0;
 +		*event |= BNXT_AGG_EVENT;
 +		bnxt_free_agg_idx(rxr, agg_id);
 +		idx = agg_id;
 +		gro = !!(bp->flags & BNXT_FLAG_GRO);
 +	} else {
 +		agg_id = TPA_END_AGG_ID(tpa_end);
 +		agg_bufs = TPA_END_AGG_BUFS(tpa_end);
 +		tpa_info = &rxr->rx_tpa[agg_id];
 +		idx = RING_CMP(*raw_cons);
 +		if (agg_bufs) {
 +			if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
 +				return ERR_PTR(-EBUSY);
 +
 +			*event |= BNXT_AGG_EVENT;
 +			idx = NEXT_CMP(idx);
 +		}
 +		gro = !!TPA_END_GRO(tpa_end);
 +	}
  	data = tpa_info->data;
  	data_ptr = tpa_info->data_ptr;
  	prefetch(data_ptr);
  	len = tpa_info->len;
  	mapping = tpa_info->mapping;
  
 -	agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
 -		    RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT;
 -
 -	if (agg_bufs) {
 -		if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
 -			return ERR_PTR(-EBUSY);
 -
 -		*event |= BNXT_AGG_EVENT;
 -		cp_cons = NEXT_CMP(cp_cons);
 -	}
 -
  	if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
 -		bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +		bnxt_abort_tpa(cpr, idx, agg_bufs);
  		if (agg_bufs > MAX_SKB_FRAGS)
  			netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
  				    agg_bufs, (int)MAX_SKB_FRAGS);
@@@ -1559,7 -1447,7 +1559,7 @@@
  	if (len <= bp->rx_copy_thresh) {
  		skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
  		if (!skb) {
 -			bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +			bnxt_abort_tpa(cpr, idx, agg_bufs);
  			return NULL;
  		}
  	} else {
@@@ -1568,7 -1456,7 +1568,7 @@@
  
  		new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC);
  		if (!new_data) {
 -			bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +			bnxt_abort_tpa(cpr, idx, agg_bufs);
  			return NULL;
  		}
  
@@@ -1583,7 -1471,7 +1583,7 @@@
  
  		if (!skb) {
  			kfree(data);
 -			bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
 +			bnxt_abort_tpa(cpr, idx, agg_bufs);
  			return NULL;
  		}
  		skb_reserve(skb, bp->rx_offset);
@@@ -1591,7 -1479,7 +1591,7 @@@
  	}
  
  	if (agg_bufs) {
 -		skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs);
 +		skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
  		if (!skb) {
  			/* Page reuse already handled by bnxt_rx_pages(). */
  			return NULL;
@@@ -1620,24 -1508,12 +1620,24 @@@
  			(tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3;
  	}
  
 -	if (TPA_END_GRO(tpa_end))
 +	if (gro)
  		skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb);
  
  	return skb;
  }
  
 +static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 +			 struct rx_agg_cmp *rx_agg)
 +{
 +	u16 agg_id = TPA_AGG_AGG_ID(rx_agg);
 +	struct bnxt_tpa_info *tpa_info;
 +
 +	agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
 +	tpa_info = &rxr->rx_tpa[agg_id];
 +	BUG_ON(tpa_info->agg_count >= MAX_SKB_FRAGS);
 +	tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg;
 +}
 +
  static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
  			     struct sk_buff *skb)
  {
@@@ -1679,13 -1555,6 +1679,13 @@@ static int bnxt_rx_pkt(struct bnxt *bp
  	rxcmp = (struct rx_cmp *)
  			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
  
 +	cmp_type = RX_CMP_TYPE(rxcmp);
 +
 +	if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) {
 +		bnxt_tpa_agg(bp, rxr, (struct rx_agg_cmp *)rxcmp);
 +		goto next_rx_no_prod_no_len;
 +	}
 +
  	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
  	cp_cons = RING_CMP(tmp_raw_cons);
  	rxcmp1 = (struct rx_cmp_ext *)
@@@ -1694,6 -1563,8 +1694,6 @@@
  	if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
  		return -EBUSY;
  
 -	cmp_type = RX_CMP_TYPE(rxcmp);
 -
  	prod = rxr->rx_prod;
  
  	if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
@@@ -1752,8 -1623,7 +1752,8 @@@
  
  		bnxt_reuse_rx_data(rxr, cons, data);
  		if (agg_bufs)
 -			bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
 +			bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs,
 +					       false);
  
  		rc = -EIO;
  		if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
@@@ -1776,8 -1646,7 +1776,8 @@@
  		bnxt_reuse_rx_data(rxr, cons, data);
  		if (!skb) {
  			if (agg_bufs)
 -				bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
 +				bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
 +						       agg_bufs, false);
  			rc = -ENOMEM;
  			goto next_rx;
  		}
@@@ -1797,7 -1666,7 +1797,7 @@@
  	}
  
  	if (agg_bufs) {
 -		skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs);
 +		skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
  		if (!skb) {
  			rc = -ENOMEM;
  			goto next_rx;
@@@ -2152,9 -2021,9 +2152,9 @@@ static void __bnxt_poll_work_done(struc
  	if (bnapi->events & BNXT_RX_EVENT) {
  		struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
  
- 		bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
  		if (bnapi->events & BNXT_AGG_EVENT)
  			bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+ 		bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
  	}
  	bnapi->events = 0;
  }
@@@ -2456,11 -2325,10 +2456,11 @@@ static void bnxt_free_rx_skbs(struct bn
  	max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT;
  	for (i = 0; i < bp->rx_nr_rings; i++) {
  		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 +		struct bnxt_tpa_idx_map *map;
  		int j;
  
  		if (rxr->rx_tpa) {
 -			for (j = 0; j < MAX_TPA; j++) {
 +			for (j = 0; j < bp->max_tpa; j++) {
  				struct bnxt_tpa_info *tpa_info =
  							&rxr->rx_tpa[j];
  				u8 *data = tpa_info->data;
@@@ -2527,9 -2395,6 +2527,9 @@@
  			__free_page(rxr->rx_page);
  			rxr->rx_page = NULL;
  		}
 +		map = rxr->rx_tpa_idx_map;
 +		if (map)
 +			memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap));
  	}
  }
  
@@@ -2618,61 -2483,6 +2618,61 @@@ static int bnxt_alloc_ring(struct bnxt 
  	return 0;
  }
  
 +static void bnxt_free_tpa_info(struct bnxt *bp)
 +{
 +	int i;
 +
 +	for (i = 0; i < bp->rx_nr_rings; i++) {
 +		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 +
 +		kfree(rxr->rx_tpa_idx_map);
 +		rxr->rx_tpa_idx_map = NULL;
 +		if (rxr->rx_tpa) {
 +			kfree(rxr->rx_tpa[0].agg_arr);
 +			rxr->rx_tpa[0].agg_arr = NULL;
 +		}
 +		kfree(rxr->rx_tpa);
 +		rxr->rx_tpa = NULL;
 +	}
 +}
 +
 +static int bnxt_alloc_tpa_info(struct bnxt *bp)
 +{
 +	int i, j, total_aggs = 0;
 +
 +	bp->max_tpa = MAX_TPA;
 +	if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +		if (!bp->max_tpa_v2)
 +			return 0;
 +		bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
 +		total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
 +	}
 +
 +	for (i = 0; i < bp->rx_nr_rings; i++) {
 +		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 +		struct rx_agg_cmp *agg;
 +
 +		rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info),
 +				      GFP_KERNEL);
 +		if (!rxr->rx_tpa)
 +			return -ENOMEM;
 +
 +		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
 +			continue;
 +		agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
 +		rxr->rx_tpa[0].agg_arr = agg;
 +		if (!agg)
 +			return -ENOMEM;
 +		for (j = 1; j < bp->max_tpa; j++)
 +			rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
 +		rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
 +					      GFP_KERNEL);
 +		if (!rxr->rx_tpa_idx_map)
 +			return -ENOMEM;
 +	}
 +	return 0;
 +}
 +
  static void bnxt_free_rx_rings(struct bnxt *bp)
  {
  	int i;
@@@ -2680,7 -2490,6 +2680,7 @@@
  	if (!bp->rx_ring)
  		return;
  
 +	bnxt_free_tpa_info(bp);
  	for (i = 0; i < bp->rx_nr_rings; i++) {
  		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
  		struct bnxt_ring_struct *ring;
@@@ -2694,6 -2503,9 +2694,6 @@@
  		page_pool_destroy(rxr->page_pool);
  		rxr->page_pool = NULL;
  
 -		kfree(rxr->rx_tpa);
 -		rxr->rx_tpa = NULL;
 -
  		kfree(rxr->rx_agg_bmap);
  		rxr->rx_agg_bmap = NULL;
  
@@@ -2727,7 -2539,7 +2727,7 @@@ static int bnxt_alloc_rx_page_pool(stru
  
  static int bnxt_alloc_rx_rings(struct bnxt *bp)
  {
 -	int i, rc, agg_rings = 0, tpa_rings = 0;
 +	int i, rc = 0, agg_rings = 0;
  
  	if (!bp->rx_ring)
  		return -ENOMEM;
@@@ -2735,6 -2547,9 +2735,6 @@@
  	if (bp->flags & BNXT_FLAG_AGG_RINGS)
  		agg_rings = 1;
  
 -	if (bp->flags & BNXT_FLAG_TPA)
 -		tpa_rings = 1;
 -
  	for (i = 0; i < bp->rx_nr_rings; i++) {
  		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
  		struct bnxt_ring_struct *ring;
@@@ -2776,11 -2591,17 +2776,11 @@@
  			rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL);
  			if (!rxr->rx_agg_bmap)
  				return -ENOMEM;
 -
 -			if (tpa_rings) {
 -				rxr->rx_tpa = kcalloc(MAX_TPA,
 -						sizeof(struct bnxt_tpa_info),
 -						GFP_KERNEL);
 -				if (!rxr->rx_tpa)
 -					return -ENOMEM;
 -			}
  		}
  	}
 -	return 0;
 +	if (bp->flags & BNXT_FLAG_TPA)
 +		rc = bnxt_alloc_tpa_info(bp);
 +	return rc;
  }
  
  static void bnxt_free_tx_rings(struct bnxt *bp)
@@@ -3132,7 -2953,7 +3132,7 @@@ static int bnxt_init_one_rx_ring(struc
  			u8 *data;
  			dma_addr_t mapping;
  
 -			for (i = 0; i < MAX_TPA; i++) {
 +			for (i = 0; i < bp->max_tpa; i++) {
  				data = __bnxt_alloc_rx_data(bp, &mapping,
  							    GFP_KERNEL);
  				if (!data)
@@@ -3647,7 -3468,7 +3647,7 @@@ static void bnxt_free_ring_stats(struc
  	if (!bp->bnapi)
  		return;
  
 -	size = sizeof(struct ctx_hw_stats);
 +	size = bp->hw_ring_stats_size;
  
  	for (i = 0; i < bp->cp_nr_rings; i++) {
  		struct bnxt_napi *bnapi = bp->bnapi[i];
@@@ -3666,7 -3487,7 +3666,7 @@@ static int bnxt_alloc_stats(struct bnx
  	u32 size, i;
  	struct pci_dev *pdev = bp->pdev;
  
 -	size = sizeof(struct ctx_hw_stats);
 +	size = bp->hw_ring_stats_size;
  
  	for (i = 0; i < bp->cp_nr_rings; i++) {
  		struct bnxt_napi *bnapi = bp->bnapi[i];
@@@ -4593,7 -4414,6 +4593,7 @@@ static int bnxt_hwrm_clear_vnic_filter(
  static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
  {
  	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
 +	u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX;
  	struct hwrm_vnic_tpa_cfg_input req = {0};
  
  	if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
@@@ -4633,14 -4453,9 +4633,14 @@@
  			nsegs = (MAX_SKB_FRAGS - n) / n;
  		}
  
 -		segs = ilog2(nsegs);
 +		if (bp->flags & BNXT_FLAG_CHIP_P5) {
 +			segs = MAX_TPA_SEGS_P5;
 +			max_aggs = bp->max_tpa;
 +		} else {
 +			segs = ilog2(nsegs);
 +		}
  		req.max_agg_segs = cpu_to_le16(segs);
 -		req.max_aggs = cpu_to_le16(VNIC_TPA_CFG_REQ_MAX_AGGS_MAX);
 +		req.max_aggs = cpu_to_le16(max_aggs);
  
  		req.min_agg_len = cpu_to_le32(512);
  	}
@@@ -5000,12 -4815,6 +5000,12 @@@ static int bnxt_hwrm_vnic_qcaps(struct 
  		if (flags &
  		    VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP)
  			bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP;
 +		bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported);
 +		if (bp->max_tpa_v2)
 +			bp->hw_ring_stats_size =
 +				sizeof(struct ctx_hw_stats_ext);
 +		else
 +			bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats);
  	}
  	mutex_unlock(&bp->hwrm_cmd_lock);
  	return rc;
@@@ -5255,6 -5064,7 +5255,7 @@@ static void bnxt_set_db(struct bnxt *bp
  
  static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
  {
+ 	bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS);
  	int i, rc = 0;
  	u32 type;
  
@@@ -5330,7 -5140,9 +5331,9 @@@
  		if (rc)
  			goto err_out;
  		bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id);
- 		bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
+ 		/* If we have agg rings, post agg buffers first. */
+ 		if (!agg_rings)
+ 			bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
  		bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id;
  		if (bp->flags & BNXT_FLAG_CHIP_P5) {
  			struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@@ -5349,7 -5161,7 +5352,7 @@@
  		}
  	}
  
- 	if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+ 	if (agg_rings) {
  		type = HWRM_RING_ALLOC_AGG;
  		for (i = 0; i < bp->rx_nr_rings; i++) {
  			struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
@@@ -5365,6 -5177,7 +5368,7 @@@
  			bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx,
  				    ring->fw_ring_id);
  			bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+ 			bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
  			bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id;
  		}
  	}
@@@ -6203,7 -6016,6 +6207,7 @@@ static int bnxt_hwrm_stat_ctx_alloc(str
  
  	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
  
 +	req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
  	req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
  
  	mutex_lock(&bp->hwrm_cmd_lock);
@@@ -7208,19 -7020,29 +7212,29 @@@ static void bnxt_hwrm_clear_vnic_rss(st
  		bnxt_hwrm_vnic_set_rss(bp, i, false);
  }
  
- static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
- 				    bool irq_re_init)
+ static void bnxt_clear_vnic(struct bnxt *bp)
  {
- 	if (bp->vnic_info) {
- 		bnxt_hwrm_clear_vnic_filter(bp);
+ 	if (!bp->vnic_info)
+ 		return;
+ 
+ 	bnxt_hwrm_clear_vnic_filter(bp);
+ 	if (!(bp->flags & BNXT_FLAG_CHIP_P5)) {
  		/* clear all RSS setting before free vnic ctx */
  		bnxt_hwrm_clear_vnic_rss(bp);
  		bnxt_hwrm_vnic_ctx_free(bp);
- 		/* before free the vnic, undo the vnic tpa settings */
- 		if (bp->flags & BNXT_FLAG_TPA)
- 			bnxt_set_tpa(bp, false);
- 		bnxt_hwrm_vnic_free(bp);
  	}
+ 	/* before free the vnic, undo the vnic tpa settings */
+ 	if (bp->flags & BNXT_FLAG_TPA)
+ 		bnxt_set_tpa(bp, false);
+ 	bnxt_hwrm_vnic_free(bp);
+ 	if (bp->flags & BNXT_FLAG_CHIP_P5)
+ 		bnxt_hwrm_vnic_ctx_free(bp);
+ }
+ 
+ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
+ 				    bool irq_re_init)
+ {
+ 	bnxt_clear_vnic(bp);
  	bnxt_hwrm_ring_free(bp, close_path);
  	bnxt_hwrm_ring_grp_free(bp);
  	if (irq_re_init) {
@@@ -9484,8 -9306,7 +9498,8 @@@ static int bnxt_set_features(struct net
  	if (changes & BNXT_FLAG_TPA) {
  		update_tpa = true;
  		if ((bp->flags & BNXT_FLAG_TPA) == 0 ||
 -		    (flags & BNXT_FLAG_TPA) == 0)
 +		    (flags & BNXT_FLAG_TPA) == 0 ||
 +		    (bp->flags & BNXT_FLAG_CHIP_P5))
  			re_init = true;
  	}
  
@@@ -9495,8 -9316,9 +9509,8 @@@
  	if (flags != bp->flags) {
  		u32 old_flags = bp->flags;
  
 -		bp->flags = flags;
 -
  		if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
 +			bp->flags = flags;
  			if (update_tpa)
  				bnxt_set_ring_params(bp);
  			return rc;
@@@ -9504,14 -9326,12 +9518,14 @@@
  
  		if (re_init) {
  			bnxt_close_nic(bp, false, false);
 +			bp->flags = flags;
  			if (update_tpa)
  				bnxt_set_ring_params(bp);
  
  			return bnxt_open_nic(bp, false, false);
  		}
  		if (update_tpa) {
 +			bp->flags = flags;
  			rc = bnxt_set_tpa(bp,
  					  (flags & BNXT_FLAG_TPA) ?
  					  true : false);
@@@ -9908,68 -9728,6 +9922,68 @@@ static void bnxt_init_dflt_coal(struct 
  	bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
  }
  
 +static int bnxt_fw_init_one_p1(struct bnxt *bp)
 +{
 +	int rc;
 +
 +	bp->fw_cap = 0;
 +	rc = bnxt_hwrm_ver_get(bp);
 +	if (rc)
 +		return rc;
 +
 +	if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
 +		rc = bnxt_alloc_kong_hwrm_resources(bp);
 +		if (rc)
 +			bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
 +	}
 +
 +	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 +	    bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
 +		rc = bnxt_alloc_hwrm_short_cmd_req(bp);
 +		if (rc)
 +			return rc;
 +	}
 +	rc = bnxt_hwrm_func_reset(bp);
 +	if (rc)
 +		return -ENODEV;
 +
 +	bnxt_hwrm_fw_set_time(bp);
 +	return 0;
 +}
 +
 +static int bnxt_fw_init_one_p2(struct bnxt *bp)
 +{
 +	int rc;
 +
 +	/* Get the MAX capabilities for this function */
 +	rc = bnxt_hwrm_func_qcaps(bp);
 +	if (rc) {
 +		netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
 +			   rc);
 +		return -ENODEV;
 +	}
 +
 +	rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
 +	if (rc)
 +		netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
 +			    rc);
 +
 +	rc = bnxt_hwrm_func_drv_rgtr(bp);
 +	if (rc)
 +		return -ENODEV;
 +
 +	rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
 +	if (rc)
 +		return -ENODEV;
 +
 +	bnxt_hwrm_func_qcfg(bp);
 +	bnxt_hwrm_vnic_qcaps(bp);
 +	bnxt_hwrm_port_led_qcaps(bp);
 +	bnxt_ethtool_init(bp);
 +	bnxt_dcb_init(bp);
 +	return 0;
 +}
 +
  static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
  {
  	int rc;
@@@ -10925,18 -10683,32 +10939,18 @@@ static int bnxt_init_one(struct pci_de
  		goto init_err_pci_clean;
  
  	mutex_init(&bp->hwrm_cmd_lock);
 -	rc = bnxt_hwrm_ver_get(bp);
 +
 +	rc = bnxt_fw_init_one_p1(bp);
  	if (rc)
  		goto init_err_pci_clean;
  
 -	if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
 -		rc = bnxt_alloc_kong_hwrm_resources(bp);
 -		if (rc)
 -			bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
 -	}
 -
 -	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 -	    bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
 -		rc = bnxt_alloc_hwrm_short_cmd_req(bp);
 -		if (rc)
 -			goto init_err_pci_clean;
 -	}
 -
  	if (BNXT_CHIP_P5(bp))
  		bp->flags |= BNXT_FLAG_CHIP_P5;
  
 -	rc = bnxt_hwrm_func_reset(bp);
 +	rc = bnxt_fw_init_one_p2(bp);
  	if (rc)
  		goto init_err_pci_clean;
  
 -	bnxt_hwrm_fw_set_time(bp);
 -
  	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
  			   NETIF_F_TSO | NETIF_F_TSO6 |
  			   NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
@@@ -10974,14 -10746,41 +10988,14 @@@
  		bp->gro_func = bnxt_gro_func_5730x;
  		if (BNXT_CHIP_P4(bp))
  			bp->gro_func = bnxt_gro_func_5731x;
 +		else if (BNXT_CHIP_P5(bp))
 +			bp->gro_func = bnxt_gro_func_5750x;
  	}
  	if (!BNXT_CHIP_P4_PLUS(bp))
  		bp->flags |= BNXT_FLAG_DOUBLE_DB;
  
 -	rc = bnxt_hwrm_func_drv_rgtr(bp);
 -	if (rc)
 -		goto init_err_pci_clean;
 -
 -	rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
 -	if (rc)
 -		goto init_err_pci_clean;
 -
  	bp->ulp_probe = bnxt_ulp_probe;
  
 -	rc = bnxt_hwrm_queue_qportcfg(bp);
 -	if (rc) {
 -		netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n",
 -			   rc);
 -		rc = -1;
 -		goto init_err_pci_clean;
 -	}
 -	/* Get the MAX capabilities for this function */
 -	rc = bnxt_hwrm_func_qcaps(bp);
 -	if (rc) {
 -		netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
 -			   rc);
 -		rc = -1;
 -		goto init_err_pci_clean;
 -	}
 -
 -	rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
 -	if (rc)
 -		netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
 -			    rc);
 -
  	rc = bnxt_init_mac_addr(bp);
  	if (rc) {
  		dev_err(&pdev->dev, "Unable to initialize mac address.\n");
@@@ -10995,6 -10794,11 +11009,6 @@@
  		if (rc)
  			goto init_err_pci_clean;
  	}
 -	bnxt_hwrm_func_qcfg(bp);
 -	bnxt_hwrm_vnic_qcaps(bp);
 -	bnxt_hwrm_port_led_qcaps(bp);
 -	bnxt_ethtool_init(bp);
 -	bnxt_dcb_init(bp);
  
  	/* MTU range: 60 - FW defined max */
  	dev->min_mtu = ETH_ZLEN;
@@@ -11130,7 -10934,8 +11144,7 @@@ shutdown_exit
  #ifdef CONFIG_PM_SLEEP
  static int bnxt_suspend(struct device *device)
  {
 -	struct pci_dev *pdev = to_pci_dev(device);
 -	struct net_device *dev = pci_get_drvdata(pdev);
 +	struct net_device *dev = dev_get_drvdata(device);
  	struct bnxt *bp = netdev_priv(dev);
  	int rc = 0;
  
@@@ -11146,7 -10951,8 +11160,7 @@@
  
  static int bnxt_resume(struct device *device)
  {
 -	struct pci_dev *pdev = to_pci_dev(device);
 -	struct net_device *dev = pci_get_drvdata(pdev);
 +	struct net_device *dev = dev_get_drvdata(device);
  	struct bnxt *bp = netdev_priv(dev);
  	int rc = 0;
  
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3a3d8a9be5ed,8445a0cce849..b624174c8594
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@@ -137,44 -137,7 +137,44 @@@ reset_coalesce
  	return rc;
  }
  
 -#define BNXT_NUM_STATS	22
 +static const char * const bnxt_ring_stats_str[] = {
 +	"rx_ucast_packets",
 +	"rx_mcast_packets",
 +	"rx_bcast_packets",
 +	"rx_discards",
 +	"rx_drops",
 +	"rx_ucast_bytes",
 +	"rx_mcast_bytes",
 +	"rx_bcast_bytes",
 +	"tx_ucast_packets",
 +	"tx_mcast_packets",
 +	"tx_bcast_packets",
 +	"tx_discards",
 +	"tx_drops",
 +	"tx_ucast_bytes",
 +	"tx_mcast_bytes",
 +	"tx_bcast_bytes",
 +};
 +
 +static const char * const bnxt_ring_tpa_stats_str[] = {
 +	"tpa_packets",
 +	"tpa_bytes",
 +	"tpa_events",
 +	"tpa_aborts",
 +};
 +
 +static const char * const bnxt_ring_tpa2_stats_str[] = {
 +	"rx_tpa_eligible_pkt",
 +	"rx_tpa_eligible_bytes",
 +	"rx_tpa_pkt",
 +	"rx_tpa_bytes",
 +	"rx_tpa_errors",
 +};
 +
 +static const char * const bnxt_ring_sw_stats_str[] = {
 +	"rx_l4_csum_errors",
 +	"missed_irqs",
 +};
  
  #define BNXT_RX_STATS_ENTRY(counter)	\
  	{ BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
@@@ -244,20 -207,6 +244,20 @@@
  	BNXT_TX_STATS_EXT_COS_ENTRY(6),				\
  	BNXT_TX_STATS_EXT_COS_ENTRY(7)				\
  
 +#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(n)			\
 +	BNXT_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n),	\
 +	BNXT_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n)
 +
 +#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(0),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(1),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(2),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(3),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(4),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(5),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(6),				\
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(7)
 +
  #define BNXT_RX_STATS_PRI_ENTRY(counter, n)		\
  	{ BNXT_RX_STATS_EXT_OFFSET(counter##_cos0),	\
  	  __stringify(counter##_pri##n) }
@@@ -403,7 -352,6 +403,7 @@@ static const struct 
  	BNXT_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold),
  	BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err),
  	BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits),
 +	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES,
  };
  
  static const struct {
@@@ -469,29 -417,9 +469,29 @@@ static const struct 
  	 ARRAY_SIZE(bnxt_tx_pkts_pri_arr))
  #define BNXT_NUM_PCIE_STATS ARRAY_SIZE(bnxt_pcie_stats_arr)
  
 +static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
 +{
 +	if (BNXT_SUPPORTS_TPA(bp)) {
 +		if (bp->max_tpa_v2)
 +			return ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
 +		return ARRAY_SIZE(bnxt_ring_tpa_stats_str);
 +	}
 +	return 0;
 +}
 +
 +static int bnxt_get_num_ring_stats(struct bnxt *bp)
 +{
 +	int num_stats;
 +
 +	num_stats = ARRAY_SIZE(bnxt_ring_stats_str) +
 +		    ARRAY_SIZE(bnxt_ring_sw_stats_str) +
 +		    bnxt_get_num_tpa_ring_stats(bp);
 +	return num_stats * bp->cp_nr_rings;
 +}
 +
  static int bnxt_get_num_stats(struct bnxt *bp)
  {
 -	int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
 +	int num_stats = bnxt_get_num_ring_stats(bp);
  
  	num_stats += BNXT_NUM_SW_FUNC_STATS;
  
@@@ -532,11 -460,10 +532,11 @@@ static void bnxt_get_ethtool_stats(stru
  {
  	u32 i, j = 0;
  	struct bnxt *bp = netdev_priv(dev);
 -	u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
 +	u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) +
 +			  bnxt_get_num_tpa_ring_stats(bp);
  
  	if (!bp->bnapi) {
 -		j += BNXT_NUM_STATS * bp->cp_nr_rings + BNXT_NUM_SW_FUNC_STATS;
 +		j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS;
  		goto skip_ring_stats;
  	}
  
@@@ -624,39 -551,56 +624,39 @@@ skip_ring_stats
  static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
  {
  	struct bnxt *bp = netdev_priv(dev);
 -	u32 i;
 +	static const char * const *str;
 +	u32 i, j, num_str;
  
  	switch (stringset) {
 -	/* The number of strings must match BNXT_NUM_STATS defined above. */
  	case ETH_SS_STATS:
  		for (i = 0; i < bp->cp_nr_rings; i++) {
 -			sprintf(buf, "[%d]: rx_ucast_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_mcast_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_bcast_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_discards", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_drops", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_ucast_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_mcast_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_bcast_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_ucast_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_mcast_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_bcast_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_discards", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_drops", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_ucast_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_mcast_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tx_bcast_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tpa_packets", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tpa_bytes", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tpa_events", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: tpa_aborts", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: rx_l4_csum_errors", i);
 -			buf += ETH_GSTRING_LEN;
 -			sprintf(buf, "[%d]: missed_irqs", i);
 -			buf += ETH_GSTRING_LEN;
 +			num_str = ARRAY_SIZE(bnxt_ring_stats_str);
 +			for (j = 0; j < num_str; j++) {
 +				sprintf(buf, "[%d]: %s", i,
 +					bnxt_ring_stats_str[j]);
 +				buf += ETH_GSTRING_LEN;
 +			}
 +			if (!BNXT_SUPPORTS_TPA(bp))
 +				goto skip_tpa_stats;
 +
 +			if (bp->max_tpa_v2) {
 +				num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
 +				str = bnxt_ring_tpa2_stats_str;
 +			} else {
 +				num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str);
 +				str = bnxt_ring_tpa_stats_str;
 +			}
 +			for (j = 0; j < num_str; j++) {
 +				sprintf(buf, "[%d]: %s", i, str[j]);
 +				buf += ETH_GSTRING_LEN;
 +			}
 +skip_tpa_stats:
 +			num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str);
 +			for (j = 0; j < num_str; j++) {
 +				sprintf(buf, "[%d]: %s", i,
 +					bnxt_ring_sw_stats_str[j]);
 +				buf += ETH_GSTRING_LEN;
 +			}
  		}
  		for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
  			strcpy(buf, bnxt_sw_func_stats[i].string);
@@@ -2072,21 -2016,19 +2072,19 @@@ static int bnxt_flash_package_from_file
  	mutex_lock(&bp->hwrm_cmd_lock);
  	hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
  				      INSTALL_PACKAGE_TIMEOUT);
- 	if (hwrm_err)
- 		goto flash_pkg_exit;
- 
- 	if (resp->error_code) {
+ 	if (hwrm_err) {
  		u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
  
- 		if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
+ 		if (resp->error_code && error_code ==
+ 		    NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
  			install.flags |= cpu_to_le16(
  			       NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
  			hwrm_err = _hwrm_send_message(bp, &install,
  						      sizeof(install),
  						      INSTALL_PACKAGE_TIMEOUT);
- 			if (hwrm_err)
- 				goto flash_pkg_exit;
  		}
+ 		if (hwrm_err)
+ 			goto flash_pkg_exit;
  	}
  
  	if (resp->result) {
diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index dd99c55d9a88,d692251ee252..ae6a47dd7dc9
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@@ -3236,8 -3236,10 +3236,10 @@@ static ssize_t blocked_fl_write(struct 
  		return -ENOMEM;
  
  	err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz);
- 	if (err)
+ 	if (err) {
+ 		kvfree(t);
  		return err;
+ 	}
  
  	bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz);
  	kvfree(t);
@@@ -3529,6 -3531,7 +3531,6 @@@ int t4_setup_debugfs(struct adapter *ad
  {
  	int i;
  	u32 size = 0;
 -	struct dentry *de;
  
  	static struct t4_debugfs_entry t4_debugfs_files[] = {
  		{ "cim_la", &cim_la_fops, 0400, 0 },
@@@ -3639,8 -3642,8 +3641,8 @@@
  		}
  	}
  
 -	de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
 -				      &flash_debugfs_fops, adap->params.sf_size);
 +	debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
 +				 &flash_debugfs_fops, adap->params.sf_size);
  	debugfs_create_bool("use_backdoor", 0600,
  			    adap->debugfs_root, &adap->use_bd);
  	debugfs_create_bool("trace_rss", 0600,
diff --combined drivers/net/ethernet/ibm/ibmvnic.c
index 81a05ea38237,cebd20f3128d..07efa2b40003
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@@ -1485,7 -1485,7 +1485,7 @@@ static netdev_tx_t ibmvnic_xmit(struct 
  
  			memcpy(dst + cur,
  			       page_address(skb_frag_page(frag)) +
 -			       frag->page_offset, skb_frag_size(frag));
 +			       skb_frag_off(frag), skb_frag_size(frag));
  			cur += skb_frag_size(frag);
  		}
  	} else {
@@@ -1568,6 -1568,8 +1568,8 @@@
  		lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
  					       (u64)tx_buff->indir_dma,
  					       (u64)num_entries);
+ 		dma_unmap_single(dev, tx_buff->indir_dma,
+ 				 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
  	} else {
  		tx_buff->num_entries = num_entries;
  		lpar_rc = send_subcrq(adapter, handle_array[queue_num],
@@@ -2788,7 -2790,6 +2790,6 @@@ static int ibmvnic_complete_tx(struct i
  	union sub_crq *next;
  	int index;
  	int i, j;
- 	u8 *first;
  
  restart_loop:
  	while (pending_scrq(adapter, scrq)) {
@@@ -2818,14 -2819,6 +2819,6 @@@
  
  				txbuff->data_dma[j] = 0;
  			}
- 			/* if sub_crq was sent indirectly */
- 			first = &txbuff->indir_arr[0].generic.first;
- 			if (*first == IBMVNIC_CRQ_CMD) {
- 				dma_unmap_single(dev, txbuff->indir_dma,
- 						 sizeof(txbuff->indir_arr),
- 						 DMA_TO_DEVICE);
- 				*first = 0;
- 			}
  
  			if (txbuff->last_frag) {
  				dev_kfree_skb_any(txbuff->skb);
diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index dc7b128c780e,7882148abb43..17b7ae9f46ec
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@@ -1785,7 -1785,7 +1785,7 @@@ static bool ixgbe_is_non_eop(struct ixg
  static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
  			    struct sk_buff *skb)
  {
 -	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
 +	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
  	unsigned char *va;
  	unsigned int pull_len;
  
@@@ -1807,7 -1807,7 +1807,7 @@@
  
  	/* update all of the pointers */
  	skb_frag_size_sub(frag, pull_len);
 -	frag->page_offset += pull_len;
 +	skb_frag_off_add(frag, pull_len);
  	skb->data_len -= pull_len;
  	skb->tail += pull_len;
  }
@@@ -1840,11 -1840,11 +1840,11 @@@ static void ixgbe_dma_sync_frag(struct 
  					      skb_headlen(skb),
  					      DMA_FROM_DEVICE);
  	} else {
 -		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
 +		skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
  
  		dma_sync_single_range_for_cpu(rx_ring->dev,
  					      IXGBE_CB(skb)->dma,
 -					      frag->page_offset,
 +					      skb_frag_off(frag),
  					      skb_frag_size(frag),
  					      DMA_FROM_DEVICE);
  	}
@@@ -7897,11 -7897,8 +7897,8 @@@ static void ixgbe_service_task(struct w
  		return;
  	}
  	if (ixgbe_check_fw_error(adapter)) {
- 		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
- 			rtnl_lock();
+ 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
  			unregister_netdev(adapter->netdev);
- 			rtnl_unlock();
- 		}
  		ixgbe_service_event_complete(adapter);
  		return;
  	}
@@@ -8186,7 -8183,7 +8183,7 @@@ static int ixgbe_tx_map(struct ixgbe_ri
  	struct sk_buff *skb = first->skb;
  	struct ixgbe_tx_buffer *tx_buffer;
  	union ixgbe_adv_tx_desc *tx_desc;
 -	struct skb_frag_struct *frag;
 +	skb_frag_t *frag;
  	dma_addr_t dma;
  	unsigned int data_len, size;
  	u32 tx_flags = first->tx_flags;
@@@ -8605,8 -8602,7 +8602,8 @@@ netdev_tx_t ixgbe_xmit_frame_ring(struc
  	 * otherwise try next time
  	 */
  	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
 -		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
 +		count += TXD_USE_COUNT(skb_frag_size(
 +						&skb_shinfo(skb)->frags[f]));
  
  	if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
  		tx_ring->tx_stats.tx_busy++;
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0807992090b8,65bec19a438f..8cf548c7ad9c
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@@ -184,8 -184,13 +184,13 @@@ static inline int mlx5e_get_max_num_cha
  
  struct mlx5e_tx_wqe {
  	struct mlx5_wqe_ctrl_seg ctrl;
- 	struct mlx5_wqe_eth_seg  eth;
- 	struct mlx5_wqe_data_seg data[0];
+ 	union {
+ 		struct {
+ 			struct mlx5_wqe_eth_seg  eth;
+ 			struct mlx5_wqe_data_seg data[0];
+ 		};
+ 		u8 tls_progress_params_ctx[0];
+ 	};
  };
  
  struct mlx5e_rx_wqe_ll {
@@@ -351,7 -356,6 +356,7 @@@ enum 
  	MLX5E_SQ_STATE_IPSEC,
  	MLX5E_SQ_STATE_AM,
  	MLX5E_SQ_STATE_TLS,
 +	MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
  };
  
  struct mlx5e_sq_wqe_info {
@@@ -476,6 -480,8 +481,6 @@@ struct mlx5e_xdp_mpwqe 
  	struct mlx5e_tx_wqe *wqe;
  	u8                   ds_count;
  	u8                   pkt_count;
 -	u8                   max_ds_count;
 -	u8                   complete;
  	u8                   inline_on;
  };
  
@@@ -1099,6 -1105,8 +1104,8 @@@ u32 mlx5e_ethtool_get_rxfh_key_size(str
  u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
  int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
  			      struct ethtool_ts_info *info);
+ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ 			       struct ethtool_flash *flash);
  void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
  				  struct ethtool_pauseparam *pauseparam);
  int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
@@@ -1127,6 -1135,7 +1134,6 @@@ void mlx5e_build_rq_params(struct mlx5_
  			   struct mlx5e_params *params);
  void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
  			    u16 num_channels);
 -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
  void mlx5e_rx_dim_work(struct work_struct *work);
  void mlx5e_tx_dim_work(struct work_struct *work);
  
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 6e54fefea410,c7f86453c638..817c6ea7e349
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@@ -1,6 -1,7 +1,6 @@@
  /* SPDX-License-Identifier: GPL-2.0 */
  /* Copyright (c) 2019 Mellanox Technologies. */
  
 -#include <net/devlink.h>
  #include "reporter.h"
  #include "lib/eq.h"
  
@@@ -75,26 -76,21 +75,21 @@@ static int mlx5e_tx_reporter_err_cqe_re
  	u8 state;
  	int err;
  
- 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
- 		return 0;
- 
  	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
  	if (err) {
  		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
  			   sq->sqn, err);
- 		return err;
+ 		goto out;
  	}
  
- 	if (state != MLX5_SQC_STATE_ERR) {
- 		netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
- 		return -EINVAL;
- 	}
+ 	if (state != MLX5_SQC_STATE_ERR)
+ 		goto out;
  
  	mlx5e_tx_disable_queue(sq->txq);
  
  	err = mlx5e_wait_for_sq_flush(sq);
  	if (err)
- 		return err;
+ 		goto out;
  
  	/* At this point, no new packets will arrive from the stack as TXQ is
  	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
@@@ -103,20 -99,24 +98,24 @@@
  
  	err = mlx5e_sq_to_ready(sq, state);
  	if (err)
- 		return err;
+ 		goto out;
  
  	mlx5e_reset_txqsq_cc_pc(sq);
  	sq->stats->recover++;
+ 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
  	mlx5e_activate_txqsq(sq);
  
  	return 0;
+ out:
+ 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ 	return err;
  }
  
  static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
  				 char *err_str,
  				 struct mlx5e_tx_err_ctx *err_ctx)
  {
 -	if (IS_ERR_OR_NULL(tx_reporter)) {
 +	if (!tx_reporter) {
  		netdev_err(err_ctx->sq->channel->netdev, err_str);
  		return err_ctx->recover(err_ctx->sq);
  	}
@@@ -288,27 -288,23 +287,27 @@@ static const struct devlink_health_repo
  
  int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
  {
 +	struct devlink_health_reporter *reporter;
  	struct mlx5_core_dev *mdev = priv->mdev;
  	struct devlink *devlink = priv_to_devlink(mdev);
  
 -	priv->tx_reporter =
 +	reporter =
  		devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
  					       MLX5_REPORTER_TX_GRACEFUL_PERIOD,
  					       true, priv);
 -	if (IS_ERR(priv->tx_reporter))
 +	if (IS_ERR(reporter)) {
  		netdev_warn(priv->netdev,
  			    "Failed to create tx reporter, err = %ld\n",
 -			    PTR_ERR(priv->tx_reporter));
 -	return IS_ERR_OR_NULL(priv->tx_reporter);
 +			    PTR_ERR(reporter));
 +		return PTR_ERR(reporter);
 +	}
 +	priv->tx_reporter = reporter;
 +	return 0;
  }
  
  void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
  {
 -	if (IS_ERR_OR_NULL(priv->tx_reporter))
 +	if (!priv->tx_reporter)
  		return;
  
  	devlink_health_reporter_destroy(priv->tx_reporter);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index f701e4f3c076,7f78c004d12f..2c4d1f415968
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@@ -60,28 -60,24 +60,28 @@@ int mlx5e_open_xsk(struct mlx5e_priv *p
  		   struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
  		   struct mlx5e_channel *c)
  {
 -	struct mlx5e_channel_param cparam = {};
 +	struct mlx5e_channel_param *cparam;
  	struct dim_cq_moder icocq_moder = {};
  	int err;
  
  	if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
  		return -EINVAL;
  
 -	mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
 +	cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
 +	if (!cparam)
 +		return -ENOMEM;
  
 -	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
 +	mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
 +
 +	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq);
  	if (unlikely(err))
 -		return err;
 +		goto err_free_cparam;
  
 -	err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
 +	err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
  	if (unlikely(err))
  		goto err_close_rx_cq;
  
 -	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
 +	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq);
  	if (unlikely(err))
  		goto err_close_rq;
  
@@@ -91,23 -87,21 +91,23 @@@
  	 * is disabled and then reenabled, but the SQ continues receiving CQEs
  	 * from the old UMEM.
  	 */
 -	err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
 +	err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
  	if (unlikely(err))
  		goto err_close_tx_cq;
  
 -	err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
 +	err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
  	if (unlikely(err))
  		goto err_close_sq;
  
  	/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
  	 * triggered and NAPI to be called on the correct CPU.
  	 */
 -	err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
 +	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
  	if (unlikely(err))
  		goto err_close_icocq;
  
 +	kvfree(cparam);
 +
  	spin_lock_init(&c->xskicosq_lock);
  
  	set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
@@@ -129,9 -123,6 +129,9 @@@ err_close_rq
  err_close_rx_cq:
  	mlx5e_close_cq(&c->xskrq.cq);
  
 +err_free_cparam:
 +	kvfree(cparam);
 +
  	return err;
  }
  
@@@ -152,7 -143,10 +152,10 @@@ void mlx5e_activate_xsk(struct mlx5e_ch
  {
  	set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
  	/* TX queue is created active. */
+ 
+ 	spin_lock(&c->xskicosq_lock);
  	mlx5e_trigger_irq(&c->xskicosq);
+ 	spin_unlock(&c->xskicosq_lock);
  }
  
  void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 02530b50609c,20e628c907e5..7347d673f448
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@@ -1081,6 -1081,14 +1081,14 @@@ int mlx5e_ethtool_set_link_ksettings(st
  	link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
  		mlx5e_port_speed2linkmodes(mdev, speed, !ext);
  
+ 	if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+ 	    autoneg != AUTONEG_ENABLE) {
+ 		netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n",
+ 			   __func__);
+ 		err = -EINVAL;
+ 		goto out;
+ 	}
+ 
  	link_modes = link_modes & eproto.cap;
  	if (!link_modes) {
  		netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
@@@ -1338,6 -1346,9 +1346,9 @@@ int mlx5e_ethtool_set_pauseparam(struc
  	struct mlx5_core_dev *mdev = priv->mdev;
  	int err;
  
+ 	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ 		return -EOPNOTSUPP;
+ 
  	if (pauseparam->autoneg)
  		return -EINVAL;
  
@@@ -1679,6 -1690,40 +1690,40 @@@ static int mlx5e_get_module_eeprom(stru
  	return 0;
  }
  
+ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ 			       struct ethtool_flash *flash)
+ {
+ 	struct mlx5_core_dev *mdev = priv->mdev;
+ 	struct net_device *dev = priv->netdev;
+ 	const struct firmware *fw;
+ 	int err;
+ 
+ 	if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ 		return -EOPNOTSUPP;
+ 
+ 	err = request_firmware_direct(&fw, flash->data, &dev->dev);
+ 	if (err)
+ 		return err;
+ 
+ 	dev_hold(dev);
+ 	rtnl_unlock();
+ 
+ 	err = mlx5_firmware_flash(mdev, fw, NULL);
+ 	release_firmware(fw);
+ 
+ 	rtnl_lock();
+ 	dev_put(dev);
+ 	return err;
+ }
+ 
+ static int mlx5e_flash_device(struct net_device *dev,
+ 			      struct ethtool_flash *flash)
+ {
+ 	struct mlx5e_priv *priv = netdev_priv(dev);
+ 
+ 	return mlx5e_ethtool_flash_device(priv, flash);
+ }
+ 
  static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
  				     bool is_rx_cq)
  {
@@@ -1913,27 -1958,21 +1958,27 @@@ static u32 mlx5e_get_priv_flags(struct 
  	return priv->channels.params.pflags;
  }
  
 -#ifndef CONFIG_MLX5_EN_RXNFC
 -/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS
 - * otherwise this function will be defined from en_fs_ethtool.c
 - */
  static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
  {
  	struct mlx5e_priv *priv = netdev_priv(dev);
  
 -	if (info->cmd != ETHTOOL_GRXRINGS)
 -		return -EOPNOTSUPP;
 -	/* ring_count is needed by ethtool -x */
 -	info->data = priv->channels.params.num_channels;
 -	return 0;
 +	/* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
 +	 * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
 +	 * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
 +	 * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
 +	 */
 +	if (info->cmd == ETHTOOL_GRXRINGS) {
 +		info->data = priv->channels.params.num_channels;
 +		return 0;
 +	}
 +
 +	return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs);
 +}
 +
 +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 +{
 +	return mlx5e_ethtool_set_rxnfc(dev, cmd);
  }
 -#endif
  
  const struct ethtool_ops mlx5e_ethtool_ops = {
  	.get_drvinfo       = mlx5e_get_drvinfo,
@@@ -1954,7 -1993,9 +1999,7 @@@
  	.get_rxfh          = mlx5e_get_rxfh,
  	.set_rxfh          = mlx5e_set_rxfh,
  	.get_rxnfc         = mlx5e_get_rxnfc,
 -#ifdef CONFIG_MLX5_EN_RXNFC
  	.set_rxnfc         = mlx5e_set_rxnfc,
 -#endif
  	.get_tunable       = mlx5e_get_tunable,
  	.set_tunable       = mlx5e_set_tunable,
  	.get_pauseparam    = mlx5e_get_pauseparam,
@@@ -1965,6 -2006,7 +2010,7 @@@
  	.set_wol	   = mlx5e_set_wol,
  	.get_module_info   = mlx5e_get_module_info,
  	.get_module_eeprom = mlx5e_get_module_eeprom,
+ 	.flash_device      = mlx5e_flash_device,
  	.get_priv_flags    = mlx5e_get_priv_flags,
  	.set_priv_flags    = mlx5e_set_priv_flags,
  	.self_test         = mlx5e_self_test,
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9a2fcef6e7f0,9d5f6e56188f..0c8e847a9eee
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@@ -1130,8 -1130,6 +1130,8 @@@ static int mlx5e_alloc_txqsq(struct mlx
  	sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
  	sq->stop_room = MLX5E_SQ_STOP_ROOM;
  	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 +	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 +		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
  	if (MLX5_IPSEC_DEV(c->priv->mdev))
  		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
  	if (mlx5_accel_is_tls_device(c->priv->mdev)) {
@@@ -1323,7 -1321,6 +1323,6 @@@ err_free_txqsq
  void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
  {
  	sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
- 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
  	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
  	netdev_tx_reset_queue(sq->txq);
  	netif_tx_start_queue(sq->txq);
@@@ -2324,7 -2321,7 +2323,7 @@@ int mlx5e_open_channels(struct mlx5e_pr
  			goto err_close_channels;
  	}
  
 -	if (!IS_ERR_OR_NULL(priv->tx_reporter))
 +	if (priv->tx_reporter)
  		devlink_health_reporter_state_update(priv->tx_reporter,
  						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
  
@@@ -3425,7 -3422,7 +3424,7 @@@ out
  #ifdef CONFIG_MLX5_ESWITCH
  static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
  				     struct flow_cls_offload *cls_flower,
 -				     int flags)
 +				     unsigned long flags)
  {
  	switch (cls_flower->command) {
  	case FLOW_CLS_REPLACE:
@@@ -3445,12 -3442,12 +3444,12 @@@
  static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
  				   void *cb_priv)
  {
 +	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
  	struct mlx5e_priv *priv = cb_priv;
  
  	switch (type) {
  	case TC_SETUP_CLSFLOWER:
 -		return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
 -						 MLX5E_TC_NIC_OFFLOAD);
 +		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
  	default:
  		return -EOPNOTSUPP;
  	}
@@@ -3643,7 -3640,7 +3642,7 @@@ static int set_feature_tc_num_filters(s
  {
  	struct mlx5e_priv *priv = netdev_priv(netdev);
  
 -	if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) {
 +	if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
  		netdev_err(netdev,
  			   "Active offloaded tc filters, can't turn hw_tc_offload off\n");
  		return -EINVAL;
@@@ -3784,10 -3781,9 +3783,10 @@@ static netdev_features_t mlx5e_fix_feat
  			netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
  	}
  	if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
 -		features &= ~NETIF_F_LRO;
 -		if (params->lro_en)
 +		if (features & NETIF_F_LRO) {
  			netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
 +			features &= ~NETIF_F_LRO;
 +		}
  	}
  
  	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
@@@ -3954,8 -3950,7 +3953,8 @@@ int mlx5e_hwstamp_set(struct mlx5e_pri
  	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
  	case HWTSTAMP_FILTER_NTP_ALL:
  		/* Disable CQE compression */
 -		netdev_warn(priv->netdev, "Disabling cqe compression");
 +		if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
 +			netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
  		err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
  		if (err) {
  			netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
@@@ -4773,7 -4768,7 +4772,7 @@@ void mlx5e_build_nic_params(struct mlx5
  	mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
  
  	/* TX inline */
 -	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
 +	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
  
  	/* RSS */
  	mlx5e_build_rss_params(rss_params, params->num_channels);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 5be3da621499,00b2d4a86159..c57f7533a6d0
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@@ -38,8 -38,6 +38,8 @@@
  #include <linux/mlx5/fs.h>
  #include <linux/mlx5/device.h>
  #include <linux/rhashtable.h>
 +#include <linux/refcount.h>
 +#include <linux/completion.h>
  #include <net/tc_act/tc_mirred.h>
  #include <net/tc_act/tc_vlan.h>
  #include <net/tc_act/tc_tunnel_key.h>
@@@ -67,20 -65,19 +67,20 @@@ struct mlx5_nic_flow_attr 
  	struct mlx5_fc		*counter;
  };
  
 -#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
 +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
  
  enum {
 -	MLX5E_TC_FLOW_INGRESS	= MLX5E_TC_INGRESS,
 -	MLX5E_TC_FLOW_EGRESS	= MLX5E_TC_EGRESS,
 -	MLX5E_TC_FLOW_ESWITCH	= MLX5E_TC_ESW_OFFLOAD,
 -	MLX5E_TC_FLOW_NIC	= MLX5E_TC_NIC_OFFLOAD,
 -	MLX5E_TC_FLOW_OFFLOADED	= BIT(MLX5E_TC_FLOW_BASE),
 -	MLX5E_TC_FLOW_HAIRPIN	= BIT(MLX5E_TC_FLOW_BASE + 1),
 -	MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
 -	MLX5E_TC_FLOW_SLOW	  = BIT(MLX5E_TC_FLOW_BASE + 3),
 -	MLX5E_TC_FLOW_DUP         = BIT(MLX5E_TC_FLOW_BASE + 4),
 -	MLX5E_TC_FLOW_NOT_READY   = BIT(MLX5E_TC_FLOW_BASE + 5),
 +	MLX5E_TC_FLOW_FLAG_INGRESS	= MLX5E_TC_FLAG_INGRESS_BIT,
 +	MLX5E_TC_FLOW_FLAG_EGRESS	= MLX5E_TC_FLAG_EGRESS_BIT,
 +	MLX5E_TC_FLOW_FLAG_ESWITCH	= MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
 +	MLX5E_TC_FLOW_FLAG_NIC		= MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
 +	MLX5E_TC_FLOW_FLAG_OFFLOADED	= MLX5E_TC_FLOW_BASE,
 +	MLX5E_TC_FLOW_FLAG_HAIRPIN	= MLX5E_TC_FLOW_BASE + 1,
 +	MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS	= MLX5E_TC_FLOW_BASE + 2,
 +	MLX5E_TC_FLOW_FLAG_SLOW		= MLX5E_TC_FLOW_BASE + 3,
 +	MLX5E_TC_FLOW_FLAG_DUP		= MLX5E_TC_FLOW_BASE + 4,
 +	MLX5E_TC_FLOW_FLAG_NOT_READY	= MLX5E_TC_FLOW_BASE + 5,
 +	MLX5E_TC_FLOW_FLAG_DELETED	= MLX5E_TC_FLOW_BASE + 6,
  };
  
  #define MLX5E_TC_MAX_SPLITS 1
@@@ -103,7 -100,6 +103,7 @@@
   *        container_of(helper item, containing struct type, helper field[index])
   */
  struct encap_flow_item {
 +	struct mlx5e_encap_entry *e; /* attached encap instance */
  	struct list_head list;
  	int index;
  };
@@@ -112,7 -108,7 +112,7 @@@ struct mlx5e_tc_flow 
  	struct rhash_head	node;
  	struct mlx5e_priv	*priv;
  	u64			cookie;
 -	u16			flags;
 +	unsigned long		flags;
  	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
  	/* Flow can be associated with multiple encap IDs.
  	 * The number of encaps is bounded by the number of supported
@@@ -120,14 -116,10 +120,14 @@@
  	 */
  	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
  	struct mlx5e_tc_flow    *peer_flow;
 +	struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
  	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
 +	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
  	struct list_head	hairpin; /* flows sharing the same hairpin */
  	struct list_head	peer;    /* flows with peer flow */
  	struct list_head	unready; /* flows not ready to be offloaded (e.g due to missing route) */
 +	refcount_t		refcnt;
 +	struct rcu_head		rcu_head;
  	union {
  		struct mlx5_esw_flow_attr esw_attr[0];
  		struct mlx5_nic_flow_attr nic_attr[0];
@@@ -165,20 -157,12 +165,20 @@@ struct mlx5e_hairpin_entry 
  	/* a node of a hash table which keeps all the  hairpin entries */
  	struct hlist_node hairpin_hlist;
  
 +	/* protects flows list */
 +	spinlock_t flows_lock;
  	/* flows sharing the same hairpin */
  	struct list_head flows;
 +	/* hpe's that were not fully initialized when the dead peer update
 +	 * event function traversed them.
 +	 */
 +	struct list_head dead_peer_wait_list;
  
  	u16 peer_vhca_id;
  	u8 prio;
  	struct mlx5e_hairpin *hp;
 +	refcount_t refcnt;
 +	struct completion res_ready;
  };
  
  struct mod_hdr_key {
@@@ -190,93 -174,16 +190,93 @@@ struct mlx5e_mod_hdr_entry 
  	/* a node of a hash table which keeps all the mod_hdr entries */
  	struct hlist_node mod_hdr_hlist;
  
 +	/* protects flows list */
 +	spinlock_t flows_lock;
  	/* flows sharing the same mod_hdr entry */
  	struct list_head flows;
  
  	struct mod_hdr_key key;
  
  	u32 mod_hdr_id;
 +
 +	refcount_t refcnt;
 +	struct completion res_ready;
 +	int compl_result;
  };
  
  #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
  
 +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 +			      struct mlx5e_tc_flow *flow);
 +
 +static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
 +{
 +	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
 +		return ERR_PTR(-EINVAL);
 +	return flow;
 +}
 +
 +static void mlx5e_flow_put(struct mlx5e_priv *priv,
 +			   struct mlx5e_tc_flow *flow)
 +{
 +	if (refcount_dec_and_test(&flow->refcnt)) {
 +		mlx5e_tc_del_flow(priv, flow);
 +		kfree_rcu(flow, rcu_head);
 +	}
 +}
 +
 +static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
 +{
 +	/* Complete all memory stores before setting bit. */
 +	smp_mb__before_atomic();
 +	set_bit(flag, &flow->flags);
 +}
 +
 +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
 +				     unsigned long flag)
 +{
 +	/* test_and_set_bit() provides all necessary barriers */
 +	return test_and_set_bit(flag, &flow->flags);
 +}
 +
 +#define flow_flag_test_and_set(flow, flag)			\
 +	__flow_flag_test_and_set(flow,				\
 +				 MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
 +{
 +	/* Complete all memory stores before clearing bit. */
 +	smp_mb__before_atomic();
 +	clear_bit(flag, &flow->flags);
 +}
 +
 +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
 +						      MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
 +{
 +	bool ret = test_bit(flag, &flow->flags);
 +
 +	/* Read fields of flow structure only after checking flags. */
 +	smp_mb__after_atomic();
 +	return ret;
 +}
 +
 +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
 +						    MLX5E_TC_FLOW_FLAG_##flag)
 +
 +static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
 +{
 +	return flow_flag_test(flow, ESWITCH);
 +}
 +
 +static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
 +{
 +	return flow_flag_test(flow, OFFLOADED);
 +}
 +
  static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
  {
  	return jhash(key->actions,
@@@ -292,62 -199,15 +292,62 @@@ static inline int cmp_mod_hdr_info(stru
  	return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
  }
  
 +static struct mod_hdr_tbl *
 +get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
 +{
 +	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 +
 +	return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
 +		&priv->fs.tc.mod_hdr;
 +}
 +
 +static struct mlx5e_mod_hdr_entry *
 +mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
 +{
 +	struct mlx5e_mod_hdr_entry *mh, *found = NULL;
 +
 +	hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
 +		if (!cmp_mod_hdr_info(&mh->key, key)) {
 +			refcount_inc(&mh->refcnt);
 +			found = mh;
 +			break;
 +		}
 +	}
 +
 +	return found;
 +}
 +
 +static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
 +			      struct mlx5e_mod_hdr_entry *mh,
 +			      int namespace)
 +{
 +	struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
 +
 +	if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
 +		return;
 +	hash_del(&mh->mod_hdr_hlist);
 +	mutex_unlock(&tbl->lock);
 +
 +	WARN_ON(!list_empty(&mh->flows));
 +	if (mh->compl_result > 0)
 +		mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
 +
 +	kfree(mh);
 +}
 +
 +static int get_flow_name_space(struct mlx5e_tc_flow *flow)
 +{
 +	return mlx5e_is_eswitch_flow(flow) ?
 +		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
 +}
  static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
  				struct mlx5e_tc_flow *flow,
  				struct mlx5e_tc_flow_parse_attr *parse_attr)
  {
 -	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  	int num_actions, actions_size, namespace, err;
  	struct mlx5e_mod_hdr_entry *mh;
 +	struct mod_hdr_tbl *tbl;
  	struct mod_hdr_key key;
 -	bool found = false;
  	u32 hash_key;
  
  	num_actions  = parse_attr->num_mod_hdr_actions;
@@@ -358,82 -218,80 +358,82 @@@
  
  	hash_key = hash_mod_hdr_info(&key);
  
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 -		namespace = MLX5_FLOW_NAMESPACE_FDB;
 -		hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
 -				       mod_hdr_hlist, hash_key) {
 -			if (!cmp_mod_hdr_info(&mh->key, &key)) {
 -				found = true;
 -				break;
 -			}
 -		}
 -	} else {
 -		namespace = MLX5_FLOW_NAMESPACE_KERNEL;
 -		hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
 -				       mod_hdr_hlist, hash_key) {
 -			if (!cmp_mod_hdr_info(&mh->key, &key)) {
 -				found = true;
 -				break;
 -			}
 -		}
 -	}
 +	namespace = get_flow_name_space(flow);
 +	tbl = get_mod_hdr_table(priv, namespace);
 +
 +	mutex_lock(&tbl->lock);
 +	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
 +	if (mh) {
 +		mutex_unlock(&tbl->lock);
 +		wait_for_completion(&mh->res_ready);
  
 -	if (found)
 +		if (mh->compl_result < 0) {
 +			err = -EREMOTEIO;
 +			goto attach_header_err;
 +		}
  		goto attach_flow;
 +	}
  
  	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
 -	if (!mh)
 +	if (!mh) {
 +		mutex_unlock(&tbl->lock);
  		return -ENOMEM;
 +	}
  
  	mh->key.actions = (void *)mh + sizeof(*mh);
  	memcpy(mh->key.actions, key.actions, actions_size);
  	mh->key.num_actions = num_actions;
 +	spin_lock_init(&mh->flows_lock);
  	INIT_LIST_HEAD(&mh->flows);
 +	refcount_set(&mh->refcnt, 1);
 +	init_completion(&mh->res_ready);
 +
 +	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
 +	mutex_unlock(&tbl->lock);
  
  	err = mlx5_modify_header_alloc(priv->mdev, namespace,
  				       mh->key.num_actions,
  				       mh->key.actions,
  				       &mh->mod_hdr_id);
 -	if (err)
 -		goto out_err;
 -
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 -		hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
 -	else
 -		hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
 +	if (err) {
 +		mh->compl_result = err;
 +		goto alloc_header_err;
 +	}
 +	mh->compl_result = 1;
 +	complete_all(&mh->res_ready);
  
  attach_flow:
 +	flow->mh = mh;
 +	spin_lock(&mh->flows_lock);
  	list_add(&flow->mod_hdr, &mh->flows);
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 +	spin_unlock(&mh->flows_lock);
 +	if (mlx5e_is_eswitch_flow(flow))
  		flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
  	else
  		flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
  
  	return 0;
  
 -out_err:
 -	kfree(mh);
 +alloc_header_err:
 +	complete_all(&mh->res_ready);
 +attach_header_err:
 +	mlx5e_mod_hdr_put(priv, mh, namespace);
  	return err;
  }
  
  static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
  				 struct mlx5e_tc_flow *flow)
  {
 -	struct list_head *next = flow->mod_hdr.next;
 +	/* flow wasn't fully initialized */
 +	if (!flow->mh)
 +		return;
  
 +	spin_lock(&flow->mh->flows_lock);
  	list_del(&flow->mod_hdr);
 +	spin_unlock(&flow->mh->flows_lock);
  
 -	if (list_empty(next)) {
 -		struct mlx5e_mod_hdr_entry *mh;
 -
 -		mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
 -
 -		mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
 -		hash_del(&mh->mod_hdr_hlist);
 -		kfree(mh);
 -	}
 +	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
 +	flow->mh = NULL;
  }
  
  static
@@@ -697,35 -555,13 +697,35 @@@ static struct mlx5e_hairpin_entry *mlx5
  
  	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
  			       hairpin_hlist, hash_key) {
 -		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
 +		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
 +			refcount_inc(&hpe->refcnt);
  			return hpe;
 +		}
  	}
  
  	return NULL;
  }
  
 +static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
 +			      struct mlx5e_hairpin_entry *hpe)
 +{
 +	/* no more hairpin flows for us, release the hairpin pair */
 +	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
 +		return;
 +	hash_del(&hpe->hairpin_hlist);
 +	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 +
 +	if (!IS_ERR_OR_NULL(hpe->hp)) {
 +		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
 +			   dev_name(hpe->hp->pair->peer_mdev->device));
 +
 +		mlx5e_hairpin_destroy(hpe->hp);
 +	}
 +
 +	WARN_ON(!list_empty(&hpe->flows));
 +	kfree(hpe);
 +}
 +
  #define UNKNOWN_MATCH_PRIO 8
  
  static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
@@@ -791,37 -627,17 +791,37 @@@ static int mlx5e_hairpin_flow_add(struc
  				     extack);
  	if (err)
  		return err;
 +
 +	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
  	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
 -	if (hpe)
 +	if (hpe) {
 +		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 +		wait_for_completion(&hpe->res_ready);
 +
 +		if (IS_ERR(hpe->hp)) {
 +			err = -EREMOTEIO;
 +			goto out_err;
 +		}
  		goto attach_flow;
 +	}
  
  	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
 -	if (!hpe)
 +	if (!hpe) {
 +		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
  		return -ENOMEM;
 +	}
  
 +	spin_lock_init(&hpe->flows_lock);
  	INIT_LIST_HEAD(&hpe->flows);
 +	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
  	hpe->peer_vhca_id = peer_id;
  	hpe->prio = match_prio;
 +	refcount_set(&hpe->refcnt, 1);
 +	init_completion(&hpe->res_ready);
 +
 +	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
 +		 hash_hairpin_info(peer_id, match_prio));
 +	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
  
  	params.log_data_size = 15;
  	params.log_data_size = min_t(u8, params.log_data_size,
@@@ -843,11 -659,9 +843,11 @@@
  	params.num_channels = link_speed64;
  
  	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
 +	hpe->hp = hp;
 +	complete_all(&hpe->res_ready);
  	if (IS_ERR(hp)) {
  		err = PTR_ERR(hp);
 -		goto create_hairpin_err;
 +		goto out_err;
  	}
  
  	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
@@@ -855,39 -669,46 +855,39 @@@
  		   dev_name(hp->pair->peer_mdev->device),
  		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
  
 -	hpe->hp = hp;
 -	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
 -		 hash_hairpin_info(peer_id, match_prio));
 -
  attach_flow:
  	if (hpe->hp->num_channels > 1) {
 -		flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
 +		flow_flag_set(flow, HAIRPIN_RSS);
  		flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
  	} else {
  		flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
  	}
 +
 +	flow->hpe = hpe;
 +	spin_lock(&hpe->flows_lock);
  	list_add(&flow->hairpin, &hpe->flows);
 +	spin_unlock(&hpe->flows_lock);
  
  	return 0;
  
 -create_hairpin_err:
 -	kfree(hpe);
 +out_err:
 +	mlx5e_hairpin_put(priv, hpe);
  	return err;
  }
  
  static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
  				   struct mlx5e_tc_flow *flow)
  {
 -	struct list_head *next = flow->hairpin.next;
 +	/* flow wasn't fully initialized */
 +	if (!flow->hpe)
 +		return;
  
 +	spin_lock(&flow->hpe->flows_lock);
  	list_del(&flow->hairpin);
 +	spin_unlock(&flow->hpe->flows_lock);
  
 -	/* no more hairpin flows for us, release the hairpin pair */
 -	if (list_empty(next)) {
 -		struct mlx5e_hairpin_entry *hpe;
 -
 -		hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
 -
 -		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
 -			   dev_name(hpe->hp->pair->peer_mdev->device));
 -
 -		mlx5e_hairpin_destroy(hpe->hp);
 -		hash_del(&hpe->hairpin_hlist);
 -		kfree(hpe);
 -	}
 +	mlx5e_hairpin_put(priv, flow->hpe);
 +	flow->hpe = NULL;
  }
  
  static int
@@@ -906,17 -727,18 +906,17 @@@ mlx5e_tc_add_nic_flow(struct mlx5e_pri
  		.flags    = FLOW_ACT_NO_APPEND,
  	};
  	struct mlx5_fc *counter = NULL;
 -	bool table_created = false;
  	int err, dest_ix = 0;
  
  	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
  	flow_context->flow_tag = attr->flow_tag;
  
 -	if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
 +	if (flow_flag_test(flow, HAIRPIN)) {
  		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
 -		if (err) {
 -			goto err_add_hairpin_flow;
 -		}
 -		if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
 +		if (err)
 +			return err;
 +
 +		if (flow_flag_test(flow, HAIRPIN_RSS)) {
  			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
  			dest[dest_ix].ft = attr->hairpin_ft;
  		} else {
@@@ -932,9 -754,10 +932,9 @@@
  
  	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
  		counter = mlx5_fc_create(dev, true);
 -		if (IS_ERR(counter)) {
 -			err = PTR_ERR(counter);
 -			goto err_fc_create;
 -		}
 +		if (IS_ERR(counter))
 +			return PTR_ERR(counter);
 +
  		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
  		dest[dest_ix].counter_id = mlx5_fc_id(counter);
  		dest_ix++;
@@@ -946,10 -769,9 +946,10 @@@
  		flow_act.modify_id = attr->mod_hdr_id;
  		kfree(parse_attr->mod_hdr_actions);
  		if (err)
 -			goto err_create_mod_hdr_id;
 +			return err;
  	}
  
 +	mutex_lock(&priv->fs.tc.t_lock);
  	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
  		int tc_grp_size, tc_tbl_size;
  		u32 max_flow_counter;
@@@ -969,13 -791,15 +969,13 @@@
  							    MLX5E_TC_TABLE_NUM_GROUPS,
  							    MLX5E_TC_FT_LEVEL, 0);
  		if (IS_ERR(priv->fs.tc.t)) {
 +			mutex_unlock(&priv->fs.tc.t_lock);
  			NL_SET_ERR_MSG_MOD(extack,
  					   "Failed to create tc offload table\n");
  			netdev_err(priv->netdev,
  				   "Failed to create tc offload table\n");
 -			err = PTR_ERR(priv->fs.tc.t);
 -			goto err_create_ft;
 +			return PTR_ERR(priv->fs.tc.t);
  		}
 -
 -		table_created = true;
  	}
  
  	if (attr->match_level != MLX5_MATCH_NONE)
@@@ -983,12 -807,29 +983,12 @@@
  
  	flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
  					    &flow_act, dest, dest_ix);
 +	mutex_unlock(&priv->fs.tc.t_lock);
  
 -	if (IS_ERR(flow->rule[0])) {
 -		err = PTR_ERR(flow->rule[0]);
 -		goto err_add_rule;
 -	}
 +	if (IS_ERR(flow->rule[0]))
 +		return PTR_ERR(flow->rule[0]);
  
  	return 0;
 -
 -err_add_rule:
 -	if (table_created) {
 -		mlx5_destroy_flow_table(priv->fs.tc.t);
 -		priv->fs.tc.t = NULL;
 -	}
 -err_create_ft:
 -	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 -		mlx5e_detach_mod_hdr(priv, flow);
 -err_create_mod_hdr_id:
 -	mlx5_fc_destroy(dev, counter);
 -err_fc_create:
 -	if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
 -		mlx5e_hairpin_flow_del(priv, flow);
 -err_add_hairpin_flow:
 -	return err;
  }
  
  static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
@@@ -998,21 -839,18 +998,21 @@@
  	struct mlx5_fc *counter = NULL;
  
  	counter = attr->counter;
 -	mlx5_del_flow_rules(flow->rule[0]);
 +	if (!IS_ERR_OR_NULL(flow->rule[0]))
 +		mlx5_del_flow_rules(flow->rule[0]);
  	mlx5_fc_destroy(priv->mdev, counter);
  
 -	if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)  && priv->fs.tc.t) {
 +	mutex_lock(&priv->fs.tc.t_lock);
 +	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
  		mlx5_destroy_flow_table(priv->fs.tc.t);
  		priv->fs.tc.t = NULL;
  	}
 +	mutex_unlock(&priv->fs.tc.t_lock);
  
  	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
  		mlx5e_detach_mod_hdr(priv, flow);
  
 -	if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
 +	if (flow_flag_test(flow, HAIRPIN))
  		mlx5e_hairpin_flow_del(priv, flow);
  }
  
@@@ -1047,6 -885,7 +1047,6 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_
  		}
  	}
  
 -	flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
  	return rule;
  }
  
@@@ -1055,7 -894,7 +1055,7 @@@ mlx5e_tc_unoffload_fdb_rules(struct mlx
  			     struct mlx5e_tc_flow *flow,
  			   struct mlx5_esw_flow_attr *attr)
  {
 -	flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
 +	flow_flag_clear(flow, OFFLOADED);
  
  	if (attr->split_count)
  		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
@@@ -1078,7 -917,7 +1078,7 @@@ mlx5e_tc_offload_to_slow_path(struct ml
  
  	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
  	if (!IS_ERR(rule))
 -		flow->flags |= MLX5E_TC_FLOW_SLOW;
 +		flow_flag_set(flow, SLOW);
  
  	return rule;
  }
@@@ -1093,26 -932,7 +1093,26 @@@ mlx5e_tc_unoffload_from_slow_path(struc
  	slow_attr->split_count = 0;
  	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
  	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
 -	flow->flags &= ~MLX5E_TC_FLOW_SLOW;
 +	flow_flag_clear(flow, SLOW);
 +}
 +
 +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 + * function.
 + */
 +static void unready_flow_add(struct mlx5e_tc_flow *flow,
 +			     struct list_head *unready_flows)
 +{
 +	flow_flag_set(flow, NOT_READY);
 +	list_add_tail(&flow->unready, unready_flows);
 +}
 +
 +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 + * function.
 + */
 +static void unready_flow_del(struct mlx5e_tc_flow *flow)
 +{
 +	list_del(&flow->unready);
 +	flow_flag_clear(flow, NOT_READY);
  }
  
  static void add_unready_flow(struct mlx5e_tc_flow *flow)
@@@ -1125,24 -945,14 +1125,24 @@@
  	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
  	uplink_priv = &rpriv->uplink_priv;
  
 -	flow->flags |= MLX5E_TC_FLOW_NOT_READY;
 -	list_add_tail(&flow->unready, &uplink_priv->unready_flows);
 +	mutex_lock(&uplink_priv->unready_flows_lock);
 +	unready_flow_add(flow, &uplink_priv->unready_flows);
 +	mutex_unlock(&uplink_priv->unready_flows_lock);
  }
  
  static void remove_unready_flow(struct mlx5e_tc_flow *flow)
  {
 -	list_del(&flow->unready);
 -	flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
 +	struct mlx5_rep_uplink_priv *uplink_priv;
 +	struct mlx5e_rep_priv *rpriv;
 +	struct mlx5_eswitch *esw;
 +
 +	esw = flow->priv->mdev->priv.eswitch;
 +	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 +	uplink_priv = &rpriv->uplink_priv;
 +
 +	mutex_lock(&uplink_priv->unready_flows_lock);
 +	unready_flow_del(flow);
 +	mutex_unlock(&uplink_priv->unready_flows_lock);
  }
  
  static int
@@@ -1170,12 -980,14 +1170,12 @@@ mlx5e_tc_add_fdb_flow(struct mlx5e_pri
  
  	if (attr->chain > max_chain) {
  		NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
 -		err = -EOPNOTSUPP;
 -		goto err_max_prio_chain;
 +		return -EOPNOTSUPP;
  	}
  
  	if (attr->prio > max_prio) {
  		NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
 -		err = -EOPNOTSUPP;
 -		goto err_max_prio_chain;
 +		return -EOPNOTSUPP;
  	}
  
  	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
@@@ -1190,7 -1002,7 +1190,7 @@@
  		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
  					 extack, &encap_dev, &encap_valid);
  		if (err)
 -			goto err_attach_encap;
 +			return err;
  
  		out_priv = netdev_priv(encap_dev);
  		rpriv = out_priv->ppriv;
@@@ -1200,19 -1012,21 +1200,19 @@@
  
  	err = mlx5_eswitch_add_vlan_action(esw, attr);
  	if (err)
 -		goto err_add_vlan;
 +		return err;
  
  	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
  		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
  		kfree(parse_attr->mod_hdr_actions);
  		if (err)
 -			goto err_mod_hdr;
 +			return err;
  	}
  
  	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
  		counter = mlx5_fc_create(attr->counter_dev, true);
 -		if (IS_ERR(counter)) {
 -			err = PTR_ERR(counter);
 -			goto err_create_counter;
 -		}
 +		if (IS_ERR(counter))
 +			return PTR_ERR(counter);
  
  		attr->counter = counter;
  	}
@@@ -1230,12 -1044,27 +1230,12 @@@
  		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
  	}
  
 -	if (IS_ERR(flow->rule[0])) {
 -		err = PTR_ERR(flow->rule[0]);
 -		goto err_add_rule;
 -	}
 +	if (IS_ERR(flow->rule[0]))
 +		return PTR_ERR(flow->rule[0]);
 +	else
 +		flow_flag_set(flow, OFFLOADED);
  
  	return 0;
 -
 -err_add_rule:
 -	mlx5_fc_destroy(attr->counter_dev, counter);
 -err_create_counter:
 -	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 -		mlx5e_detach_mod_hdr(priv, flow);
 -err_mod_hdr:
 -	mlx5_eswitch_del_vlan_action(esw, attr);
 -err_add_vlan:
 -	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
 -		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
 -			mlx5e_detach_encap(priv, flow, out_index);
 -err_attach_encap:
 -err_max_prio_chain:
 -	return err;
  }
  
  static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
@@@ -1259,14 -1088,14 +1259,14 @@@ static void mlx5e_tc_del_fdb_flow(struc
  	struct mlx5_esw_flow_attr slow_attr;
  	int out_index;
  
 -	if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
 +	if (flow_flag_test(flow, NOT_READY)) {
  		remove_unready_flow(flow);
  		kvfree(attr->parse_attr);
  		return;
  	}
  
 -	if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 -		if (flow->flags & MLX5E_TC_FLOW_SLOW)
 +	if (mlx5e_is_offloaded_flow(flow)) {
 +		if (flow_flag_test(flow, SLOW))
  			mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
  		else
  			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
@@@ -1294,9 -1123,9 +1294,9 @@@ void mlx5e_tc_encap_flows_add(struct ml
  {
  	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  	struct mlx5_esw_flow_attr slow_attr, *esw_attr;
 +	struct encap_flow_item *efi, *tmp;
  	struct mlx5_flow_handle *rule;
  	struct mlx5_flow_spec *spec;
 -	struct encap_flow_item *efi;
  	struct mlx5e_tc_flow *flow;
  	int err;
  
@@@ -1313,14 -1142,11 +1313,14 @@@
  	e->flags |= MLX5_ENCAP_ENTRY_VALID;
  	mlx5e_rep_queue_neigh_stats_work(priv);
  
 -	list_for_each_entry(efi, &e->flows, list) {
 +	list_for_each_entry_safe(efi, tmp, &e->flows, list) {
  		bool all_flow_encaps_valid = true;
  		int i;
  
  		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 +		if (IS_ERR(mlx5e_flow_get(flow)))
 +			continue;
 +
  		esw_attr = flow->esw_attr;
  		spec = &esw_attr->parse_attr->spec;
  
@@@ -1340,23 -1166,19 +1340,23 @@@
  		}
  		/* Do not offload flows with unresolved neighbors */
  		if (!all_flow_encaps_valid)
 -			continue;
 +			goto loop_cont;
  		/* update from slow path rule to encap rule */
  		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
  		if (IS_ERR(rule)) {
  			err = PTR_ERR(rule);
  			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
  				       err);
 -			continue;
 +			goto loop_cont;
  		}
  
  		mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
 -		flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */
  		flow->rule[0] = rule;
 +		/* was unset when slow path rule removed */
 +		flow_flag_set(flow, OFFLOADED);
 +
 +loop_cont:
 +		mlx5e_flow_put(priv, flow);
  	}
  }
  
@@@ -1365,17 -1187,14 +1365,17 @@@ void mlx5e_tc_encap_flows_del(struct ml
  {
  	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  	struct mlx5_esw_flow_attr slow_attr;
 +	struct encap_flow_item *efi, *tmp;
  	struct mlx5_flow_handle *rule;
  	struct mlx5_flow_spec *spec;
 -	struct encap_flow_item *efi;
  	struct mlx5e_tc_flow *flow;
  	int err;
  
 -	list_for_each_entry(efi, &e->flows, list) {
 +	list_for_each_entry_safe(efi, tmp, &e->flows, list) {
  		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 +		if (IS_ERR(mlx5e_flow_get(flow)))
 +			continue;
 +
  		spec = &flow->esw_attr->parse_attr->spec;
  
  		/* update from encap rule to slow path rule */
@@@ -1387,16 -1206,12 +1387,16 @@@
  			err = PTR_ERR(rule);
  			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
  				       err);
 -			continue;
 +			goto loop_cont;
  		}
  
  		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
 -		flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */
  		flow->rule[0] = rule;
 +		/* was unset when fast path rule removed */
 +		flow_flag_set(flow, OFFLOADED);
 +
 +loop_cont:
 +		mlx5e_flow_put(priv, flow);
  	}
  
  	/* we know that the encap is valid */
@@@ -1406,7 -1221,7 +1406,7 @@@
  
  static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
  {
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 +	if (mlx5e_is_eswitch_flow(flow))
  		return flow->esw_attr->counter;
  	else
  		return flow->nic_attr->counter;
@@@ -1433,32 -1248,21 +1433,32 @@@ void mlx5e_tc_update_neigh_used_value(s
  		return;
  
  	list_for_each_entry(e, &nhe->encap_list, encap_list) {
 -		struct encap_flow_item *efi;
 -		if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
 +		struct encap_flow_item *efi, *tmp;
 +
 +		if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) ||
 +		    !mlx5e_encap_take(e))
  			continue;
 -		list_for_each_entry(efi, &e->flows, list) {
 +
 +		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
  			flow = container_of(efi, struct mlx5e_tc_flow,
  					    encaps[efi->index]);
 -			if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 +			if (IS_ERR(mlx5e_flow_get(flow)))
 +				continue;
 +
 +			if (mlx5e_is_offloaded_flow(flow)) {
  				counter = mlx5e_tc_get_counter(flow);
  				lastuse = mlx5_fc_query_lastuse(counter);
  				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 +					mlx5e_flow_put(netdev_priv(e->out_dev), flow);
  					neigh_used = true;
  					break;
  				}
  			}
 +
 +			mlx5e_flow_put(netdev_priv(e->out_dev), flow);
  		}
 +
 +		mlx5e_encap_put(netdev_priv(e->out_dev), e);
  		if (neigh_used)
  			break;
  	}
@@@ -1478,66 -1282,40 +1478,66 @@@
  	}
  }
  
 -static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 -			       struct mlx5e_tc_flow *flow, int out_index)
 +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
  {
 -	struct list_head *next = flow->encaps[out_index].list.next;
 +	WARN_ON(!list_empty(&e->flows));
 +	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
  
 -	list_del(&flow->encaps[out_index].list);
 -	if (list_empty(next)) {
 -		struct mlx5e_encap_entry *e;
 +	if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 +		mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
  
 -		e = list_entry(next, struct mlx5e_encap_entry, flows);
 -		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 +	kfree(e->encap_header);
 +	kfree(e);
 +}
  
 -		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 -			mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
 +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 +{
 +	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  
 -		hash_del_rcu(&e->encap_hlist);
 -		kfree(e->encap_header);
 -		kfree(e);
 +	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
 +		return;
 +	hash_del_rcu(&e->encap_hlist);
 +	mutex_unlock(&esw->offloads.encap_tbl_lock);
 +
 +	mlx5e_encap_dealloc(priv, e);
 +}
 +
 +static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 +			       struct mlx5e_tc_flow *flow, int out_index)
 +{
 +	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
 +	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 +
 +	/* flow wasn't fully initialized */
 +	if (!e)
 +		return;
 +
 +	mutex_lock(&esw->offloads.encap_tbl_lock);
 +	list_del(&flow->encaps[out_index].list);
 +	flow->encaps[out_index].e = NULL;
 +	if (!refcount_dec_and_test(&e->refcnt)) {
 +		mutex_unlock(&esw->offloads.encap_tbl_lock);
 +		return;
  	}
 +	hash_del_rcu(&e->encap_hlist);
 +	mutex_unlock(&esw->offloads.encap_tbl_lock);
 +
 +	mlx5e_encap_dealloc(priv, e);
  }
  
  static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
  {
  	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
  
 -	if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
 -	    !(flow->flags & MLX5E_TC_FLOW_DUP))
 +	if (!flow_flag_test(flow, ESWITCH) ||
 +	    !flow_flag_test(flow, DUP))
  		return;
  
  	mutex_lock(&esw->offloads.peer_mutex);
  	list_del(&flow->peer);
  	mutex_unlock(&esw->offloads.peer_mutex);
  
 -	flow->flags &= ~MLX5E_TC_FLOW_DUP;
 +	flow_flag_clear(flow, DUP);
  
  	mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
  	kvfree(flow->peer_flow);
@@@ -1561,7 -1339,7 +1561,7 @@@ static void mlx5e_tc_del_fdb_peer_flow(
  static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
  			      struct mlx5e_tc_flow *flow)
  {
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 +	if (mlx5e_is_eswitch_flow(flow)) {
  		mlx5e_tc_del_fdb_peer_flow(flow);
  		mlx5e_tc_del_fdb_flow(priv, flow);
  	} else {
@@@ -1702,7 -1480,7 +1702,7 @@@ static int __parse_cls_flower(struct ml
  			      struct mlx5_flow_spec *spec,
  			      struct flow_cls_offload *f,
  			      struct net_device *filter_dev,
- 			      u8 *match_level, u8 *tunnel_match_level)
+ 			      u8 *inner_match_level, u8 *outer_match_level)
  {
  	struct netlink_ext_ack *extack = f->common.extack;
  	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@@ -1717,8 -1495,9 +1717,9 @@@
  	struct flow_dissector *dissector = rule->match.dissector;
  	u16 addr_type = 0;
  	u8 ip_proto = 0;
+ 	u8 *match_level;
  
- 	*match_level = MLX5_MATCH_NONE;
+ 	match_level = outer_match_level;
  
  	if (dissector->used_keys &
  	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
@@@ -1746,12 -1525,14 +1747,14 @@@
  	}
  
  	if (mlx5e_get_tc_tun(filter_dev)) {
- 		if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
+ 		if (parse_tunnel_attr(priv, spec, f, filter_dev,
+ 				      outer_match_level))
  			return -EOPNOTSUPP;
  
- 		/* In decap flow, header pointers should point to the inner
+ 		/* At this point, header pointers should point to the inner
  		 * headers, outer header were already set by parse_tunnel_attr
  		 */
+ 		match_level = inner_match_level;
  		headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
  						       spec);
  		headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
@@@ -2053,37 -1834,41 +2056,43 @@@ static int parse_cls_flower(struct mlx5
  			    struct flow_cls_offload *f,
  			    struct net_device *filter_dev)
  {
+ 	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
  	struct netlink_ext_ack *extack = f->common.extack;
  	struct mlx5_core_dev *dev = priv->mdev;
  	struct mlx5_eswitch *esw = dev->priv.eswitch;
  	struct mlx5e_rep_priv *rpriv = priv->ppriv;
- 	u8 match_level, tunnel_match_level = MLX5_MATCH_NONE;
  	struct mlx5_eswitch_rep *rep;
 +	bool is_eswitch_flow;
  	int err;
  
- 	err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);
+ 	inner_match_level = MLX5_MATCH_NONE;
+ 	outer_match_level = MLX5_MATCH_NONE;
+ 
+ 	err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
+ 				 &outer_match_level);
+ 	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
+ 				 outer_match_level : inner_match_level;
  
 -	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
 +	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
 +	if (!err && is_eswitch_flow) {
  		rep = rpriv->rep;
  		if (rep->vport != MLX5_VPORT_UPLINK &&
  		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
- 		    esw->offloads.inline_mode < match_level)) {
+ 		    esw->offloads.inline_mode < non_tunnel_match_level)) {
  			NL_SET_ERR_MSG_MOD(extack,
  					   "Flow is not offloaded due to min inline setting");
  			netdev_warn(priv->netdev,
  				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
- 				    match_level, esw->offloads.inline_mode);
+ 				    non_tunnel_match_level, esw->offloads.inline_mode);
  			return -EOPNOTSUPP;
  		}
  	}
  
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 +	if (is_eswitch_flow) {
- 		flow->esw_attr->match_level = match_level;
- 		flow->esw_attr->tunnel_match_level = tunnel_match_level;
+ 		flow->esw_attr->inner_match_level = inner_match_level;
+ 		flow->esw_attr->outer_match_level = outer_match_level;
  	} else {
- 		flow->nic_attr->match_level = match_level;
+ 		flow->nic_attr->match_level = non_tunnel_match_level;
  	}
  
  	return err;
@@@ -2600,15 -2385,14 +2609,15 @@@ static bool actions_match_supported(str
  {
  	u32 actions;
  
 -	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
 +	if (mlx5e_is_eswitch_flow(flow))
  		actions = flow->esw_attr->action;
  	else
  		actions = flow->nic_attr->action;
  
 -	if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
 +	if (flow_flag_test(flow, EGRESS) &&
  	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
 -	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
 +	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
 +	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
  		return false;
  
  	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@@ -2758,7 -2542,7 +2767,7 @@@ static int parse_tc_nic_actions(struct 
  			if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
  			    same_hw_devs(priv, netdev_priv(peer_dev))) {
  				parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
 -				flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
 +				flow_flag_set(flow, HAIRPIN);
  				action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
  					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
  			} else {
@@@ -2845,31 -2629,6 +2854,31 @@@ static bool is_merged_eswitch_dev(struc
  
  
  
 +bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
 +{
 +	return refcount_inc_not_zero(&e->refcnt);
 +}
 +
 +static struct mlx5e_encap_entry *
 +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
 +		uintptr_t hash_key)
 +{
 +	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 +	struct mlx5e_encap_entry *e;
 +	struct encap_key e_key;
 +
 +	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 +				   encap_hlist, hash_key) {
 +		e_key.ip_tun_key = &e->tun_info->key;
 +		e_key.tc_tunnel = e->tunnel;
 +		if (!cmp_encap_info(&e_key, key) &&
 +		    mlx5e_encap_take(e))
 +			return e;
 +	}
 +
 +	return NULL;
 +}
 +
  static int mlx5e_attach_encap(struct mlx5e_priv *priv,
  			      struct mlx5e_tc_flow *flow,
  			      struct net_device *mirred_dev,
@@@ -2882,10 -2641,11 +2891,10 @@@
  	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
  	struct mlx5e_tc_flow_parse_attr *parse_attr;
  	const struct ip_tunnel_info *tun_info;
 -	struct encap_key key, e_key;
 +	struct encap_key key;
  	struct mlx5e_encap_entry *e;
  	unsigned short family;
  	uintptr_t hash_key;
 -	bool found = false;
  	int err = 0;
  
  	parse_attr = attr->parse_attr;
@@@ -2900,59 -2660,42 +2909,59 @@@
  
  	hash_key = hash_encap_info(&key);
  
 -	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 -				   encap_hlist, hash_key) {
 -		e_key.ip_tun_key = &e->tun_info->key;
 -		e_key.tc_tunnel = e->tunnel;
 -		if (!cmp_encap_info(&e_key, &key)) {
 -			found = true;
 -			break;
 -		}
 -	}
 +	mutex_lock(&esw->offloads.encap_tbl_lock);
 +	e = mlx5e_encap_get(priv, &key, hash_key);
  
  	/* must verify if encap is valid or not */
 -	if (found)
 +	if (e) {
 +		mutex_unlock(&esw->offloads.encap_tbl_lock);
 +		wait_for_completion(&e->res_ready);
 +
 +		/* Protect against concurrent neigh update. */
 +		mutex_lock(&esw->offloads.encap_tbl_lock);
 +		if (e->compl_result) {
 +			err = -EREMOTEIO;
 +			goto out_err;
 +		}
  		goto attach_flow;
 +	}
  
  	e = kzalloc(sizeof(*e), GFP_KERNEL);
 -	if (!e)
 -		return -ENOMEM;
 +	if (!e) {
 +		err = -ENOMEM;
 +		goto out_err;
 +	}
 +
 +	refcount_set(&e->refcnt, 1);
 +	init_completion(&e->res_ready);
  
  	e->tun_info = tun_info;
  	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
 -	if (err)
 +	if (err) {
 +		kfree(e);
 +		e = NULL;
  		goto out_err;
 +	}
  
  	INIT_LIST_HEAD(&e->flows);
 +	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
 +	mutex_unlock(&esw->offloads.encap_tbl_lock);
  
  	if (family == AF_INET)
  		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
  	else if (family == AF_INET6)
  		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
  
 -	if (err)
 +	/* Protect against concurrent neigh update. */
 +	mutex_lock(&esw->offloads.encap_tbl_lock);
 +	complete_all(&e->res_ready);
 +	if (err) {
 +		e->compl_result = err;
  		goto out_err;
 -
 -	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
 +	}
  
  attach_flow:
 +	flow->encaps[out_index].e = e;
  	list_add(&flow->encaps[out_index].list, &e->flows);
  	flow->encaps[out_index].index = out_index;
  	*encap_dev = e->out_dev;
@@@ -2963,14 -2706,11 +2972,14 @@@
  	} else {
  		*encap_valid = false;
  	}
 +	mutex_unlock(&esw->offloads.encap_tbl_lock);
  
  	return err;
  
  out_err:
 -	kfree(e);
 +	mutex_unlock(&esw->offloads.encap_tbl_lock);
 +	if (e)
 +		mlx5e_encap_put(priv, e);
  	return err;
  }
  
@@@ -3150,16 -2890,12 +3159,16 @@@ static int parse_tc_fdb_actions(struct 
  			if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
  				struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  				struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
 -				struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
 +				struct net_device *uplink_upper;
  
 +				rcu_read_lock();
 +				uplink_upper =
 +					netdev_master_upper_dev_get_rcu(uplink_dev);
  				if (uplink_upper &&
  				    netif_is_lag_master(uplink_upper) &&
  				    uplink_upper == out_dev)
  					out_dev = uplink_dev;
 +				rcu_read_unlock();
  
  				if (is_vlan_dev(out_dev)) {
  					err = add_vlan_push_action(priv, attr,
@@@ -3330,19 -3066,19 +3339,19 @@@
  	return 0;
  }
  
 -static void get_flags(int flags, u16 *flow_flags)
 +static void get_flags(int flags, unsigned long *flow_flags)
  {
 -	u16 __flow_flags = 0;
 +	unsigned long __flow_flags = 0;
  
 -	if (flags & MLX5E_TC_INGRESS)
 -		__flow_flags |= MLX5E_TC_FLOW_INGRESS;
 -	if (flags & MLX5E_TC_EGRESS)
 -		__flow_flags |= MLX5E_TC_FLOW_EGRESS;
 +	if (flags & MLX5_TC_FLAG(INGRESS))
 +		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
 +	if (flags & MLX5_TC_FLAG(EGRESS))
 +		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
  
 -	if (flags & MLX5E_TC_ESW_OFFLOAD)
 -		__flow_flags |= MLX5E_TC_FLOW_ESWITCH;
 -	if (flags & MLX5E_TC_NIC_OFFLOAD)
 -		__flow_flags |= MLX5E_TC_FLOW_NIC;
 +	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
 +		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
 +	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
 +		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
  
  	*flow_flags = __flow_flags;
  }
@@@ -3354,13 -3090,12 +3363,13 @@@ static const struct rhashtable_params t
  	.automatic_shrinking = true,
  };
  
 -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
 +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
 +				    unsigned long flags)
  {
  	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  	struct mlx5e_rep_priv *uplink_rpriv;
  
 -	if (flags & MLX5E_TC_ESW_OFFLOAD) {
 +	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
  		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
  		return &uplink_rpriv->uplink_priv.tc_ht;
  	} else /* NIC offload */
@@@ -3371,7 -3106,7 +3380,7 @@@ static bool is_peer_flow_needed(struct 
  {
  	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
  	bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
 -			      flow->flags & MLX5E_TC_FLOW_INGRESS;
 +		flow_flag_test(flow, INGRESS);
  	bool act_is_encap = !!(attr->action &
  			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
  	bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
@@@ -3390,13 -3125,13 +3399,13 @@@
  
  static int
  mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
 -		 struct flow_cls_offload *f, u16 flow_flags,
 +		 struct flow_cls_offload *f, unsigned long flow_flags,
  		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
  		 struct mlx5e_tc_flow **__flow)
  {
  	struct mlx5e_tc_flow_parse_attr *parse_attr;
  	struct mlx5e_tc_flow *flow;
 -	int err;
 +	int out_index, err;
  
  	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
  	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
@@@ -3408,11 -3143,6 +3417,11 @@@
  	flow->cookie = f->cookie;
  	flow->flags = flow_flags;
  	flow->priv = priv;
 +	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
 +		INIT_LIST_HEAD(&flow->encaps[out_index].list);
 +	INIT_LIST_HEAD(&flow->mod_hdr);
 +	INIT_LIST_HEAD(&flow->hairpin);
 +	refcount_set(&flow->refcnt, 1);
  
  	*__flow = flow;
  	*__parse_attr = parse_attr;
@@@ -3437,7 -3167,7 +3446,7 @@@ mlx5e_flow_esw_attr_init(struct mlx5_es
  
  	esw_attr->parse_attr = parse_attr;
  	esw_attr->chain = f->common.chain_index;
- 	esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+ 	esw_attr->prio = f->common.prio;
  
  	esw_attr->in_rep = in_rep;
  	esw_attr->in_mdev = in_mdev;
@@@ -3452,7 -3182,7 +3461,7 @@@
  static struct mlx5e_tc_flow *
  __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
  		     struct flow_cls_offload *f,
 -		     u16 flow_flags,
 +		     unsigned long flow_flags,
  		     struct net_device *filter_dev,
  		     struct mlx5_eswitch_rep *in_rep,
  		     struct mlx5_core_dev *in_mdev)
@@@ -3463,7 -3193,7 +3472,7 @@@
  	struct mlx5e_tc_flow *flow;
  	int attr_size, err;
  
 -	flow_flags |= MLX5E_TC_FLOW_ESWITCH;
 +	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
  	attr_size  = sizeof(struct mlx5_esw_flow_attr);
  	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
  			       &parse_attr, &flow);
@@@ -3495,14 -3225,15 +3504,14 @@@
  	return flow;
  
  err_free:
 -	kfree(flow);
 -	kvfree(parse_attr);
 +	mlx5e_flow_put(priv, flow);
  out:
  	return ERR_PTR(err);
  }
  
  static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
  				      struct mlx5e_tc_flow *flow,
 -				      u16 flow_flags)
 +				      unsigned long flow_flags)
  {
  	struct mlx5e_priv *priv = flow->priv, *peer_priv;
  	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
@@@ -3540,7 -3271,7 +3549,7 @@@
  	}
  
  	flow->peer_flow = peer_flow;
 -	flow->flags |= MLX5E_TC_FLOW_DUP;
 +	flow_flag_set(flow, DUP);
  	mutex_lock(&esw->offloads.peer_mutex);
  	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
  	mutex_unlock(&esw->offloads.peer_mutex);
@@@ -3553,7 -3284,7 +3562,7 @@@ out
  static int
  mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
  		   struct flow_cls_offload *f,
 -		   u16 flow_flags,
 +		   unsigned long flow_flags,
  		   struct net_device *filter_dev,
  		   struct mlx5e_tc_flow **__flow)
  {
@@@ -3587,7 -3318,7 +3596,7 @@@ out
  static int
  mlx5e_add_nic_flow(struct mlx5e_priv *priv,
  		   struct flow_cls_offload *f,
 -		   u16 flow_flags,
 +		   unsigned long flow_flags,
  		   struct net_device *filter_dev,
  		   struct mlx5e_tc_flow **__flow)
  {
@@@ -3601,7 -3332,7 +3610,7 @@@
  	if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
  		return -EOPNOTSUPP;
  
 -	flow_flags |= MLX5E_TC_FLOW_NIC;
 +	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
  	attr_size  = sizeof(struct mlx5_nic_flow_attr);
  	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
  			       &parse_attr, &flow);
@@@ -3622,14 -3353,14 +3631,14 @@@
  	if (err)
  		goto err_free;
  
 -	flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
 +	flow_flag_set(flow, OFFLOADED);
  	kvfree(parse_attr);
  	*__flow = flow;
  
  	return 0;
  
  err_free:
 -	kfree(flow);
 +	mlx5e_flow_put(priv, flow);
  	kvfree(parse_attr);
  out:
  	return err;
@@@ -3638,12 -3369,12 +3647,12 @@@
  static int
  mlx5e_tc_add_flow(struct mlx5e_priv *priv,
  		  struct flow_cls_offload *f,
 -		  int flags,
 +		  unsigned long flags,
  		  struct net_device *filter_dev,
  		  struct mlx5e_tc_flow **flow)
  {
  	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 -	u16 flow_flags;
 +	unsigned long flow_flags;
  	int err;
  
  	get_flags(flags, &flow_flags);
@@@ -3662,16 -3393,14 +3671,16 @@@
  }
  
  int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 -			   struct flow_cls_offload *f, int flags)
 +			   struct flow_cls_offload *f, unsigned long flags)
  {
  	struct netlink_ext_ack *extack = f->common.extack;
  	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
  	struct mlx5e_tc_flow *flow;
  	int err = 0;
  
 -	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 +	rcu_read_lock();
 +	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 +	rcu_read_unlock();
  	if (flow) {
  		NL_SET_ERR_MSG_MOD(extack,
  				   "flow cookie already exists, ignoring");
@@@ -3686,62 -3415,51 +3695,62 @@@
  	if (err)
  		goto out;
  
 -	err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
 +	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
  	if (err)
  		goto err_free;
  
  	return 0;
  
  err_free:
 -	mlx5e_tc_del_flow(priv, flow);
 -	kfree(flow);
 +	mlx5e_flow_put(priv, flow);
  out:
  	return err;
  }
  
 -#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
 -#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
 -
  static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
  {
 -	if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
 -		return true;
 +	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
 +	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
  
 -	return false;
 +	return flow_flag_test(flow, INGRESS) == dir_ingress &&
 +		flow_flag_test(flow, EGRESS) == dir_egress;
  }
  
  int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 -			struct flow_cls_offload *f, int flags)
 +			struct flow_cls_offload *f, unsigned long flags)
  {
  	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
  	struct mlx5e_tc_flow *flow;
 +	int err;
  
 +	rcu_read_lock();
  	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 -	if (!flow || !same_flow_direction(flow, flags))
 -		return -EINVAL;
 +	if (!flow || !same_flow_direction(flow, flags)) {
 +		err = -EINVAL;
 +		goto errout;
 +	}
  
 +	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
 +	 * set.
 +	 */
 +	if (flow_flag_test_and_set(flow, DELETED)) {
 +		err = -EINVAL;
 +		goto errout;
 +	}
  	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
 +	rcu_read_unlock();
  
 -	mlx5e_tc_del_flow(priv, flow);
 -
 -	kfree(flow);
 +	mlx5e_flow_put(priv, flow);
  
  	return 0;
 +
 +errout:
 +	rcu_read_unlock();
 +	return err;
  }
  
  int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 -		       struct flow_cls_offload *f, int flags)
 +		       struct flow_cls_offload *f, unsigned long flags)
  {
  	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
  	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
@@@ -3751,24 -3469,15 +3760,24 @@@
  	u64 lastuse = 0;
  	u64 packets = 0;
  	u64 bytes = 0;
 +	int err = 0;
  
 -	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 -	if (!flow || !same_flow_direction(flow, flags))
 -		return -EINVAL;
 +	rcu_read_lock();
 +	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
 +						tc_ht_params));
 +	rcu_read_unlock();
 +	if (IS_ERR(flow))
 +		return PTR_ERR(flow);
  
 -	if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 +	if (!same_flow_direction(flow, flags)) {
 +		err = -EINVAL;
 +		goto errout;
 +	}
 +
 +	if (mlx5e_is_offloaded_flow(flow)) {
  		counter = mlx5e_tc_get_counter(flow);
  		if (!counter)
 -			return 0;
 +			goto errout;
  
  		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
  	}
@@@ -3780,8 -3489,8 +3789,8 @@@
  	if (!peer_esw)
  		goto out;
  
 -	if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
 -	    (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
 +	if (flow_flag_test(flow, DUP) &&
 +	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
  		u64 bytes2;
  		u64 packets2;
  		u64 lastuse2;
@@@ -3800,117 -3509,15 +3809,117 @@@ no_peer_counter
  	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
  out:
  	flow_stats_update(&f->stats, bytes, packets, lastuse);
 +errout:
 +	mlx5e_flow_put(priv, flow);
 +	return err;
 +}
 +
 +static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
 +			       struct netlink_ext_ack *extack)
 +{
 +	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 +	struct mlx5_eswitch *esw;
 +	u16 vport_num;
 +	u32 rate_mbps;
 +	int err;
 +
 +	esw = priv->mdev->priv.eswitch;
 +	/* rate is given in bytes/sec.
 +	 * First convert to bits/sec and then round to the nearest mbit/secs.
 +	 * mbit means million bits.
 +	 * Moreover, if rate is non zero we choose to configure to a minimum of
 +	 * 1 mbit/sec.
 +	 */
 +	rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
 +	vport_num = rpriv->rep->vport;
 +
 +	err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
 +	if (err)
 +		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
 +
 +	return err;
 +}
 +
 +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
 +					struct flow_action *flow_action,
 +					struct netlink_ext_ack *extack)
 +{
 +	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 +	const struct flow_action_entry *act;
 +	int err;
 +	int i;
 +
 +	if (!flow_action_has_entries(flow_action)) {
 +		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
 +		return -EINVAL;
 +	}
 +
 +	if (!flow_offload_has_one_action(flow_action)) {
 +		NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
 +		return -EOPNOTSUPP;
 +	}
 +
 +	flow_action_for_each(i, act, flow_action) {
 +		switch (act->id) {
 +		case FLOW_ACTION_POLICE:
 +			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
 +			if (err)
 +				return err;
 +
 +			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
 +			break;
 +		default:
 +			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
 +			return -EOPNOTSUPP;
 +		}
 +	}
  
  	return 0;
  }
  
 +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
 +				struct tc_cls_matchall_offload *ma)
 +{
 +	struct netlink_ext_ack *extack = ma->common.extack;
 +	int prio = TC_H_MAJ(ma->common.prio) >> 16;
 +
 +	if (prio != 1) {
 +		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
 +		return -EINVAL;
 +	}
 +
 +	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
 +}
 +
 +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
 +			     struct tc_cls_matchall_offload *ma)
 +{
 +	struct netlink_ext_ack *extack = ma->common.extack;
 +
 +	return apply_police_params(priv, 0, extack);
 +}
 +
 +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
 +			     struct tc_cls_matchall_offload *ma)
 +{
 +	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 +	struct rtnl_link_stats64 cur_stats;
 +	u64 dbytes;
 +	u64 dpkts;
 +
 +	cur_stats = priv->stats.vf_vport;
 +	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
 +	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
 +	rpriv->prev_vf_vport_stats = cur_stats;
 +	flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
 +}
 +
  static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
  					      struct mlx5e_priv *peer_priv)
  {
  	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
 -	struct mlx5e_hairpin_entry *hpe;
 +	struct mlx5e_hairpin_entry *hpe, *tmp;
 +	LIST_HEAD(init_wait_list);
  	u16 peer_vhca_id;
  	int bkt;
  
@@@ -3919,18 -3526,9 +3928,18 @@@
  
  	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
  
 -	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
 -		if (hpe->peer_vhca_id == peer_vhca_id)
 +	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
 +	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
 +		if (refcount_inc_not_zero(&hpe->refcnt))
 +			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
 +	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 +
 +	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
 +		wait_for_completion(&hpe->res_ready);
 +		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
  			hpe->hp->pair->peer_gone = true;
 +
 +		mlx5e_hairpin_put(priv, hpe);
  	}
  }
  
@@@ -3966,10 -3564,7 +3975,10 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri
  	struct mlx5e_tc_table *tc = &priv->fs.tc;
  	int err;
  
 -	hash_init(tc->mod_hdr_tbl);
 +	mutex_init(&tc->t_lock);
 +	mutex_init(&tc->mod_hdr.lock);
 +	hash_init(tc->mod_hdr.hlist);
 +	mutex_init(&tc->hairpin_tbl_lock);
  	hash_init(tc->hairpin_tbl);
  
  	err = rhashtable_init(&tc->ht, &tc_ht_params);
@@@ -4001,16 -3596,12 +4010,16 @@@ void mlx5e_tc_nic_cleanup(struct mlx5e_
  	if (tc->netdevice_nb.notifier_call)
  		unregister_netdevice_notifier(&tc->netdevice_nb);
  
 +	mutex_destroy(&tc->mod_hdr.lock);
 +	mutex_destroy(&tc->hairpin_tbl_lock);
 +
  	rhashtable_destroy(&tc->ht);
  
  	if (!IS_ERR_OR_NULL(tc->t)) {
  		mlx5_destroy_flow_table(tc->t);
  		tc->t = NULL;
  	}
 +	mutex_destroy(&tc->t_lock);
  }
  
  int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
@@@ -4023,7 -3614,7 +4032,7 @@@ void mlx5e_tc_esw_cleanup(struct rhasht
  	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
  }
  
 -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
 +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
  {
  	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
  
@@@ -4045,10 -3636,10 +4054,10 @@@ void mlx5e_tc_reoffload_flows_work(stru
  			     reoffload_flows_work);
  	struct mlx5e_tc_flow *flow, *tmp;
  
 -	rtnl_lock();
 +	mutex_lock(&rpriv->unready_flows_lock);
  	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
  		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
 -			remove_unready_flow(flow);
 +			unready_flow_del(flow);
  	}
 -	rtnl_unlock();
 +	mutex_unlock(&rpriv->unready_flows_lock);
  }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 86db0e9776da,04685dbb280c..aba9e7a6ad3c
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@@ -35,7 -35,6 +35,7 @@@
  
  #include <linux/if_ether.h>
  #include <linux/if_link.h>
 +#include <linux/atomic.h>
  #include <net/devlink.h>
  #include <linux/mlx5/device.h>
  #include <linux/mlx5/eswitch.h>
@@@ -102,13 -101,6 +102,13 @@@ struct mlx5_vport_info 
  	bool                    trusted;
  };
  
 +/* Vport context events */
 +enum mlx5_eswitch_vport_event {
 +	MLX5_VPORT_UC_ADDR_CHANGE = BIT(0),
 +	MLX5_VPORT_MC_ADDR_CHANGE = BIT(1),
 +	MLX5_VPORT_PROMISC_CHANGE = BIT(3),
 +};
 +
  struct mlx5_vport {
  	struct mlx5_core_dev    *dev;
  	int                     vport;
@@@ -130,7 -122,7 +130,7 @@@
  	} qos;
  
  	bool                    enabled;
 -	u16                     enabled_events;
 +	enum mlx5_eswitch_vport_event enabled_events;
  };
  
  enum offloads_fdb_flags {
@@@ -181,14 -173,13 +181,14 @@@ struct mlx5_esw_offload 
  	struct mlx5_eswitch_rep *vport_reps;
  	struct list_head peer_flows;
  	struct mutex peer_mutex;
 +	struct mutex encap_tbl_lock; /* protects encap_tbl */
  	DECLARE_HASHTABLE(encap_tbl, 8);
 -	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
 +	struct mod_hdr_tbl mod_hdr;
  	DECLARE_HASHTABLE(termtbl_tbl, 8);
  	struct mutex termtbl_mutex; /* protects termtbl hash */
  	const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
  	u8 inline_mode;
 -	u64 num_flows;
 +	atomic64_t num_flows;
  	enum devlink_eswitch_encap_mode encap;
  };
  
@@@ -216,11 -207,8 +216,11 @@@ enum 
  struct mlx5_eswitch {
  	struct mlx5_core_dev    *dev;
  	struct mlx5_nb          nb;
 +	/* legacy data structures */
  	struct mlx5_eswitch_fdb fdb_table;
  	struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
 +	struct esw_mc_addr mc_promisc;
 +	/* end of legacy */
  	struct workqueue_struct *work_queue;
  	struct mlx5_vport       *vports;
  	u32 flags;
@@@ -230,6 -218,7 +230,6 @@@
  	 * and async SRIOV admin state changes
  	 */
  	struct mutex            state_lock;
 -	struct esw_mc_addr	mc_promisc;
  
  	struct {
  		bool            enabled;
@@@ -244,8 -233,8 +244,8 @@@
  	struct mlx5_esw_functions esw_funcs;
  };
  
 -void esw_offloads_cleanup(struct mlx5_eswitch *esw);
 -int esw_offloads_init(struct mlx5_eswitch *esw);
 +void esw_offloads_disable(struct mlx5_eswitch *esw);
 +int esw_offloads_enable(struct mlx5_eswitch *esw);
  void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
  int esw_offloads_init_reps(struct mlx5_eswitch *esw);
  void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
@@@ -262,8 -251,6 +262,8 @@@ void esw_vport_disable_ingress_acl(stru
  				   struct mlx5_vport *vport);
  void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
  					       struct mlx5_vport *vport);
 +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 +			       u32 rate_mbps);
  
  /* E-Switch API */
  int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@@ -390,8 -377,8 +390,8 @@@ struct mlx5_esw_flow_attr 
  		struct mlx5_termtbl_handle *termtbl;
  	} dests[MLX5_MAX_FLOW_FWD_VPORTS];
  	u32	mod_hdr_id;
- 	u8	match_level;
- 	u8	tunnel_match_level;
+ 	u8	inner_match_level;
+ 	u8	outer_match_level;
  	struct mlx5_fc *counter;
  	u32	chain;
  	u16	prio;
@@@ -526,11 -513,6 +526,11 @@@ void mlx5e_tc_clean_fdb_peer_flows(stru
  	     (vport) = &(esw)->vports[i],		\
  	     (i) < (esw)->total_vports; (i)++)
  
 +#define mlx5_esw_for_all_vports_reverse(esw, i, vport)	\
 +	for ((i) = (esw)->total_vports - 1;		\
 +	     (vport) = &(esw)->vports[i],		\
 +	     (i) >= MLX5_VPORT_PF; (i)--)
 +
  #define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)	\
  	for ((i) = MLX5_VPORT_FIRST_VF;			\
  	     (vport) = &(esw)->vports[(i)],		\
@@@ -592,11 -574,6 +592,11 @@@ bool mlx5_eswitch_is_vf_vport(const str
  void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
  int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
  
 +void
 +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
 +				 enum mlx5_eswitch_vport_event enabled_events);
 +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
 +
  #else  /* CONFIG_MLX5_ESWITCH */
  /* eswitch API stubs */
  static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 42cc5001255b,0323fd078271..7d3582ee66b7
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@@ -207,14 -207,10 +207,10 @@@ mlx5_eswitch_add_offloaded_rule(struct 
  
  	mlx5_eswitch_set_rule_source_port(esw, spec, attr);
  
- 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
- 		if (attr->tunnel_match_level != MLX5_MATCH_NONE)
- 			spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
- 		if (attr->match_level != MLX5_MATCH_NONE)
- 			spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
- 	} else if (attr->match_level != MLX5_MATCH_NONE) {
+ 	if (attr->outer_match_level != MLX5_MATCH_NONE)
  		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
- 	}
+ 	if (attr->inner_match_level != MLX5_MATCH_NONE)
+ 		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
  
  	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
  		flow_act.modify_id = attr->mod_hdr_id;
@@@ -233,7 -229,7 +229,7 @@@
  	if (IS_ERR(rule))
  		goto err_add_rule;
  	else
 -		esw->offloads.num_flows++;
 +		atomic64_inc(&esw->offloads.num_flows);
  
  	return rule;
  
@@@ -290,7 -286,7 +286,7 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e
  	mlx5_eswitch_set_rule_source_port(esw, spec, attr);
  
  	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
- 	if (attr->match_level != MLX5_MATCH_NONE)
+ 	if (attr->outer_match_level != MLX5_MATCH_NONE)
  		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
  
  	rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
@@@ -298,7 -294,7 +294,7 @@@
  	if (IS_ERR(rule))
  		goto add_err;
  
 -	esw->offloads.num_flows++;
 +	atomic64_inc(&esw->offloads.num_flows);
  
  	return rule;
  add_err:
@@@ -326,7 -322,7 +322,7 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw
  			mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
  	}
  
 -	esw->offloads.num_flows--;
 +	atomic64_dec(&esw->offloads.num_flows);
  
  	if (fwd_rule)  {
  		esw_put_prio_table(esw, attr->chain, attr->prio, 1);
@@@ -442,11 -438,9 +438,11 @@@ int mlx5_eswitch_add_vlan_action(struc
  	fwd  = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
  		   !attr->dest_chain);
  
 +	mutex_lock(&esw->state_lock);
 +
  	err = esw_add_vlan_action_check(attr, push, pop, fwd);
  	if (err)
 -		return err;
 +		goto unlock;
  
  	attr->vlan_handled = false;
  
@@@ -459,11 -453,11 +455,11 @@@
  			attr->vlan_handled = true;
  		}
  
 -		return 0;
 +		goto unlock;
  	}
  
  	if (!push && !pop)
 -		return 0;
 +		goto unlock;
  
  	if (!(offloads->vlan_push_pop_refcount)) {
  		/* it's the 1st vlan rule, apply global vlan pop policy */
@@@ -488,8 -482,6 +484,8 @@@ skip_set_push
  out:
  	if (!err)
  		attr->vlan_handled = true;
 +unlock:
 +	mutex_unlock(&esw->state_lock);
  	return err;
  }
  
@@@ -512,8 -504,6 +508,8 @@@ int mlx5_eswitch_del_vlan_action(struc
  	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
  	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
  
 +	mutex_lock(&esw->state_lock);
 +
  	vport = esw_vlan_action_get_vport(attr, push, pop);
  
  	if (!push && !pop && fwd) {
@@@ -521,7 -511,7 +517,7 @@@
  		if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
  			vport->vlan_refcount--;
  
 -		return 0;
 +		goto out;
  	}
  
  	if (push) {
@@@ -539,13 -529,12 +535,13 @@@
  skip_unset_push:
  	offloads->vlan_push_pop_refcount--;
  	if (offloads->vlan_push_pop_refcount)
 -		return 0;
 +		goto out;
  
  	/* no more vlan rules, stop global vlan pop policy */
  	err = esw_set_global_vlan_pop(esw, 0);
  
  out:
 +	mutex_unlock(&esw->state_lock);
  	return err;
  }
  
@@@ -594,15 -583,38 +590,15 @@@ void mlx5_eswitch_del_send_to_vport_rul
  	mlx5_del_flow_rules(rule);
  }
  
 -static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
 +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
  {
  	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
  	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
  	u8 fdb_to_vport_reg_c_id;
  	int err;
  
 -	err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
 -						   out, sizeof(out));
 -	if (err)
 -		return err;
 -
 -	fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
 -					 esw_vport_context.fdb_to_vport_reg_c_id);
 -
 -	fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
 -	MLX5_SET(modify_esw_vport_context_in, in,
 -		 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
 -
 -	MLX5_SET(modify_esw_vport_context_in, in,
 -		 field_select.fdb_to_vport_reg_c_id, 1);
 -
 -	return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
 -						     in, sizeof(in));
 -}
 -
 -static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
 -{
 -	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
 -	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
 -	u8 fdb_to_vport_reg_c_id;
 -	int err;
 +	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
 +		return 0;
  
  	err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
  						   out, sizeof(out));
@@@ -612,10 -624,7 +608,10 @@@
  	fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
  					 esw_vport_context.fdb_to_vport_reg_c_id);
  
 -	fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
 +	if (enable)
 +		fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
 +	else
 +		fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
  
  	MLX5_SET(modify_esw_vport_context_in, in,
  		 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
@@@ -1393,9 -1402,10 +1389,9 @@@ void esw_offloads_cleanup_reps(struct m
  int esw_offloads_init_reps(struct mlx5_eswitch *esw)
  {
  	int total_vports = esw->total_vports;
 -	struct mlx5_core_dev *dev = esw->dev;
  	struct mlx5_eswitch_rep *rep;
 -	u8 hw_id[ETH_ALEN], rep_type;
  	int vport_index;
 +	u8 rep_type;
  
  	esw->offloads.vport_reps = kcalloc(total_vports,
  					   sizeof(struct mlx5_eswitch_rep),
@@@ -1403,9 -1413,12 +1399,9 @@@
  	if (!esw->offloads.vport_reps)
  		return -ENOMEM;
  
 -	mlx5_query_mac_address(dev, hw_id);
 -
  	mlx5_esw_for_all_reps(esw, vport_index, rep) {
  		rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
  		rep->vport_index = vport_index;
 -		ether_addr_copy(rep->hw_id, hw_id);
  
  		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
  			atomic_set(&rep->rep_data[rep_type].state,
@@@ -2107,7 -2120,7 +2103,7 @@@ int mlx5_esw_funcs_changed_handler(stru
  	return NOTIFY_OK;
  }
  
 -int esw_offloads_init(struct mlx5_eswitch *esw)
 +int esw_offloads_enable(struct mlx5_eswitch *esw)
  {
  	int err;
  
@@@ -2121,11 -2134,11 +2117,11 @@@
  	if (err)
  		return err;
  
 -	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
 -		err = mlx5_eswitch_enable_passing_vport_metadata(esw);
 -		if (err)
 -			goto err_vport_metadata;
 -	}
 +	err = esw_set_passing_vport_metadata(esw, true);
 +	if (err)
 +		goto err_vport_metadata;
 +
 +	mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
  
  	err = esw_offloads_load_all_reps(esw);
  	if (err)
@@@ -2139,8 -2152,8 +2135,8 @@@
  	return 0;
  
  err_reps:
 -	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
 -		mlx5_eswitch_disable_passing_vport_metadata(esw);
 +	mlx5_eswitch_disable_pf_vf_vports(esw);
 +	esw_set_passing_vport_metadata(esw, false);
  err_vport_metadata:
  	esw_offloads_steering_cleanup(esw);
  	return err;
@@@ -2165,13 -2178,13 +2161,13 @@@ static int esw_offloads_stop(struct mlx
  	return err;
  }
  
 -void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 +void esw_offloads_disable(struct mlx5_eswitch *esw)
  {
  	mlx5_rdma_disable_roce(esw->dev);
  	esw_offloads_devcom_cleanup(esw);
  	esw_offloads_unload_all_reps(esw);
 -	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
 -		mlx5_eswitch_disable_passing_vport_metadata(esw);
 +	mlx5_eswitch_disable_pf_vf_vports(esw);
 +	esw_set_passing_vport_metadata(esw, false);
  	esw_offloads_steering_cleanup(esw);
  	esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
  }
@@@ -2332,7 -2345,7 +2328,7 @@@ int mlx5_devlink_eswitch_inline_mode_se
  		break;
  	}
  
 -	if (esw->offloads.num_flows > 0) {
 +	if (atomic64_read(&esw->offloads.num_flows) > 0) {
  		NL_SET_ERR_MSG_MOD(extack,
  				   "Can't set inline mode when flows are configured");
  		return -EOPNOTSUPP;
@@@ -2442,7 -2455,7 +2438,7 @@@ int mlx5_devlink_eswitch_encap_mode_set
  	if (esw->offloads.encap == encap)
  		return 0;
  
 -	if (esw->offloads.num_flows > 0) {
 +	if (atomic64_read(&esw->offloads.num_flows) > 0) {
  		NL_SET_ERR_MSG_MOD(extack,
  				   "Can't set encapsulation when flows are configured");
  		return -EOPNOTSUPP;
diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 1aaab8446270,84a87d059333..150b3a144b83
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@@ -239,8 -239,7 +239,8 @@@ mlxsw_sp_acl_block_lookup(struct mlxsw_
  int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
  			    struct mlxsw_sp_acl_block *block,
  			    struct mlxsw_sp_port *mlxsw_sp_port,
 -			    bool ingress)
 +			    bool ingress,
 +			    struct netlink_ext_ack *extack)
  {
  	struct mlxsw_sp_acl_block_binding *binding;
  	int err;
@@@ -248,11 -247,6 +248,11 @@@
  	if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
  		return -EEXIST;
  
 +	if (!ingress && block->egress_blocker_rule_count) {
 +		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
 +		return -EOPNOTSUPP;
 +	}
 +
  	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
  	if (!binding)
  		return -ENOMEM;
@@@ -477,7 -471,7 +477,7 @@@ int mlxsw_sp_acl_rulei_commit(struct ml
  void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
  				 unsigned int priority)
  {
- 	rulei->priority = priority >> 16;
+ 	rulei->priority = priority;
  }
  
  void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
@@@ -678,7 -672,6 +678,7 @@@ int mlxsw_sp_acl_rule_add(struct mlxsw_
  {
  	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
  	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
 +	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
  	int err;
  
  	err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
@@@ -696,14 -689,14 +696,14 @@@
  		 * one, to be directly bound to device. The rest of the
  		 * rulesets are bound by "Goto action set".
  		 */
 -		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
 -						      ruleset->ht_key.block);
 +		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
  		if (err)
  			goto err_ruleset_block_bind;
  	}
  
  	list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
 -	ruleset->ht_key.block->rule_count++;
 +	block->rule_count++;
 +	block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker;
  	return 0;
  
  err_ruleset_block_bind:
@@@ -719,9 -712,7 +719,9 @@@ void mlxsw_sp_acl_rule_del(struct mlxsw
  {
  	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
  	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
 +	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
  
 +	block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
  	ruleset->ht_key.block->rule_count--;
  	list_del(&rule->list);
  	if (!ruleset->ht_key.chain_index &&
diff --combined drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 61fe92719982,337b0cbfd153..c979f38a2e0c
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@@ -1286,7 -1286,7 +1286,7 @@@ myri10ge_vlan_rx(struct net_device *dev
  {
  	u8 *va;
  	struct vlan_ethhdr *veh;
 -	struct skb_frag_struct *frag;
 +	skb_frag_t *frag;
  	__wsum vsum;
  
  	va = addr;
@@@ -1306,8 -1306,8 +1306,8 @@@
  		skb->len -= VLAN_HLEN;
  		skb->data_len -= VLAN_HLEN;
  		frag = skb_shinfo(skb)->frags;
 -		frag->page_offset += VLAN_HLEN;
 -		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
 +		skb_frag_off_add(frag, VLAN_HLEN);
 +		skb_frag_size_sub(frag, VLAN_HLEN);
  	}
  }
  
@@@ -1318,7 -1318,7 +1318,7 @@@ myri10ge_rx_done(struct myri10ge_slice_
  {
  	struct myri10ge_priv *mgp = ss->mgp;
  	struct sk_buff *skb;
 -	struct skb_frag_struct *rx_frags;
 +	skb_frag_t *rx_frags;
  	struct myri10ge_rx_buf *rx;
  	int i, idx, remainder, bytes;
  	struct pci_dev *pdev = mgp->pdev;
@@@ -1351,7 -1351,7 +1351,7 @@@
  		return 0;
  	}
  	rx_frags = skb_shinfo(skb)->frags;
 -	/* Fill skb_frag_struct(s) with data from our receive */
 +	/* Fill skb_frag_t(s) with data from our receive */
  	for (i = 0, remainder = len; remainder > 0; i++) {
  		myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
  		skb_fill_page_desc(skb, i, rx->info[idx].page,
@@@ -1364,8 -1364,8 +1364,8 @@@
  	}
  
  	/* remove padding */
 -	rx_frags[0].page_offset += MXGEFW_PAD;
 -	rx_frags[0].size -= MXGEFW_PAD;
 +	skb_frag_off_add(&rx_frags[0], MXGEFW_PAD);
 +	skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD);
  	len -= MXGEFW_PAD;
  
  	skb->len = len;
@@@ -2628,7 -2628,7 +2628,7 @@@ static netdev_tx_t myri10ge_xmit(struc
  	struct myri10ge_slice_state *ss;
  	struct mcp_kreq_ether_send *req;
  	struct myri10ge_tx_buf *tx;
 -	struct skb_frag_struct *frag;
 +	skb_frag_t *frag;
  	struct netdev_queue *netdev_queue;
  	dma_addr_t bus;
  	u32 low;
@@@ -3037,6 -3037,7 +3037,6 @@@ static int myri10ge_set_mac_address(str
  static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
  {
  	struct myri10ge_priv *mgp = netdev_priv(dev);
 -	int error = 0;
  
  	netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
  	if (mgp->running) {
@@@ -3048,7 -3049,7 +3048,7 @@@
  	} else
  		dev->mtu = new_mtu;
  
 -	return error;
 +	return 0;
  }
  
  /*
@@@ -3918,7 -3919,7 +3918,7 @@@ static int myri10ge_probe(struct pci_de
  	 * setup (if available). */
  	status = myri10ge_request_irq(mgp);
  	if (status != 0)
- 		goto abort_with_firmware;
+ 		goto abort_with_slices;
  	myri10ge_free_irq(mgp);
  
  	/* Save configuration space to be restored if the
diff --combined drivers/net/hyperv/netvsc_drv.c
index 86884c863013,e8fce6d715ef..0a6cd2f1111f
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@@ -435,7 -435,7 +435,7 @@@ static u32 init_page_array(void *hdr, u
  		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  
  		slots_used += fill_pg_buf(skb_frag_page(frag),
 -					frag->page_offset,
 +					skb_frag_off(frag),
  					skb_frag_size(frag), &pb[slots_used]);
  	}
  	return slots_used;
@@@ -449,7 -449,7 +449,7 @@@ static int count_skb_frag_slots(struct 
  	for (i = 0; i < frags; i++) {
  		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  		unsigned long size = skb_frag_size(frag);
 -		unsigned long offset = frag->page_offset;
 +		unsigned long offset = skb_frag_off(frag);
  
  		/* Skip unused frames from start of page */
  		offset &= ~PAGE_MASK;
@@@ -1239,12 -1239,15 +1239,15 @@@ static void netvsc_get_stats64(struct n
  			       struct rtnl_link_stats64 *t)
  {
  	struct net_device_context *ndev_ctx = netdev_priv(net);
- 	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+ 	struct netvsc_device *nvdev;
  	struct netvsc_vf_pcpu_stats vf_tot;
  	int i;
  
+ 	rcu_read_lock();
+ 
+ 	nvdev = rcu_dereference(ndev_ctx->nvdev);
  	if (!nvdev)
- 		return;
+ 		goto out;
  
  	netdev_stats_to_stats64(t, &net->stats);
  
@@@ -1283,6 -1286,8 +1286,8 @@@
  		t->rx_packets	+= packets;
  		t->multicast	+= multicast;
  	}
+ out:
+ 	rcu_read_unlock();
  }
  
  static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
diff --combined drivers/net/netdevsim/dev.c
index c217049552f7,bcc40a236624..c5b026150bf5
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@@ -17,60 -17,16 +17,60 @@@
  
  #include <linux/debugfs.h>
  #include <linux/device.h>
 +#include <linux/etherdevice.h>
 +#include <linux/inet.h>
 +#include <linux/jiffies.h>
 +#include <linux/kernel.h>
  #include <linux/list.h>
  #include <linux/mutex.h>
  #include <linux/random.h>
  #include <linux/rtnetlink.h>
 +#include <linux/workqueue.h>
  #include <net/devlink.h>
 +#include <net/ip.h>
 +#include <uapi/linux/devlink.h>
 +#include <uapi/linux/ip.h>
 +#include <uapi/linux/udp.h>
  
  #include "netdevsim.h"
  
  static struct dentry *nsim_dev_ddir;
  
 +#define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32)
 +
 +static ssize_t nsim_dev_take_snapshot_write(struct file *file,
 +					    const char __user *data,
 +					    size_t count, loff_t *ppos)
 +{
 +	struct nsim_dev *nsim_dev = file->private_data;
 +	void *dummy_data;
 +	int err;
 +	u32 id;
 +
 +	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
 +	if (!dummy_data)
 +		return -ENOMEM;
 +
 +	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
 +
 +	id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev));
 +	err = devlink_region_snapshot_create(nsim_dev->dummy_region,
 +					     dummy_data, id, kfree);
 +	if (err) {
 +		pr_err("Failed to create region snapshot\n");
 +		kfree(dummy_data);
 +		return err;
 +	}
 +
 +	return count;
 +}
 +
 +static const struct file_operations nsim_dev_take_snapshot_fops = {
 +	.open = simple_open,
 +	.write = nsim_dev_take_snapshot_write,
 +	.llseek = generic_file_llseek,
 +};
 +
  static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
  {
  	char dev_ddir_name[16];
@@@ -84,12 -40,6 +84,12 @@@
  		return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL;
  	debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir,
  			    &nsim_dev->fw_update_status);
 +	debugfs_create_u32("max_macs", 0600, nsim_dev->ddir,
 +			   &nsim_dev->max_macs);
 +	debugfs_create_bool("test1", 0600, nsim_dev->ddir,
 +			    &nsim_dev->test1);
 +	debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev,
 +			    &nsim_dev_take_snapshot_fops);
  	return 0;
  }
  
@@@ -123,46 -73,47 +123,47 @@@ static void nsim_dev_port_debugfs_exit(
  	debugfs_remove_recursive(nsim_dev_port->ddir);
  }
  
+ static struct net *nsim_devlink_net(struct devlink *devlink)
+ {
+ 	return &init_net;
+ }
+ 
  static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
  {
- 	struct nsim_dev *nsim_dev = priv;
+ 	struct net *net = priv;
  
- 	return nsim_fib_get_val(nsim_dev->fib_data,
- 				NSIM_RESOURCE_IPV4_FIB, false);
+ 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
  }
  
  static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv)
  {
- 	struct nsim_dev *nsim_dev = priv;
+ 	struct net *net = priv;
  
- 	return nsim_fib_get_val(nsim_dev->fib_data,
- 				NSIM_RESOURCE_IPV4_FIB_RULES, false);
+ 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
  }
  
  static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv)
  {
- 	struct nsim_dev *nsim_dev = priv;
+ 	struct net *net = priv;
  
- 	return nsim_fib_get_val(nsim_dev->fib_data,
- 				NSIM_RESOURCE_IPV6_FIB, false);
+ 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
  }
  
  static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv)
  {
- 	struct nsim_dev *nsim_dev = priv;
+ 	struct net *net = priv;
  
- 	return nsim_fib_get_val(nsim_dev->fib_data,
- 				NSIM_RESOURCE_IPV6_FIB_RULES, false);
+ 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
  }
  
  static int nsim_dev_resources_register(struct devlink *devlink)
  {
- 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
  	struct devlink_resource_size_params params = {
  		.size_max = (u64)-1,
  		.size_granularity = 1,
  		.unit = DEVLINK_RESOURCE_UNIT_ENTRY
  	};
+ 	struct net *net = nsim_devlink_net(devlink);
  	int err;
  	u64 n;
  
@@@ -176,8 -127,7 +177,7 @@@
  		goto out;
  	}
  
- 	n = nsim_fib_get_val(nsim_dev->fib_data,
- 			     NSIM_RESOURCE_IPV4_FIB, true);
+ 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
  	err = devlink_resource_register(devlink, "fib", n,
  					NSIM_RESOURCE_IPV4_FIB,
  					NSIM_RESOURCE_IPV4, &params);
@@@ -186,8 -136,7 +186,7 @@@
  		return err;
  	}
  
- 	n = nsim_fib_get_val(nsim_dev->fib_data,
- 			     NSIM_RESOURCE_IPV4_FIB_RULES, true);
+ 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
  	err = devlink_resource_register(devlink, "fib-rules", n,
  					NSIM_RESOURCE_IPV4_FIB_RULES,
  					NSIM_RESOURCE_IPV4, &params);
@@@ -206,8 -155,7 +205,7 @@@
  		goto out;
  	}
  
- 	n = nsim_fib_get_val(nsim_dev->fib_data,
- 			     NSIM_RESOURCE_IPV6_FIB, true);
+ 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
  	err = devlink_resource_register(devlink, "fib", n,
  					NSIM_RESOURCE_IPV6_FIB,
  					NSIM_RESOURCE_IPV6, &params);
@@@ -216,8 -164,7 +214,7 @@@
  		return err;
  	}
  
- 	n = nsim_fib_get_val(nsim_dev->fib_data,
- 			     NSIM_RESOURCE_IPV6_FIB_RULES, true);
+ 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
  	err = devlink_resource_register(devlink, "fib-rules", n,
  					NSIM_RESOURCE_IPV6_FIB_RULES,
  					NSIM_RESOURCE_IPV6, &params);
@@@ -229,308 -176,31 +226,308 @@@
  	devlink_resource_occ_get_register(devlink,
  					  NSIM_RESOURCE_IPV4_FIB,
  					  nsim_dev_ipv4_fib_resource_occ_get,
- 					  nsim_dev);
+ 					  net);
  	devlink_resource_occ_get_register(devlink,
  					  NSIM_RESOURCE_IPV4_FIB_RULES,
  					  nsim_dev_ipv4_fib_rules_res_occ_get,
- 					  nsim_dev);
+ 					  net);
  	devlink_resource_occ_get_register(devlink,
  					  NSIM_RESOURCE_IPV6_FIB,
  					  nsim_dev_ipv6_fib_resource_occ_get,
- 					  nsim_dev);
+ 					  net);
  	devlink_resource_occ_get_register(devlink,
  					  NSIM_RESOURCE_IPV6_FIB_RULES,
  					  nsim_dev_ipv6_fib_rules_res_occ_get,
- 					  nsim_dev);
+ 					  net);
  out:
  	return err;
  }
  
 +enum nsim_devlink_param_id {
 +	NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 +	NSIM_DEVLINK_PARAM_ID_TEST1,
 +};
 +
 +static const struct devlink_param nsim_devlink_params[] = {
 +	DEVLINK_PARAM_GENERIC(MAX_MACS,
 +			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 +			      NULL, NULL, NULL),
 +	DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1,
 +			     "test1", DEVLINK_PARAM_TYPE_BOOL,
 +			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 +			     NULL, NULL, NULL),
 +};
 +
 +static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev,
 +						struct devlink *devlink)
 +{
 +	union devlink_param_value value;
 +
 +	value.vu32 = nsim_dev->max_macs;
 +	devlink_param_driverinit_value_set(devlink,
 +					   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 +					   value);
 +	value.vbool = nsim_dev->test1;
 +	devlink_param_driverinit_value_set(devlink,
 +					   NSIM_DEVLINK_PARAM_ID_TEST1,
 +					   value);
 +}
 +
 +static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink)
 +{
 +	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +	union devlink_param_value saved_value;
 +	int err;
 +
 +	err = devlink_param_driverinit_value_get(devlink,
 +						 DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 +						 &saved_value);
 +	if (!err)
 +		nsim_dev->max_macs = saved_value.vu32;
 +	err = devlink_param_driverinit_value_get(devlink,
 +						 NSIM_DEVLINK_PARAM_ID_TEST1,
 +						 &saved_value);
 +	if (!err)
 +		nsim_dev->test1 = saved_value.vbool;
 +}
 +
 +#define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16
 +
 +static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev,
 +				      struct devlink *devlink)
 +{
 +	nsim_dev->dummy_region =
 +		devlink_region_create(devlink, "dummy",
 +				      NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX,
 +				      NSIM_DEV_DUMMY_REGION_SIZE);
 +	return PTR_ERR_OR_ZERO(nsim_dev->dummy_region);
 +}
 +
 +static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
 +{
 +	devlink_region_destroy(nsim_dev->dummy_region);
 +}
 +
 +struct nsim_trap_item {
 +	void *trap_ctx;
 +	enum devlink_trap_action action;
 +};
 +
 +struct nsim_trap_data {
 +	struct delayed_work trap_report_dw;
 +	struct nsim_trap_item *trap_items_arr;
 +	struct nsim_dev *nsim_dev;
 +	spinlock_t trap_lock;	/* Protects trap_items_arr */
 +};
 +
 +/* All driver-specific traps must be documented in
 + * Documentation/networking/devlink-trap-netdevsim.rst
 + */
 +enum {
 +	NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
 +	NSIM_TRAP_ID_FID_MISS,
 +};
 +
 +#define NSIM_TRAP_NAME_FID_MISS "fid_miss"
 +
 +#define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
 +
 +#define NSIM_TRAP_DROP(_id, _group_id)					      \
 +	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 +			     DEVLINK_TRAP_GROUP_GENERIC(_group_id),	      \
 +			     NSIM_TRAP_METADATA)
 +#define NSIM_TRAP_EXCEPTION(_id, _group_id)				      \
 +	DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id,			      \
 +			     DEVLINK_TRAP_GROUP_GENERIC(_group_id),	      \
 +			     NSIM_TRAP_METADATA)
 +#define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id)			      \
 +	DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id,	      \
 +			    NSIM_TRAP_NAME_##_id,			      \
 +			    DEVLINK_TRAP_GROUP_GENERIC(_group_id),	      \
 +			    NSIM_TRAP_METADATA)
 +
 +static const struct devlink_trap nsim_traps_arr[] = {
 +	NSIM_TRAP_DROP(SMAC_MC, L2_DROPS),
 +	NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
 +	NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
 +	NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
 +	NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
 +	NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
 +	NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS),
 +	NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
 +	NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
 +	NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS),
 +};
 +
 +#define NSIM_TRAP_L4_DATA_LEN 100
 +
 +static struct sk_buff *nsim_dev_trap_skb_build(void)
 +{
 +	int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN;
 +	struct sk_buff *skb;
 +	struct udphdr *udph;
 +	struct ethhdr *eth;
 +	struct iphdr *iph;
 +
 +	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 +	if (!skb)
 +		return NULL;
 +	tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
 +
 +	eth = skb_put(skb, sizeof(struct ethhdr));
 +	eth_random_addr(eth->h_dest);
 +	eth_random_addr(eth->h_source);
 +	eth->h_proto = htons(ETH_P_IP);
 +	skb->protocol = htons(ETH_P_IP);
 +
 +	iph = skb_put(skb, sizeof(struct iphdr));
 +	iph->protocol = IPPROTO_UDP;
 +	iph->saddr = in_aton("192.0.2.1");
 +	iph->daddr = in_aton("198.51.100.1");
 +	iph->version = 0x4;
 +	iph->frag_off = 0;
 +	iph->ihl = 0x5;
 +	iph->tot_len = htons(tot_len);
 +	iph->ttl = 100;
 +	ip_send_check(iph);
 +
 +	udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
 +	get_random_bytes(&udph->source, sizeof(u16));
 +	get_random_bytes(&udph->dest, sizeof(u16));
 +	udph->len = htons(sizeof(struct udphdr) + data_len);
 +
 +	return skb;
 +}
 +
 +static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port)
 +{
 +	struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev;
 +	struct devlink *devlink = priv_to_devlink(nsim_dev);
 +	struct nsim_trap_data *nsim_trap_data;
 +	int i;
 +
 +	nsim_trap_data = nsim_dev->trap_data;
 +
 +	spin_lock(&nsim_trap_data->trap_lock);
 +	for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
 +		struct nsim_trap_item *nsim_trap_item;
 +		struct sk_buff *skb;
 +
 +		nsim_trap_item = &nsim_trap_data->trap_items_arr[i];
 +		if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP)
 +			continue;
 +
 +		skb = nsim_dev_trap_skb_build();
 +		if (!skb)
 +			continue;
 +		skb->dev = nsim_dev_port->ns->netdev;
 +
 +		/* Trapped packets are usually passed to devlink in softIRQ,
 +		 * but in this case they are generated in a workqueue. Disable
 +		 * softIRQs to prevent lockdep from complaining about
 +		 * "inconsistent lock state".
 +		 */
 +		local_bh_disable();
 +		devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx,
 +				    &nsim_dev_port->devlink_port);
 +		local_bh_enable();
 +		consume_skb(skb);
 +	}
 +	spin_unlock(&nsim_trap_data->trap_lock);
 +}
 +
 +#define NSIM_TRAP_REPORT_INTERVAL_MS	100
 +
 +static void nsim_dev_trap_report_work(struct work_struct *work)
 +{
 +	struct nsim_trap_data *nsim_trap_data;
 +	struct nsim_dev_port *nsim_dev_port;
 +	struct nsim_dev *nsim_dev;
 +
 +	nsim_trap_data = container_of(work, struct nsim_trap_data,
 +				      trap_report_dw.work);
 +	nsim_dev = nsim_trap_data->nsim_dev;
 +
 +	/* For each running port and enabled packet trap, generate a UDP
 +	 * packet with a random 5-tuple and report it.
 +	 */
 +	mutex_lock(&nsim_dev->port_list_lock);
 +	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
 +		if (!netif_running(nsim_dev_port->ns->netdev))
 +			continue;
 +
 +		nsim_dev_trap_report(nsim_dev_port);
 +	}
 +	mutex_unlock(&nsim_dev->port_list_lock);
 +
 +	schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
 +			      msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
 +}
 +
 +static int nsim_dev_traps_init(struct devlink *devlink)
 +{
 +	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +	struct nsim_trap_data *nsim_trap_data;
 +	int err;
 +
 +	nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL);
 +	if (!nsim_trap_data)
 +		return -ENOMEM;
 +
 +	nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr),
 +						 sizeof(struct nsim_trap_item),
 +						 GFP_KERNEL);
 +	if (!nsim_trap_data->trap_items_arr) {
 +		err = -ENOMEM;
 +		goto err_trap_data_free;
 +	}
 +
 +	/* The lock is used to protect the action state of the registered
 +	 * traps. The value is written by user and read in delayed work when
 +	 * iterating over all the traps.
 +	 */
 +	spin_lock_init(&nsim_trap_data->trap_lock);
 +	nsim_trap_data->nsim_dev = nsim_dev;
 +	nsim_dev->trap_data = nsim_trap_data;
 +
 +	err = devlink_traps_register(devlink, nsim_traps_arr,
 +				     ARRAY_SIZE(nsim_traps_arr), NULL);
 +	if (err)
 +		goto err_trap_items_free;
 +
 +	INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw,
 +			  nsim_dev_trap_report_work);
 +	schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
 +			      msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
 +
 +	return 0;
 +
 +err_trap_items_free:
 +	kfree(nsim_trap_data->trap_items_arr);
 +err_trap_data_free:
 +	kfree(nsim_trap_data);
 +	return err;
 +}
 +
 +static void nsim_dev_traps_exit(struct devlink *devlink)
 +{
 +	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +
 +	cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw);
 +	devlink_traps_unregister(devlink, nsim_traps_arr,
 +				 ARRAY_SIZE(nsim_traps_arr));
 +	kfree(nsim_dev->trap_data->trap_items_arr);
 +	kfree(nsim_dev->trap_data);
 +}
 +
  static int nsim_dev_reload(struct devlink *devlink,
  			   struct netlink_ext_ack *extack)
  {
- 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
  	enum nsim_resource_id res_ids[] = {
  		NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
  		NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
  	};
+ 	struct net *net = nsim_devlink_net(devlink);
  	int i;
  
  	for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
@@@ -539,13 -209,11 +536,12 @@@
  
  		err = devlink_resource_size_get(devlink, res_ids[i], &val);
  		if (!err) {
- 			err = nsim_fib_set_max(nsim_dev->fib_data,
- 					       res_ids[i], val, extack);
+ 			err = nsim_fib_set_max(net, res_ids[i], val, extack);
  			if (err)
  				return err;
  		}
  	}
 +	nsim_devlink_param_load_driverinit_values(devlink);
  
  	return 0;
  }
@@@ -590,66 -258,11 +586,66 @@@ static int nsim_dev_flash_update(struc
  	return 0;
  }
  
 +static struct nsim_trap_item *
 +nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id)
 +{
 +	struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data;
 +	int i;
 +
 +	for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
 +		if (nsim_traps_arr[i].id == trap_id)
 +			return &nsim_trap_data->trap_items_arr[i];
 +	}
 +
 +	return NULL;
 +}
 +
 +static int nsim_dev_devlink_trap_init(struct devlink *devlink,
 +				      const struct devlink_trap *trap,
 +				      void *trap_ctx)
 +{
 +	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +	struct nsim_trap_item *nsim_trap_item;
 +
 +	nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
 +	if (WARN_ON(!nsim_trap_item))
 +		return -ENOENT;
 +
 +	nsim_trap_item->trap_ctx = trap_ctx;
 +	nsim_trap_item->action = trap->init_action;
 +
 +	return 0;
 +}
 +
 +static int
 +nsim_dev_devlink_trap_action_set(struct devlink *devlink,
 +				 const struct devlink_trap *trap,
 +				 enum devlink_trap_action action)
 +{
 +	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 +	struct nsim_trap_item *nsim_trap_item;
 +
 +	nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
 +	if (WARN_ON(!nsim_trap_item))
 +		return -ENOENT;
 +
 +	spin_lock(&nsim_dev->trap_data->trap_lock);
 +	nsim_trap_item->action = action;
 +	spin_unlock(&nsim_dev->trap_data->trap_lock);
 +
 +	return 0;
 +}
 +
  static const struct devlink_ops nsim_dev_devlink_ops = {
  	.reload = nsim_dev_reload,
  	.flash_update = nsim_dev_flash_update,
 +	.trap_init = nsim_dev_devlink_trap_init,
 +	.trap_action_set = nsim_dev_devlink_trap_action_set,
  };
  
 +#define NSIM_DEV_MAX_MACS_DEFAULT 32
 +#define NSIM_DEV_TEST1_DEFAULT true
 +
  static struct nsim_dev *
  nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
  {
@@@ -667,63 -280,31 +663,55 @@@
  	INIT_LIST_HEAD(&nsim_dev->port_list);
  	mutex_init(&nsim_dev->port_list_lock);
  	nsim_dev->fw_update_status = true;
 +	nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT;
 +	nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT;
  
- 	nsim_dev->fib_data = nsim_fib_create();
- 	if (IS_ERR(nsim_dev->fib_data)) {
- 		err = PTR_ERR(nsim_dev->fib_data);
- 		goto err_devlink_free;
- 	}
- 
  	err = nsim_dev_resources_register(devlink);
  	if (err)
- 		goto err_fib_destroy;
+ 		goto err_devlink_free;
  
  	err = devlink_register(devlink, &nsim_bus_dev->dev);
  	if (err)
  		goto err_resources_unregister;
  
 -	err = nsim_dev_debugfs_init(nsim_dev);
 +	err = devlink_params_register(devlink, nsim_devlink_params,
 +				      ARRAY_SIZE(nsim_devlink_params));
  	if (err)
  		goto err_dl_unregister;
 +	nsim_devlink_set_params_init_values(nsim_dev, devlink);
 +
 +	err = nsim_dev_dummy_region_init(nsim_dev, devlink);
 +	if (err)
 +		goto err_params_unregister;
 +
 +	err = nsim_dev_traps_init(devlink);
 +	if (err)
 +		goto err_dummy_region_exit;
 +
 +	err = nsim_dev_debugfs_init(nsim_dev);
 +	if (err)
 +		goto err_traps_exit;
  
  	err = nsim_bpf_dev_init(nsim_dev);
  	if (err)
  		goto err_debugfs_exit;
  
 +	devlink_params_publish(devlink);
  	return nsim_dev;
  
  err_debugfs_exit:
  	nsim_dev_debugfs_exit(nsim_dev);
 +err_traps_exit:
 +	nsim_dev_traps_exit(devlink);
 +err_dummy_region_exit:
 +	nsim_dev_dummy_region_exit(nsim_dev);
 +err_params_unregister:
 +	devlink_params_unregister(devlink, nsim_devlink_params,
 +				  ARRAY_SIZE(nsim_devlink_params));
  err_dl_unregister:
  	devlink_unregister(devlink);
  err_resources_unregister:
  	devlink_resources_unregister(devlink, NULL);
- err_fib_destroy:
- 	nsim_fib_destroy(nsim_dev->fib_data);
  err_devlink_free:
  	devlink_free(devlink);
  	return ERR_PTR(err);
@@@ -735,13 -316,8 +723,12 @@@ static void nsim_dev_destroy(struct nsi
  
  	nsim_bpf_dev_exit(nsim_dev);
  	nsim_dev_debugfs_exit(nsim_dev);
 +	nsim_dev_traps_exit(devlink);
 +	nsim_dev_dummy_region_exit(nsim_dev);
 +	devlink_params_unregister(devlink, nsim_devlink_params,
 +				  ARRAY_SIZE(nsim_devlink_params));
  	devlink_unregister(devlink);
  	devlink_resources_unregister(devlink, NULL);
- 	nsim_fib_destroy(nsim_dev->fib_data);
  	mutex_destroy(&nsim_dev->port_list_lock);
  	devlink_free(devlink);
  }
diff --combined drivers/net/netdevsim/netdevsim.h
index 262a6978bbca,9404637d34b7..66bf13765ad0
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@@ -145,7 -145,6 +145,7 @@@ struct nsim_dev_port 
  struct nsim_dev {
  	struct nsim_bus_dev *nsim_bus_dev;
  	struct nsim_fib_data *fib_data;
 +	struct nsim_trap_data *trap_data;
  	struct dentry *ddir;
  	struct dentry *ports_ddir;
  	struct bpf_offload_dev *bpf_dev;
@@@ -159,9 -158,6 +159,9 @@@
  	struct list_head port_list;
  	struct mutex port_list_lock; /* protects port list */
  	bool fw_update_status;
 +	u32 max_macs;
 +	bool test1;
 +	struct devlink_region *dummy_region;
  };
  
  int nsim_dev_init(void);
@@@ -173,12 -169,10 +173,10 @@@ int nsim_dev_port_add(struct nsim_bus_d
  int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
  		      unsigned int port_index);
  
- struct nsim_fib_data *nsim_fib_create(void);
- void nsim_fib_destroy(struct nsim_fib_data *fib_data);
- u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
- 		     enum nsim_resource_id res_id, bool max);
- int nsim_fib_set_max(struct nsim_fib_data *fib_data,
- 		     enum nsim_resource_id res_id, u64 val,
+ int nsim_fib_init(void);
+ void nsim_fib_exit(void);
+ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
+ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
  		     struct netlink_ext_ack *extack);
  
  #if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
diff --combined drivers/net/phy/at803x.c
index d98aa56710a9,6ad8b1c63c34..2aa7b2e60046
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@@ -249,40 -249,28 +249,24 @@@ static int at803x_config_init(struct ph
  {
  	int ret;
  
 -	ret = genphy_config_init(phydev);
 -	if (ret < 0)
 -		return ret;
 -
  	/* The RX and TX delay default is:
  	 *   after HW reset: RX delay enabled and TX delay disabled
  	 *   after SW reset: RX delay enabled, while TX delay retains the
  	 *   value before reset.
- 	 *
- 	 * So let's first disable the RX and TX delays in PHY and enable
- 	 * them based on the mode selected (this also takes care of RGMII
- 	 * mode where we expect delays to be disabled)
  	 */
- 
- 	ret = at803x_disable_rx_delay(phydev);
- 	if (ret < 0)
- 		return ret;
- 	ret = at803x_disable_tx_delay(phydev);
- 	if (ret < 0)
- 		return ret;
- 
  	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- 	    phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
- 		/* If RGMII_ID or RGMII_RXID are specified enable RX delay,
- 		 * otherwise keep it disabled
- 		 */
+ 	    phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
  		ret = at803x_enable_rx_delay(phydev);
- 		if (ret < 0)
- 			return ret;
- 	}
+ 	else
+ 		ret = at803x_disable_rx_delay(phydev);
+ 	if (ret < 0)
+ 		return ret;
  
  	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- 	    phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
- 		/* If RGMII_ID or RGMII_TXID are specified enable TX delay,
- 		 * otherwise keep it disabled
- 		 */
+ 	    phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
  		ret = at803x_enable_tx_delay(phydev);
- 	}
+ 	else
+ 		ret = at803x_disable_tx_delay(phydev);
  
  	return ret;
  }
diff --combined drivers/net/phy/phy_device.c
index d5db7604d7c4,27ebc2c6c2d0..d347ddcac45b
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@@ -1564,20 -1564,24 +1564,20 @@@ EXPORT_SYMBOL(phy_reset_after_clk_enabl
   */
  static int genphy_config_advert(struct phy_device *phydev)
  {
 -	u32 advertise;
 -	int bmsr, adv;
 -	int err, changed = 0;
 +	int err, bmsr, changed = 0;
 +	u32 adv;
  
  	/* Only allow advertising what this PHY supports */
  	linkmode_and(phydev->advertising, phydev->advertising,
  		     phydev->supported);
 -	if (!ethtool_convert_link_mode_to_legacy_u32(&advertise,
 -						     phydev->advertising))
 -		phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n",
 -			    __ETHTOOL_LINK_MODE_MASK_NBITS,
 -			    phydev->advertising);
 +
 +	adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
  
  	/* Setup standard advertisement */
  	err = phy_modify_changed(phydev, MII_ADVERTISE,
  				 ADVERTISE_ALL | ADVERTISE_100BASE4 |
  				 ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
 -				 ethtool_adv_to_mii_adv_t(advertise));
 +				 adv);
  	if (err < 0)
  		return err;
  	if (err > 0)
@@@ -1594,7 -1598,13 +1594,7 @@@
  	if (!(bmsr & BMSR_ESTATEN))
  		return changed;
  
 -	/* Configure gigabit if it's supported */
 -	adv = 0;
 -	if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
 -			      phydev->supported) ||
 -	    linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
 -			      phydev->supported))
 -		adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
 +	adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
  
  	err = phy_modify_changed(phydev, MII_CTRL1000,
  				 ADVERTISE_1000FULL | ADVERTISE_1000HALF,
@@@ -1671,20 -1681,18 +1671,20 @@@ int genphy_restart_aneg(struct phy_devi
  EXPORT_SYMBOL(genphy_restart_aneg);
  
  /**
 - * genphy_config_aneg - restart auto-negotiation or write BMCR
 + * __genphy_config_aneg - restart auto-negotiation or write BMCR
   * @phydev: target phy_device struct
 + * @changed: whether autoneg is requested
   *
   * Description: If auto-negotiation is enabled, we configure the
   *   advertising, and then restart auto-negotiation.  If it is not
   *   enabled, then we write the BMCR.
   */
 -int genphy_config_aneg(struct phy_device *phydev)
 +int __genphy_config_aneg(struct phy_device *phydev, bool changed)
  {
 -	int err, changed;
 +	int err;
  
 -	changed = genphy_config_eee_advert(phydev);
 +	if (genphy_config_eee_advert(phydev))
 +		changed = true;
  
  	if (AUTONEG_ENABLE != phydev->autoneg)
  		return genphy_setup_forced(phydev);
@@@ -1692,10 -1700,10 +1692,10 @@@
  	err = genphy_config_advert(phydev);
  	if (err < 0) /* error */
  		return err;
 +	else if (err)
 +		changed = true;
  
 -	changed |= err;
 -
 -	if (changed == 0) {
 +	if (!changed) {
  		/* Advertisement hasn't changed, but maybe aneg was never on to
  		 * begin with?  Or maybe phy was isolated?
  		 */
@@@ -1705,15 -1713,18 +1705,15 @@@
  			return ctl;
  
  		if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
 -			changed = 1; /* do restart aneg */
 +			changed = true; /* do restart aneg */
  	}
  
  	/* Only restart aneg if we are advertising something different
  	 * than we were before.
  	 */
 -	if (changed > 0)
 -		return genphy_restart_aneg(phydev);
 -
 -	return 0;
 +	return changed ? genphy_restart_aneg(phydev) : 0;
  }
 -EXPORT_SYMBOL(genphy_config_aneg);
 +EXPORT_SYMBOL(__genphy_config_aneg);
  
  /**
   * genphy_aneg_done - return auto-negotiation status
@@@ -1741,7 -1752,17 +1741,17 @@@ EXPORT_SYMBOL(genphy_aneg_done)
   */
  int genphy_update_link(struct phy_device *phydev)
  {
- 	int status;
+ 	int status = 0, bmcr;
+ 
+ 	bmcr = phy_read(phydev, MII_BMCR);
+ 	if (bmcr < 0)
+ 		return bmcr;
+ 
+ 	/* Autoneg is being started, therefore disregard BMSR value and
+ 	 * report link as down.
+ 	 */
+ 	if (bmcr & BMCR_ANRESTART)
+ 		goto done;
  
  	/* The link state is latched low so that momentary link
  	 * drops can be detected. Do not double-read the status
@@@ -1784,7 -1805,7 +1794,7 @@@ EXPORT_SYMBOL(genphy_update_link)
   */
  int genphy_read_status(struct phy_device *phydev)
  {
 -	int adv, lpa, lpagb, err, old_link = phydev->link;
 +	int lpa, lpagb, err, old_link = phydev->link;
  
  	/* Update the link, but return if there was an error */
  	err = genphy_update_link(phydev);
@@@ -1800,18 -1821,19 +1810,18 @@@
  	phydev->pause = 0;
  	phydev->asym_pause = 0;
  
 -	linkmode_zero(phydev->lp_advertising);
 -
  	if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
  		if (phydev->is_gigabit_capable) {
  			lpagb = phy_read(phydev, MII_STAT1000);
  			if (lpagb < 0)
  				return lpagb;
  
 -			adv = phy_read(phydev, MII_CTRL1000);
 -			if (adv < 0)
 -				return adv;
 -
  			if (lpagb & LPA_1000MSFAIL) {
 +				int adv = phy_read(phydev, MII_CTRL1000);
 +
 +				if (adv < 0)
 +					return adv;
 +
  				if (adv & CTL1000_ENABLE_MASTER)
  					phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n");
  				else
@@@ -1885,6 -1907,57 +1895,6 @@@ int genphy_soft_reset(struct phy_devic
  }
  EXPORT_SYMBOL(genphy_soft_reset);
  
 -int genphy_config_init(struct phy_device *phydev)
 -{
 -	int val;
 -	__ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, };
 -
 -	linkmode_set_bit_array(phy_basic_ports_array,
 -			       ARRAY_SIZE(phy_basic_ports_array),
 -			       features);
 -	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features);
 -	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features);
 -
 -	/* Do we support autonegotiation? */
 -	val = phy_read(phydev, MII_BMSR);
 -	if (val < 0)
 -		return val;
 -
 -	if (val & BMSR_ANEGCAPABLE)
 -		linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features);
 -
 -	if (val & BMSR_100FULL)
 -		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features);
 -	if (val & BMSR_100HALF)
 -		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features);
 -	if (val & BMSR_10FULL)
 -		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features);
 -	if (val & BMSR_10HALF)
 -		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features);
 -
 -	if (val & BMSR_ESTATEN) {
 -		val = phy_read(phydev, MII_ESTATUS);
 -		if (val < 0)
 -			return val;
 -
 -		if (val & ESTATUS_1000_TFULL)
 -			linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
 -					 features);
 -		if (val & ESTATUS_1000_THALF)
 -			linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
 -					 features);
 -		if (val & ESTATUS_1000_XFULL)
 -			linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
 -					 features);
 -	}
 -
 -	linkmode_and(phydev->supported, phydev->supported, features);
 -	linkmode_and(phydev->advertising, phydev->advertising, features);
 -
 -	return 0;
 -}
 -EXPORT_SYMBOL(genphy_config_init);
 -
  /**
   * genphy_read_abilities - read PHY abilities from Clause 22 registers
   * @phydev: target phy_device struct
diff --combined drivers/net/usb/lan78xx.c
index 769bb262fbec,f033fee225a1..58f5a219fb65
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@@ -1258,7 -1258,8 +1258,7 @@@ static void lan78xx_status(struct lan78
  		return;
  	}
  
 -	memcpy(&intdata, urb->transfer_buffer, 4);
 -	le32_to_cpus(&intdata);
 +	intdata = get_unaligned_le32(urb->transfer_buffer);
  
  	if (intdata & INT_ENP_PHY_INT) {
  		netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
@@@ -2729,7 -2730,6 +2729,7 @@@ static struct sk_buff *lan78xx_tx_prep(
  				       struct sk_buff *skb, gfp_t flags)
  {
  	u32 tx_cmd_a, tx_cmd_b;
 +	void *ptr;
  
  	if (skb_cow_head(skb, TX_OVERHEAD)) {
  		dev_kfree_skb_any(skb);
@@@ -2758,9 -2758,13 +2758,9 @@@
  		tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_;
  	}
  
 -	skb_push(skb, 4);
 -	cpu_to_le32s(&tx_cmd_b);
 -	memcpy(skb->data, &tx_cmd_b, 4);
 -
 -	skb_push(skb, 4);
 -	cpu_to_le32s(&tx_cmd_a);
 -	memcpy(skb->data, &tx_cmd_a, 4);
 +	ptr = skb_push(skb, 8);
 +	put_unaligned_le32(tx_cmd_a, ptr);
 +	put_unaligned_le32(tx_cmd_b, ptr + 4);
  
  	return skb;
  }
@@@ -3101,13 -3105,16 +3101,13 @@@ static int lan78xx_rx(struct lan78xx_ne
  		struct sk_buff *skb2;
  		unsigned char *packet;
  
 -		memcpy(&rx_cmd_a, skb->data, sizeof(rx_cmd_a));
 -		le32_to_cpus(&rx_cmd_a);
 +		rx_cmd_a = get_unaligned_le32(skb->data);
  		skb_pull(skb, sizeof(rx_cmd_a));
  
 -		memcpy(&rx_cmd_b, skb->data, sizeof(rx_cmd_b));
 -		le32_to_cpus(&rx_cmd_b);
 +		rx_cmd_b = get_unaligned_le32(skb->data);
  		skb_pull(skb, sizeof(rx_cmd_b));
  
 -		memcpy(&rx_cmd_c, skb->data, sizeof(rx_cmd_c));
 -		le16_to_cpus(&rx_cmd_c);
 +		rx_cmd_c = get_unaligned_le16(skb->data);
  		skb_pull(skb, sizeof(rx_cmd_c));
  
  		packet = skb->data;
@@@ -3785,7 -3792,7 +3785,7 @@@ static int lan78xx_probe(struct usb_int
  	ret = register_netdev(netdev);
  	if (ret != 0) {
  		netif_err(dev, probe, netdev, "couldn't register the device\n");
- 		goto out3;
+ 		goto out4;
  	}
  
  	usb_set_intfdata(intf, dev);
@@@ -3800,12 -3807,14 +3800,14 @@@
  
  	ret = lan78xx_phy_init(dev);
  	if (ret < 0)
- 		goto out4;
+ 		goto out5;
  
  	return 0;
  
- out4:
+ out5:
  	unregister_netdev(netdev);
+ out4:
+ 	usb_free_urb(dev->urb_intr);
  out3:
  	lan78xx_unbind(dev, intf);
  out2:
diff --combined drivers/net/xen-netback/netback.c
index 4679fcf1a1c4,c9262ffeefe4..0020b2e8c279
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@@ -136,12 -136,12 +136,12 @@@ static inline struct xenvif_queue *ubuf
  
  static u16 frag_get_pending_idx(skb_frag_t *frag)
  {
 -	return (u16)frag->page_offset;
 +	return (u16)skb_frag_off(frag);
  }
  
  static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
  {
 -	frag->page_offset = pending_idx;
 +	skb_frag_off_set(frag, pending_idx);
  }
  
  static inline pending_ring_idx_t pending_index(unsigned i)
@@@ -925,6 -925,7 +925,7 @@@ static void xenvif_tx_build_gops(struc
  			skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
  			nskb = xenvif_alloc_skb(0);
  			if (unlikely(nskb == NULL)) {
+ 				skb_shinfo(skb)->nr_frags = 0;
  				kfree_skb(skb);
  				xenvif_tx_err(queue, &txreq, extra_count, idx);
  				if (net_ratelimit())
@@@ -940,6 -941,7 +941,7 @@@
  
  			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
  				/* Failure in xenvif_set_skb_gso is fatal. */
+ 				skb_shinfo(skb)->nr_frags = 0;
  				kfree_skb(skb);
  				kfree_skb(nskb);
  				break;
@@@ -1055,7 -1057,7 +1057,7 @@@ static int xenvif_handle_frag_list(stru
  			int j;
  			skb->truesize += skb->data_len;
  			for (j = 0; j < i; j++)
 -				put_page(frags[j].page.p);
 +				put_page(skb_frag_page(&frags[j]));
  			return -ENOMEM;
  		}
  
@@@ -1067,8 -1069,8 +1069,8 @@@
  			BUG();
  
  		offset += len;
 -		frags[i].page.p = page;
 -		frags[i].page_offset = 0;
 +		__skb_frag_set_page(&frags[i], page);
 +		skb_frag_off_set(&frags[i], 0);
  		skb_frag_size_set(&frags[i], len);
  	}
  
@@@ -1653,6 -1655,9 +1655,6 @@@ static int __init netback_init(void
  
  #ifdef CONFIG_DEBUG_FS
  	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
 -	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
 -		pr_warn("Init of debugfs returned %ld!\n",
 -			PTR_ERR(xen_netback_dbg_root));
  #endif /* CONFIG_DEBUG_FS */
  
  	return 0;
diff --combined drivers/s390/net/qeth_core_main.c
index 5aa0f1268bca,9c3310c4d61d..0803070246aa
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@@ -544,6 -544,7 +544,7 @@@ static struct qeth_reply *qeth_alloc_re
  	if (reply) {
  		refcount_set(&reply->refcnt, 1);
  		init_completion(&reply->received);
+ 		spin_lock_init(&reply->lock);
  	}
  	return reply;
  }
@@@ -799,6 -800,13 +800,13 @@@ static void qeth_issue_next_read_cb(str
  
  	if (!reply->callback) {
  		rc = 0;
+ 		goto no_callback;
+ 	}
+ 
+ 	spin_lock_irqsave(&reply->lock, flags);
+ 	if (reply->rc) {
+ 		/* Bail out when the requestor has already left: */
+ 		rc = reply->rc;
  	} else {
  		if (cmd) {
  			reply->offset = (u16)((char *)cmd - (char *)iob->data);
@@@ -807,7 -815,9 +815,9 @@@
  			rc = reply->callback(card, reply, (unsigned long)iob);
  		}
  	}
+ 	spin_unlock_irqrestore(&reply->lock, flags);
  
+ no_callback:
  	if (rc <= 0)
  		qeth_notify_reply(reply, rc);
  	qeth_put_reply(reply);
@@@ -1749,6 -1759,16 +1759,16 @@@ static int qeth_send_control_data(struc
  		rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
  
  	qeth_dequeue_reply(card, reply);
+ 
+ 	if (reply_cb) {
+ 		/* Wait until the callback for a late reply has completed: */
+ 		spin_lock_irq(&reply->lock);
+ 		if (rc)
+ 			/* Zap any callback that's still pending: */
+ 			reply->rc = rc;
+ 		spin_unlock_irq(&reply->lock);
+ 	}
+ 
  	if (!rc)
  		rc = reply->rc;
  	qeth_put_reply(reply);
@@@ -3515,7 -3535,7 +3535,7 @@@ static int qeth_get_elements_for_frags(
  	int cnt, elements = 0;
  
  	for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) {
 -		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[cnt];
 +		skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt];
  
  		elements += qeth_get_elements_for_range(
  			(addr_t)skb_frag_address(frag),
diff --combined drivers/staging/unisys/visornic/visornic_main.c
index 6fa7726185de,40dd573e73c3..1d1440d43002
--- a/drivers/staging/unisys/visornic/visornic_main.c
+++ b/drivers/staging/unisys/visornic/visornic_main.c
@@@ -284,9 -284,9 +284,9 @@@ static int visor_copy_fragsinfo_from_sk
  		for (frag = 0; frag < numfrags; frag++) {
  			count = add_physinfo_entries(page_to_pfn(
  				  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
 -				  skb_shinfo(skb)->frags[frag].page_offset,
 -				  skb_shinfo(skb)->frags[frag].size, count,
 -				  frags_max, frags);
 +				  skb_frag_off(&skb_shinfo(skb)->frags[frag]),
 +				  skb_frag_size(&skb_shinfo(skb)->frags[frag]),
 +				  count, frags_max, frags);
  			/* add_physinfo_entries only returns
  			 * zero if the frags array is out of room
  			 * That should never happen because we
@@@ -1750,7 -1750,8 +1750,8 @@@ static int visornic_poll(struct napi_st
  }
  
  /* poll_for_irq	- checks the status of the response queue
-  * @v: Void pointer to the visronic devdata struct.
+  * @t: pointer to the 'struct timer_list' from which we can retrieve the
+  *     visornic devdata struct.
   *
   * Main function of the vnic_incoming thread. Periodically check the response
   * queue and drain it if needed.
diff --combined include/linux/mlx5/mlx5_ifc.h
index da5e7eaed438,b8b570c30b5e..a66ed0abe40e
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@@ -1040,21 -1040,6 +1040,21 @@@ enum 
  	MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
  };
  
 +#define MLX5_FC_BULK_SIZE_FACTOR 128
 +
 +enum mlx5_fc_bulk_alloc_bitmask {
 +	MLX5_FC_BULK_128   = (1 << 0),
 +	MLX5_FC_BULK_256   = (1 << 1),
 +	MLX5_FC_BULK_512   = (1 << 2),
 +	MLX5_FC_BULK_1024  = (1 << 3),
 +	MLX5_FC_BULK_2048  = (1 << 4),
 +	MLX5_FC_BULK_4096  = (1 << 5),
 +	MLX5_FC_BULK_8192  = (1 << 6),
 +	MLX5_FC_BULK_16384 = (1 << 7),
 +};
 +
 +#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
 +
  struct mlx5_ifc_cmd_hca_cap_bits {
  	u8         reserved_at_0[0x30];
  	u8         vhca_id[0x10];
@@@ -1259,8 -1244,7 +1259,8 @@@
  	u8         reserved_at_2e0[0x7];
  	u8         max_qp_mcg[0x19];
  
 -	u8         reserved_at_300[0x18];
 +	u8         reserved_at_300[0x10];
 +	u8         flow_counter_bulk_alloc[0x8];
  	u8         log_max_mcg[0x8];
  
  	u8         reserved_at_320[0x3];
@@@ -2782,7 -2766,7 +2782,7 @@@ struct mlx5_ifc_traffic_counter_bits 
  struct mlx5_ifc_tisc_bits {
  	u8         strict_lag_tx_port_affinity[0x1];
  	u8         tls_en[0x1];
 -	u8         reserved_at_1[0x2];
 +	u8         reserved_at_2[0x2];
  	u8         lag_tx_port_affinity[0x04];
  
  	u8         reserved_at_8[0x4];
@@@ -2957,13 -2941,6 +2957,13 @@@ enum 
  	SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
  };
  
 +enum {
 +	ELEMENT_TYPE_CAP_MASK_TASR		= 1 << 0,
 +	ELEMENT_TYPE_CAP_MASK_VPORT		= 1 << 1,
 +	ELEMENT_TYPE_CAP_MASK_VPORT_TC		= 1 << 2,
 +	ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC	= 1 << 3,
 +};
 +
  struct mlx5_ifc_scheduling_context_bits {
  	u8         element_type[0x8];
  	u8         reserved_at_8[0x18];
@@@ -7840,8 -7817,7 +7840,8 @@@ struct mlx5_ifc_alloc_flow_counter_in_b
  	u8         reserved_at_20[0x10];
  	u8         op_mod[0x10];
  
 -	u8         reserved_at_40[0x40];
 +	u8         reserved_at_40[0x38];
 +	u8         flow_counter_bulk[0x8];
  };
  
  struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
@@@ -10078,9 -10054,8 +10078,8 @@@ struct mlx5_ifc_tls_static_params_bits 
  };
  
  struct mlx5_ifc_tls_progress_params_bits {
- 	u8         valid[0x1];
- 	u8         reserved_at_1[0x7];
- 	u8         pd[0x18];
+ 	u8         reserved_at_0[0x8];
+ 	u8         tisn[0x18];
  
  	u8         next_record_tcp_sn[0x20];
  
diff --combined include/linux/skbuff.h
index 7eb28b72d9ba,ba5583522d24..77c6dc88e95d
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -14,7 -14,6 +14,7 @@@
  #include <linux/compiler.h>
  #include <linux/time.h>
  #include <linux/bug.h>
 +#include <linux/bvec.h>
  #include <linux/cache.h>
  #include <linux/rbtree.h>
  #include <linux/socket.h>
@@@ -309,45 -308,58 +309,45 @@@ extern int sysctl_max_skb_frags
   */
  #define GSO_BY_FRAGS	0xFFFF
  
 -typedef struct skb_frag_struct skb_frag_t;
 -
 -struct skb_frag_struct {
 -	struct {
 -		struct page *p;
 -	} page;
 -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
 -	__u32 page_offset;
 -	__u32 size;
 -#else
 -	__u16 page_offset;
 -	__u16 size;
 -#endif
 -};
 +typedef struct bio_vec skb_frag_t;
  
  /**
 - * skb_frag_size - Returns the size of a skb fragment
 + * skb_frag_size() - Returns the size of a skb fragment
   * @frag: skb fragment
   */
  static inline unsigned int skb_frag_size(const skb_frag_t *frag)
  {
 -	return frag->size;
 +	return frag->bv_len;
  }
  
  /**
 - * skb_frag_size_set - Sets the size of a skb fragment
 + * skb_frag_size_set() - Sets the size of a skb fragment
   * @frag: skb fragment
   * @size: size of fragment
   */
  static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
  {
 -	frag->size = size;
 +	frag->bv_len = size;
  }
  
  /**
 - * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
 + * skb_frag_size_add() - Increments the size of a skb fragment by @delta
   * @frag: skb fragment
   * @delta: value to add
   */
  static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
  {
 -	frag->size += delta;
 +	frag->bv_len += delta;
  }
  
  /**
 - * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
 + * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta
   * @frag: skb fragment
   * @delta: value to subtract
   */
  static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
  {
 -	frag->size -= delta;
 +	frag->bv_len -= delta;
  }
  
  /**
@@@ -367,7 -379,7 +367,7 @@@ static inline bool skb_frag_must_loop(s
   *	skb_frag_foreach_page - loop over pages in a fragment
   *
   *	@f:		skb frag to operate on
 - *	@f_off:		offset from start of f->page.p
 + *	@f_off:		offset from start of f->bv_page
   *	@f_len:		length from f_off to loop over
   *	@p:		(temp var) current page
   *	@p_off:		(temp var) offset from start of current page,
@@@ -1271,7 -1283,7 +1271,7 @@@ static inline int skb_flow_dissector_bp
  
  struct bpf_flow_dissector;
  bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 -		      __be16 proto, int nhoff, int hlen);
 +		      __be16 proto, int nhoff, int hlen, unsigned int flags);
  
  bool __skb_flow_dissect(const struct net *net,
  			const struct sk_buff *skb,
@@@ -1362,6 -1374,14 +1362,14 @@@ static inline void skb_copy_hash(struc
  	to->l4_hash = from->l4_hash;
  };
  
+ static inline void skb_copy_decrypted(struct sk_buff *to,
+ 				      const struct sk_buff *from)
+ {
+ #ifdef CONFIG_TLS_DEVICE
+ 	to->decrypted = from->decrypted;
+ #endif
+ }
+ 
  #ifdef NET_SKBUFF_DATA_USES_OFFSET
  static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
  {
@@@ -2077,8 -2097,8 +2085,8 @@@ static inline void __skb_fill_page_desc
  	 * that not all callers have unique ownership of the page but rely
  	 * on page_is_pfmemalloc doing the right thing(tm).
  	 */
 -	frag->page.p		  = page;
 -	frag->page_offset	  = off;
 +	frag->bv_page		  = page;
 +	frag->bv_offset		  = off;
  	skb_frag_size_set(frag, size);
  
  	page = compound_head(page);
@@@ -2857,46 -2877,6 +2865,46 @@@ static inline void skb_propagate_pfmema
  		skb->pfmemalloc = true;
  }
  
 +/**
 + * skb_frag_off() - Returns the offset of a skb fragment
 + * @frag: the paged fragment
 + */
 +static inline unsigned int skb_frag_off(const skb_frag_t *frag)
 +{
 +	return frag->bv_offset;
 +}
 +
 +/**
 + * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
 + * @frag: skb fragment
 + * @delta: value to add
 + */
 +static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
 +{
 +	frag->bv_offset += delta;
 +}
 +
 +/**
 + * skb_frag_off_set() - Sets the offset of a skb fragment
 + * @frag: skb fragment
 + * @offset: offset of fragment
 + */
 +static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
 +{
 +	frag->bv_offset = offset;
 +}
 +
 +/**
 + * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
 + * @fragto: skb fragment where offset is set
 + * @fragfrom: skb fragment offset is copied from
 + */
 +static inline void skb_frag_off_copy(skb_frag_t *fragto,
 +				     const skb_frag_t *fragfrom)
 +{
 +	fragto->bv_offset = fragfrom->bv_offset;
 +}
 +
  /**
   * skb_frag_page - retrieve the page referred to by a paged fragment
   * @frag: the paged fragment
@@@ -2905,7 -2885,7 +2913,7 @@@
   */
  static inline struct page *skb_frag_page(const skb_frag_t *frag)
  {
 -	return frag->page.p;
 +	return frag->bv_page;
  }
  
  /**
@@@ -2963,7 -2943,7 +2971,7 @@@ static inline void skb_frag_unref(struc
   */
  static inline void *skb_frag_address(const skb_frag_t *frag)
  {
 -	return page_address(skb_frag_page(frag)) + frag->page_offset;
 +	return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
  }
  
  /**
@@@ -2979,18 -2959,7 +2987,18 @@@ static inline void *skb_frag_address_sa
  	if (unlikely(!ptr))
  		return NULL;
  
 -	return ptr + frag->page_offset;
 +	return ptr + skb_frag_off(frag);
 +}
 +
 +/**
 + * skb_frag_page_copy() - sets the page in a fragment from another fragment
 + * @fragto: skb fragment where page is set
 + * @fragfrom: skb fragment page is copied from
 + */
 +static inline void skb_frag_page_copy(skb_frag_t *fragto,
 +				      const skb_frag_t *fragfrom)
 +{
 +	fragto->bv_page = fragfrom->bv_page;
  }
  
  /**
@@@ -3002,7 -2971,7 +3010,7 @@@
   */
  static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
  {
 -	frag->page.p = page;
 +	frag->bv_page = page;
  }
  
  /**
@@@ -3038,7 -3007,7 +3046,7 @@@ static inline dma_addr_t skb_frag_dma_m
  					  enum dma_data_direction dir)
  {
  	return dma_map_page(dev, skb_frag_page(frag),
 -			    frag->page_offset + offset, size, dir);
 +			    skb_frag_off(frag) + offset, size, dir);
  }
  
  static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
@@@ -3205,10 -3174,10 +3213,10 @@@ static inline bool skb_can_coalesce(str
  	if (skb_zcopy(skb))
  		return false;
  	if (i) {
 -		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
 +		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
  
  		return page == skb_frag_page(frag) &&
 -		       off == frag->page_offset + skb_frag_size(frag);
 +		       off == skb_frag_off(frag) + skb_frag_size(frag);
  	}
  	return false;
  }
diff --combined include/net/netfilter/nf_tables.h
index dc301e3d6739,475d6f28ca67..e73d16f8b870
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@@ -25,7 -25,6 +25,7 @@@ struct nft_pktinfo 
  	struct xt_action_param		xt;
  };
  
 +#if IS_ENABLED(CONFIG_NETFILTER)
  static inline struct net *nft_net(const struct nft_pktinfo *pkt)
  {
  	return pkt->xt.state->net;
@@@ -58,7 -57,6 +58,7 @@@ static inline void nft_set_pktinfo(stru
  	pkt->skb = skb;
  	pkt->xt.state = state;
  }
 +#endif
  
  static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
  					  struct sk_buff *skb)
@@@ -423,8 -421,7 +423,7 @@@ struct nft_set 
  	unsigned char			*udata;
  	/* runtime data below here */
  	const struct nft_set_ops	*ops ____cacheline_aligned;
- 	u16				flags:13,
- 					bound:1,
+ 	u16				flags:14,
  					genmask:2;
  	u8				klen;
  	u8				dlen;
@@@ -929,11 -926,9 +928,11 @@@ struct nft_chain_type 
  	int				family;
  	struct module			*owner;
  	unsigned int			hook_mask;
 +#if IS_ENABLED(CONFIG_NETFILTER)
  	nf_hookfn			*hooks[NF_MAX_HOOKS];
  	int				(*ops_register)(struct net *net, const struct nf_hook_ops *ops);
  	void				(*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
 +#endif
  };
  
  int nft_chain_validate_dependency(const struct nft_chain *chain,
@@@ -959,9 -954,7 +958,9 @@@ struct nft_stats 
   *	@flow_block: flow block (for hardware offload)
   */
  struct nft_base_chain {
 +#if IS_ENABLED(CONFIG_NETFILTER)
  	struct nf_hook_ops		ops;
 +#endif
  	const struct nft_chain_type	*type;
  	u8				policy;
  	u8				flags;
@@@ -1158,9 -1151,7 +1157,9 @@@ struct nft_flowtable 
  					use:30;
  	u64				handle;
  	/* runtime data below here */
 +#if IS_ENABLED(CONFIG_NETFILTER)
  	struct nf_hook_ops		*ops ____cacheline_aligned;
 +#endif
  	struct nf_flowtable		data;
  };
  
@@@ -1215,8 -1206,6 +1214,8 @@@ void nft_trace_notify(struct nft_tracei
  #define MODULE_ALIAS_NFT_OBJ(type) \
  	MODULE_ALIAS("nft-obj-" __stringify(type))
  
 +#if IS_ENABLED(CONFIG_NF_TABLES)
 +
  /*
   * The gencursor defines two generations, the currently active and the
   * next one. Objects contain a bitmask of 2 bits specifying the generations
@@@ -1290,8 -1279,6 +1289,8 @@@ static inline void nft_set_elem_change_
  	ext->genmask ^= nft_genmask_next(net);
  }
  
 +#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
 +
  /*
   * We use a free bit in the genmask field to indicate the element
   * is busy, meaning it is currently being processed either by
@@@ -1360,12 -1347,15 +1359,15 @@@ struct nft_trans_rule 
  struct nft_trans_set {
  	struct nft_set			*set;
  	u32				set_id;
+ 	bool				bound;
  };
  
  #define nft_trans_set(trans)	\
  	(((struct nft_trans_set *)trans->data)->set)
  #define nft_trans_set_id(trans)	\
  	(((struct nft_trans_set *)trans->data)->set_id)
+ #define nft_trans_set_bound(trans)	\
+ 	(((struct nft_trans_set *)trans->data)->bound)
  
  struct nft_trans_chain {
  	bool				update;
@@@ -1396,12 -1386,15 +1398,15 @@@ struct nft_trans_table 
  struct nft_trans_elem {
  	struct nft_set			*set;
  	struct nft_set_elem		elem;
+ 	bool				bound;
  };
  
  #define nft_trans_elem_set(trans)	\
  	(((struct nft_trans_elem *)trans->data)->set)
  #define nft_trans_elem(trans)	\
  	(((struct nft_trans_elem *)trans->data)->elem)
+ #define nft_trans_elem_set_bound(trans)	\
+ 	(((struct nft_trans_elem *)trans->data)->bound)
  
  struct nft_trans_obj {
  	struct nft_object		*obj;
diff --combined include/net/netfilter/nf_tables_offload.h
index 8a5969d9b80b,c8b9dec376f5..db104665a9e4
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@@ -9,7 -9,6 +9,7 @@@ struct nft_offload_reg 
  	u32		len;
  	u32		base_offset;
  	u32		offset;
 +	struct nft_data data;
  	struct nft_data	mask;
  };
  
@@@ -64,10 -63,6 +64,10 @@@ struct nft_rule
  struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
  void nft_flow_rule_destroy(struct nft_flow_rule *flow);
  int nft_flow_rule_offload_commit(struct net *net);
 +void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
 +				    flow_indr_block_bind_cb_t *cb,
 +				    void *cb_priv,
 +				    enum flow_block_command command);
  
  #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
  	(__reg)->base_offset	=					\
@@@ -78,4 -73,6 +78,6 @@@
  	(__reg)->key		= __key;				\
  	memset(&(__reg)->mask, 0xff, (__reg)->len);
  
+ int nft_chain_offload_priority(struct nft_base_chain *basechain);
+ 
  #endif
diff --combined include/net/pkt_cls.h
index 0790a4ed909c,98be18ef1ed3..64999ffcb486
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@@ -70,6 -70,15 +70,6 @@@ static inline struct Qdisc *tcf_block_q
  	return block->q;
  }
  
 -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -				tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -			      tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -void __tc_indr_block_cb_unregister(struct net_device *dev,
 -				   tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -void tc_indr_block_cb_unregister(struct net_device *dev,
 -				 tc_indr_block_bind_cb_t *cb, void *cb_ident);
 -
  int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
  		 struct tcf_result *res, bool compat_mode);
  
@@@ -128,6 -137,32 +128,6 @@@ void tc_setup_cb_block_unregister(struc
  {
  }
  
 -static inline
 -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -				tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -	return 0;
 -}
 -
 -static inline
 -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 -			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -	return 0;
 -}
 -
 -static inline
 -void __tc_indr_block_cb_unregister(struct net_device *dev,
 -				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -}
 -
 -static inline
 -void tc_indr_block_cb_unregister(struct net_device *dev,
 -				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
 -{
 -}
 -
  static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
  			       struct tcf_result *res, bool compat_mode)
  {
@@@ -611,7 -646,7 +611,7 @@@ tc_cls_common_offload_init(struct flow_
  {
  	cls_common->chain_index = tp->chain->index;
  	cls_common->protocol = tp->protocol;
- 	cls_common->prio = tp->prio;
+ 	cls_common->prio = tp->prio >> 16;
  	if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
  		cls_common->extack = extack;
  }
diff --combined include/uapi/linux/bpf.h
index 4393bd4b2419,a5aa7d3ac6a1..0e66371bea13
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@@ -134,7 -134,6 +134,7 @@@ enum bpf_map_type 
  	BPF_MAP_TYPE_QUEUE,
  	BPF_MAP_TYPE_STACK,
  	BPF_MAP_TYPE_SK_STORAGE,
 +	BPF_MAP_TYPE_DEVMAP_HASH,
  };
  
  /* Note that tracing related programs such as
@@@ -1467,8 -1466,8 +1467,8 @@@ union bpf_attr 
   * 		If no cookie has been set yet, generate a new cookie. Once
   * 		generated, the socket cookie remains stable for the life of the
   * 		socket. This helper can be useful for monitoring per socket
-  * 		networking traffic statistics as it provides a unique socket
-  * 		identifier per namespace.
+  * 		networking traffic statistics as it provides a global socket
+  * 		identifier that can be assumed unique.
   * 	Return
   * 		A 8-byte long non-decreasing number on success, or 0 if the
   * 		socket field is missing inside *skb*.
@@@ -2714,33 -2713,6 +2714,33 @@@
   *		**-EPERM** if no permission to send the *sig*.
   *
   *		**-EAGAIN** if bpf program can try again.
 + *
 + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
 + *	Description
 + *		Try to issue a SYN cookie for the packet with corresponding
 + *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
 + *
 + *		*iph* points to the start of the IPv4 or IPv6 header, while
 + *		*iph_len* contains **sizeof**\ (**struct iphdr**) or
 + *		**sizeof**\ (**struct ipv6hdr**).
 + *
 + *		*th* points to the start of the TCP header, while *th_len*
 + *		contains the length of the TCP header.
 + *
 + *	Return
 + *		On success, lower 32 bits hold the generated SYN cookie,
 + *		followed by 16 bits which hold the MSS value for that cookie,
 + *		and the top 16 bits are unused.
 + *
 + *		On failure, the returned value is one of the following:
 + *
 + *		**-EINVAL** SYN cookie cannot be issued due to error
 + *
 + *		**-ENOENT** SYN cookie should not be issued (no SYN flood)
 + *
 + *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
 + *
 + *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
   */
  #define __BPF_FUNC_MAPPER(FN)		\
  	FN(unspec),			\
@@@ -2852,8 -2824,7 +2852,8 @@@
  	FN(strtoul),			\
  	FN(sk_storage_get),		\
  	FN(sk_storage_delete),		\
 -	FN(send_signal),
 +	FN(send_signal),		\
 +	FN(tcp_gen_syncookie),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@@ -3536,10 -3507,6 +3536,10 @@@ enum bpf_task_fd_type 
  	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
  };
  
 +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		(1U << 1)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
 +
  struct bpf_flow_keys {
  	__u16	nhoff;
  	__u16	thoff;
@@@ -3561,8 -3528,6 +3561,8 @@@
  			__u32	ipv6_dst[4];	/* in6_addr; network order */
  		};
  	};
 +	__u32	flags;
 +	__be32	flow_label;
  };
  
  struct bpf_func_info {
diff --combined net/ipv4/tcp.c
index f8fa1686f7f3,77b485d60b9d..051ef10374f6
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@@ -984,6 -984,9 +984,9 @@@ new_segment
  			if (!skb)
  				goto wait_for_memory;
  
+ #ifdef CONFIG_TLS_DEVICE
+ 			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+ #endif
  			skb_entail(sk, skb);
  			copy = size_goal;
  		}
@@@ -1162,7 -1165,7 +1165,7 @@@ int tcp_sendmsg_locked(struct sock *sk
  	struct sockcm_cookie sockc;
  	int flags, err, copied = 0;
  	int mss_now = 0, size_goal, copied_syn = 0;
 -	bool process_backlog = false;
 +	int process_backlog = 0;
  	bool zc = false;
  	long timeo;
  
@@@ -1254,10 -1257,9 +1257,10 @@@ new_segment
  			if (!sk_stream_memory_free(sk))
  				goto wait_for_sndbuf;
  
 -			if (process_backlog && sk_flush_backlog(sk)) {
 -				process_backlog = false;
 -				goto restart;
 +			if (unlikely(process_backlog >= 16)) {
 +				process_backlog = 0;
 +				if (sk_flush_backlog(sk))
 +					goto restart;
  			}
  			first_skb = tcp_rtx_and_write_queues_empty(sk);
  			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
@@@ -1265,7 -1267,7 +1268,7 @@@
  			if (!skb)
  				goto wait_for_memory;
  
 -			process_backlog = true;
 +			process_backlog++;
  			skb->ip_summed = CHECKSUM_PARTIAL;
  
  			skb_entail(sk, skb);
@@@ -1777,21 -1779,19 +1780,21 @@@ static int tcp_zerocopy_receive(struct 
  				break;
  			frags = skb_shinfo(skb)->frags;
  			while (offset) {
 -				if (frags->size > offset)
 +				if (skb_frag_size(frags) > offset)
  					goto out;
 -				offset -= frags->size;
 +				offset -= skb_frag_size(frags);
  				frags++;
  			}
  		}
 -		if (frags->size != PAGE_SIZE || frags->page_offset) {
 +		if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
  			int remaining = zc->recv_skip_hint;
 +			int size = skb_frag_size(frags);
  
 -			while (remaining && (frags->size != PAGE_SIZE ||
 -					     frags->page_offset)) {
 -				remaining -= frags->size;
 +			while (remaining && (size != PAGE_SIZE ||
 +					     skb_frag_off(frags))) {
 +				remaining -= size;
  				frags++;
 +				size = skb_frag_size(frags);
  			}
  			zc->recv_skip_hint -= remaining;
  			break;
@@@ -3784,8 -3784,8 +3787,8 @@@ int tcp_md5_hash_skb_data(struct tcp_md
  		return 1;
  
  	for (i = 0; i < shi->nr_frags; ++i) {
 -		const struct skb_frag_struct *f = &shi->frags[i];
 -		unsigned int offset = f->page_offset;
 +		const skb_frag_t *f = &shi->frags[i];
 +		unsigned int offset = skb_frag_off(f);
  		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
  
  		sg_set_page(&sg, page, skb_frag_size(f),
diff --combined net/ipv4/tcp_output.c
index e6d02e05bb1c,979520e46e33..5c46bc4c7e8d
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@@ -1320,6 -1320,7 +1320,7 @@@ int tcp_fragment(struct sock *sk, enum 
  	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
  	if (!buff)
  		return -ENOMEM; /* We'll just try again later. */
+ 	skb_copy_decrypted(buff, skb);
  
  	sk->sk_wmem_queued += buff->truesize;
  	sk_mem_charge(sk, buff->truesize);
@@@ -1402,7 -1403,7 +1403,7 @@@ static int __pskb_trim_head(struct sk_b
  		} else {
  			shinfo->frags[k] = shinfo->frags[i];
  			if (eat) {
 -				shinfo->frags[k].page_offset += eat;
 +				skb_frag_off_add(&shinfo->frags[k], eat);
  				skb_frag_size_sub(&shinfo->frags[k], eat);
  				eat = 0;
  			}
@@@ -1874,6 -1875,7 +1875,7 @@@ static int tso_fragment(struct sock *sk
  	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
  	if (unlikely(!buff))
  		return -ENOMEM;
+ 	skb_copy_decrypted(buff, skb);
  
  	sk->sk_wmem_queued += buff->truesize;
  	sk_mem_charge(sk, buff->truesize);
@@@ -2143,6 -2145,7 +2145,7 @@@ static int tcp_mtu_probe(struct sock *s
  	sk_mem_charge(sk, nskb->truesize);
  
  	skb = tcp_send_head(sk);
+ 	skb_copy_decrypted(nskb, skb);
  
  	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
  	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
diff --combined net/netfilter/nf_tables_api.c
index fe3b7b0c6c66,d47469f824a1..6d00bef023c4
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@@ -138,9 -138,14 +138,14 @@@ static void nft_set_trans_bind(const st
  		return;
  
  	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
- 		if (trans->msg_type == NFT_MSG_NEWSET &&
- 		    nft_trans_set(trans) == set) {
- 			set->bound = true;
+ 		switch (trans->msg_type) {
+ 		case NFT_MSG_NEWSET:
+ 			if (nft_trans_set(trans) == set)
+ 				nft_trans_set_bound(trans) = true;
+ 			break;
+ 		case NFT_MSG_NEWSETELEM:
+ 			if (nft_trans_elem_set(trans) == set)
+ 				nft_trans_elem_set_bound(trans) = true;
  			break;
  		}
  	}
@@@ -1662,6 -1667,10 +1667,10 @@@ static int nf_tables_addchain(struct nf
  
  		chain->flags |= NFT_BASE_CHAIN | flags;
  		basechain->policy = NF_ACCEPT;
+ 		if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ 		    nft_chain_offload_priority(basechain) < 0)
+ 			return -EOPNOTSUPP;
+ 
  		flow_block_init(&basechain->flow_block);
  	} else {
  		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
@@@ -6906,7 -6915,7 +6915,7 @@@ static int __nf_tables_abort(struct ne
  			break;
  		case NFT_MSG_NEWSET:
  			trans->ctx.table->use--;
- 			if (nft_trans_set(trans)->bound) {
+ 			if (nft_trans_set_bound(trans)) {
  				nft_trans_destroy(trans);
  				break;
  			}
@@@ -6918,7 -6927,7 +6927,7 @@@
  			nft_trans_destroy(trans);
  			break;
  		case NFT_MSG_NEWSETELEM:
- 			if (nft_trans_elem_set(trans)->bound) {
+ 			if (nft_trans_elem_set_bound(trans)) {
  				nft_trans_destroy(trans);
  				break;
  			}
@@@ -7593,11 -7602,6 +7602,11 @@@ static struct pernet_operations nf_tabl
  	.exit	= nf_tables_exit_net,
  };
  
 +static struct flow_indr_block_ing_entry block_ing_entry = {
 +	.cb = nft_indr_block_get_and_ing_cmd,
 +	.list = LIST_HEAD_INIT(block_ing_entry.list),
 +};
 +
  static int __init nf_tables_module_init(void)
  {
  	int err;
@@@ -7629,7 -7633,6 +7638,7 @@@
  		goto err5;
  
  	nft_chain_route_init();
 +	flow_indr_add_block_ing_cb(&block_ing_entry);
  	return err;
  err5:
  	rhltable_destroy(&nft_objname_ht);
@@@ -7646,7 -7649,6 +7655,7 @@@ err1
  
  static void __exit nf_tables_module_exit(void)
  {
 +	flow_indr_del_block_ing_cb(&block_ing_entry);
  	nfnetlink_subsys_unregister(&nf_tables_subsys);
  	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
  	nft_chain_filter_fini();
diff --combined net/netfilter/nf_tables_offload.c
index d3c4c9c88bc8,c0d18c1d77ac..3c2725ade61b
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@@ -103,10 -103,11 +103,11 @@@ void nft_offload_update_dependency(stru
  }
  
  static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
- 					 __be16 proto,
- 					struct netlink_ext_ack *extack)
+ 					 __be16 proto, int priority,
+ 					 struct netlink_ext_ack *extack)
  {
  	common->protocol = proto;
+ 	common->prio = priority;
  	common->extack = extack;
  }
  
@@@ -124,6 -125,15 +125,15 @@@ static int nft_setup_cb_call(struct nft
  	return 0;
  }
  
+ int nft_chain_offload_priority(struct nft_base_chain *basechain)
+ {
+ 	if (basechain->ops.priority <= 0 ||
+ 	    basechain->ops.priority > USHRT_MAX)
+ 		return -1;
+ 
+ 	return 0;
+ }
+ 
  static int nft_flow_offload_rule(struct nft_trans *trans,
  				 enum flow_cls_command command)
  {
@@@ -142,7 -152,8 +152,8 @@@
  	if (flow)
  		proto = flow->proto;
  
- 	nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+ 	nft_flow_offload_common_init(&cls_flow.common, proto,
+ 				     basechain->ops.priority, &extack);
  	cls_flow.command = command;
  	cls_flow.cookie = (unsigned long) rule;
  	if (flow)
@@@ -171,110 -182,24 +182,110 @@@ static int nft_flow_offload_unbind(stru
  	return 0;
  }
  
 +static int nft_block_setup(struct nft_base_chain *basechain,
 +			   struct flow_block_offload *bo,
 +			   enum flow_block_command cmd)
 +{
 +	int err;
 +
 +	switch (cmd) {
 +	case FLOW_BLOCK_BIND:
 +		err = nft_flow_offload_bind(bo, basechain);
 +		break;
 +	case FLOW_BLOCK_UNBIND:
 +		err = nft_flow_offload_unbind(bo, basechain);
 +		break;
 +	default:
 +		WARN_ON_ONCE(1);
 +		err = -EOPNOTSUPP;
 +	}
 +
 +	return err;
 +}
 +
 +static int nft_block_offload_cmd(struct nft_base_chain *chain,
 +				 struct net_device *dev,
 +				 enum flow_block_command cmd)
 +{
 +	struct netlink_ext_ack extack = {};
 +	struct flow_block_offload bo = {};
 +	int err;
 +
 +	bo.net = dev_net(dev);
 +	bo.block = &chain->flow_block;
 +	bo.command = cmd;
 +	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +	bo.extack = &extack;
 +	INIT_LIST_HEAD(&bo.cb_list);
 +
 +	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 +	if (err < 0)
 +		return err;
 +
 +	return nft_block_setup(chain, &bo, cmd);
 +}
 +
 +static void nft_indr_block_ing_cmd(struct net_device *dev,
 +				   struct nft_base_chain *chain,
 +				   flow_indr_block_bind_cb_t *cb,
 +				   void *cb_priv,
 +				   enum flow_block_command cmd)
 +{
 +	struct netlink_ext_ack extack = {};
 +	struct flow_block_offload bo = {};
 +
 +	if (!chain)
 +		return;
 +
 +	bo.net = dev_net(dev);
 +	bo.block = &chain->flow_block;
 +	bo.command = cmd;
 +	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +	bo.extack = &extack;
 +	INIT_LIST_HEAD(&bo.cb_list);
 +
 +	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
 +
 +	nft_block_setup(chain, &bo, cmd);
 +}
 +
 +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
 +				      struct net_device *dev,
 +				      enum flow_block_command cmd)
 +{
 +	struct flow_block_offload bo = {};
 +	struct netlink_ext_ack extack = {};
 +
 +	bo.net = dev_net(dev);
 +	bo.block = &chain->flow_block;
 +	bo.command = cmd;
 +	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +	bo.extack = &extack;
 +	INIT_LIST_HEAD(&bo.cb_list);
 +
 +	flow_indr_block_call(dev, &bo, cmd);
 +
 +	if (list_empty(&bo.cb_list))
 +		return -EOPNOTSUPP;
 +
 +	return nft_block_setup(chain, &bo, cmd);
 +}
 +
  #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
  
  static int nft_flow_offload_chain(struct nft_trans *trans,
  				  enum flow_block_command cmd)
  {
  	struct nft_chain *chain = trans->ctx.chain;
 -	struct netlink_ext_ack extack = {};
 -	struct flow_block_offload bo = {};
  	struct nft_base_chain *basechain;
  	struct net_device *dev;
 -	int err;
  
  	if (!nft_is_base_chain(chain))
  		return -EOPNOTSUPP;
  
  	basechain = nft_base_chain(chain);
  	dev = basechain->ops.dev;
 -	if (!dev || !dev->netdev_ops->ndo_setup_tc)
 +	if (!dev)
  		return -EOPNOTSUPP;
  
  	/* Only default policy to accept is supported for now. */
@@@ -283,10 -208,26 +294,10 @@@
  	    nft_trans_chain_policy(trans) != NF_ACCEPT)
  		return -EOPNOTSUPP;
  
 -	bo.command = cmd;
 -	bo.block = &basechain->flow_block;
 -	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 -	bo.extack = &extack;
 -	INIT_LIST_HEAD(&bo.cb_list);
 -
 -	err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
 -	if (err < 0)
 -		return err;
 -
 -	switch (cmd) {
 -	case FLOW_BLOCK_BIND:
 -		err = nft_flow_offload_bind(&bo, basechain);
 -		break;
 -	case FLOW_BLOCK_UNBIND:
 -		err = nft_flow_offload_unbind(&bo, basechain);
 -		break;
 -	}
 -
 -	return err;
 +	if (dev->netdev_ops->ndo_setup_tc)
 +		return nft_block_offload_cmd(basechain, dev, cmd);
 +	else
 +		return nft_indr_block_offload_cmd(basechain, dev, cmd);
  }
  
  int nft_flow_rule_offload_commit(struct net *net)
@@@ -336,33 -277,3 +347,33 @@@
  
  	return err;
  }
 +
 +void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
 +				    flow_indr_block_bind_cb_t *cb,
 +				    void *cb_priv,
 +				    enum flow_block_command command)
 +{
 +	struct net *net = dev_net(dev);
 +	const struct nft_table *table;
 +	const struct nft_chain *chain;
 +
 +	list_for_each_entry_rcu(table, &net->nft.tables, list) {
 +		if (table->family != NFPROTO_NETDEV)
 +			continue;
 +
 +		list_for_each_entry_rcu(chain, &table->chains, list) {
 +			if (nft_is_base_chain(chain)) {
 +				struct nft_base_chain *basechain;
 +
 +				basechain = nft_base_chain(chain);
 +				if (!strncmp(basechain->dev_name, dev->name,
 +					     IFNAMSIZ)) {
 +					nft_indr_block_ing_cmd(dev, basechain,
 +							       cb, cb_priv,
 +							       command);
 +					return;
 +				}
 +			}
 +		}
 +	}
 +}
diff --combined net/rxrpc/ar-internal.h
index 63b26baa108a,145335611af6..fa5b030acaa8
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@@ -226,9 -226,6 +226,9 @@@ struct rxrpc_security 
  	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
  			     unsigned int, unsigned int, rxrpc_seq_t, u16);
  
 +	/* Free crypto request on a call */
 +	void (*free_call_crypto)(struct rxrpc_call *);
 +
  	/* Locate the data in a received packet that has been verified. */
  	void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
  			    unsigned int *, unsigned int *);
@@@ -257,7 -254,8 +257,8 @@@
   */
  struct rxrpc_local {
  	struct rcu_head		rcu;
- 	atomic_t		usage;
+ 	atomic_t		active_users;	/* Number of users of the local endpoint */
+ 	atomic_t		usage;		/* Number of references to the structure */
  	struct rxrpc_net	*rxnet;		/* The network ns in which this resides */
  	struct list_head	link;
  	struct socket		*socket;	/* my UDP socket */
@@@ -560,7 -558,6 +561,7 @@@ struct rxrpc_call 
  	unsigned long		expect_term_by;	/* When we expect call termination by */
  	u32			next_rx_timo;	/* Timeout for next Rx packet (jif) */
  	u32			next_req_timo;	/* Timeout for next Rx request packet (jif) */
 +	struct skcipher_request	*cipher_req;	/* Packet cipher request buffer */
  	struct timer_list	timer;		/* Combined event timer */
  	struct work_struct	processor;	/* Event processor */
  	rxrpc_notify_rx_t	notify_rx;	/* kernel service Rx notification function */
@@@ -653,7 -650,6 +654,6 @@@
  
  	/* receive-phase ACK management */
  	u8			ackr_reason;	/* reason to ACK */
- 	u16			ackr_skew;	/* skew on packet being ACK'd */
  	rxrpc_serial_t		ackr_serial;	/* serial of packet being ACK'd */
  	rxrpc_serial_t		ackr_first_seq;	/* first sequence number received */
  	rxrpc_seq_t		ackr_prev_seq;	/* previous sequence number received */
@@@ -747,7 -743,7 +747,7 @@@ int rxrpc_reject_call(struct rxrpc_soc
  /*
   * call_event.c
   */
- void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
  		       enum rxrpc_propose_ack_trace);
  void rxrpc_process_call(struct work_struct *);
  
@@@ -1006,6 -1002,8 +1006,8 @@@ struct rxrpc_local *rxrpc_lookup_local(
  struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
  struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
  void rxrpc_put_local(struct rxrpc_local *);
+ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
+ void rxrpc_unuse_local(struct rxrpc_local *);
  void rxrpc_queue_local(struct rxrpc_local *);
  void rxrpc_destroy_all_locals(struct rxrpc_net *);
  
diff --combined net/sched/sch_taprio.c
index 046fd2c102b4,e25d414ae12f..540bde009ea5
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@@ -677,6 -677,10 +677,6 @@@ static const struct nla_policy entry_po
  	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
  };
  
 -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
 -	[TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
 -};
 -
  static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
  	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
  		.len = sizeof(struct tc_mqprio_qopt)
@@@ -1191,7 -1195,8 +1191,8 @@@ unlock
  	spin_unlock_bh(qdisc_lock(sch));
  
  free_sched:
- 	kfree(new_admin);
+ 	if (new_admin)
+ 		call_rcu(&new_admin->rcu, taprio_free_sched_cb);
  
  	return err;
  }
diff --combined net/tipc/link.c
index 289e848084ac,c2c5c53cad22..6cc75ffd9e2c
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@@ -106,8 -106,6 +106,6 @@@ struct tipc_stats 
   * @transmitq: queue for sent, non-acked messages
   * @backlogq: queue for messages waiting to be sent
   * @snt_nxt: next sequence number to use for outbound messages
-  * @prev_from: sequence number of most previous retransmission request
-  * @stale_limit: time when repeated identical retransmits must force link reset
   * @ackers: # of peers that needs to ack each packet before it can be released
   * @acked: # last packet acked by a certain peer. Used for broadcast.
   * @rcv_nxt: next sequence number to expect for inbound messages
@@@ -164,9 -162,7 +162,7 @@@ struct tipc_link 
  		u16 limit;
  	} backlog[5];
  	u16 snd_nxt;
- 	u16 prev_from;
  	u16 window;
- 	unsigned long stale_limit;
  
  	/* Reception */
  	u16 rcv_nxt;
@@@ -180,7 -176,6 +176,7 @@@
  
  	/* Fragmentation/reassembly */
  	struct sk_buff *reasm_buf;
 +	struct sk_buff *reasm_tnlmsg;
  
  	/* Broadcast */
  	u16 ackers;
@@@ -854,31 -849,18 +850,31 @@@ static int link_schedule_user(struct ti
   */
  static void link_prepare_wakeup(struct tipc_link *l)
  {
 +	struct sk_buff_head *wakeupq = &l->wakeupq;
 +	struct sk_buff_head *inputq = l->inputq;
  	struct sk_buff *skb, *tmp;
 -	int imp, i = 0;
 +	struct sk_buff_head tmpq;
 +	int avail[5] = {0,};
 +	int imp = 0;
 +
 +	__skb_queue_head_init(&tmpq);
 +
 +	for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
 +		avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
  
 -	skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
 +	skb_queue_walk_safe(wakeupq, skb, tmp) {
  		imp = TIPC_SKB_CB(skb)->chain_imp;
 -		if (l->backlog[imp].len < l->backlog[imp].limit) {
 -			skb_unlink(skb, &l->wakeupq);
 -			skb_queue_tail(l->inputq, skb);
 -		} else if (i++ > 10) {
 -			break;
 -		}
 +		if (avail[imp] <= 0)
 +			continue;
 +		avail[imp]--;
 +		__skb_unlink(skb, wakeupq);
 +		__skb_queue_tail(&tmpq, skb);
  	}
 +
 +	spin_lock_bh(&inputq->lock);
 +	skb_queue_splice_tail(&tmpq, inputq);
 +	spin_unlock_bh(&inputq->lock);
 +
  }
  
  void tipc_link_reset(struct tipc_link *l)
@@@ -911,10 -893,8 +907,10 @@@
  	l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
  	l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
  	kfree_skb(l->reasm_buf);
 +	kfree_skb(l->reasm_tnlmsg);
  	kfree_skb(l->failover_reasm_skb);
  	l->reasm_buf = NULL;
 +	l->reasm_tnlmsg = NULL;
  	l->failover_reasm_skb = NULL;
  	l->rcv_unacked = 0;
  	l->snd_nxt = 1;
@@@ -956,10 -936,7 +952,10 @@@ int tipc_link_xmit(struct tipc_link *l
  	int rc = 0;
  
  	if (unlikely(msg_size(hdr) > mtu)) {
 -		skb_queue_purge(list);
 +		pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
 +			skb_queue_len(list), msg_user(hdr),
 +			msg_type(hdr), msg_size(hdr), mtu);
 +		__skb_queue_purge(list);
  		return -EMSGSIZE;
  	}
  
@@@ -988,7 -965,7 +984,7 @@@
  		if (likely(skb_queue_len(transmq) < maxwin)) {
  			_skb = skb_clone(skb, GFP_ATOMIC);
  			if (!_skb) {
 -				skb_queue_purge(list);
 +				__skb_queue_purge(list);
  				return -ENOBUFS;
  			}
  			__skb_dequeue(list);
@@@ -1063,47 -1040,53 +1059,53 @@@ static void tipc_link_advance_backlog(s
   * link_retransmit_failure() - Detect repeated retransmit failures
   * @l: tipc link sender
   * @r: tipc link receiver (= l in case of unicast)
-  * @from: seqno of the 1st packet in retransmit request
   * @rc: returned code
   *
   * Return: true if the repeated retransmit failures happens, otherwise
   * false
   */
  static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
- 				    u16 from, int *rc)
+ 				    int *rc)
  {
  	struct sk_buff *skb = skb_peek(&l->transmq);
  	struct tipc_msg *hdr;
  
  	if (!skb)
  		return false;
- 	hdr = buf_msg(skb);
  
- 	/* Detect repeated retransmit failures on same packet */
- 	if (r->prev_from != from) {
- 		r->prev_from = from;
- 		r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
- 	} else if (time_after(jiffies, r->stale_limit)) {
- 		pr_warn("Retransmission failure on link <%s>\n", l->name);
- 		link_print(l, "State of link ");
- 		pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
- 			msg_user(hdr), msg_type(hdr), msg_size(hdr),
- 			msg_errcode(hdr));
- 		pr_info("sqno %u, prev: %x, src: %x\n",
- 			msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
- 
- 		trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
- 		trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
- 		trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+ 	if (!TIPC_SKB_CB(skb)->retr_cnt)
+ 		return false;
  
- 		if (link_is_bc_sndlink(l))
- 			*rc = TIPC_LINK_DOWN_EVT;
+ 	if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
+ 			msecs_to_jiffies(r->tolerance)))
+ 		return false;
+ 
+ 	hdr = buf_msg(skb);
+ 	if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
+ 		return false;
  
+ 	pr_warn("Retransmission failure on link <%s>\n", l->name);
+ 	link_print(l, "State of link ");
+ 	pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ 		msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+ 	pr_info("sqno %u, prev: %x, dest: %x\n",
+ 		msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
+ 	pr_info("retr_stamp %d, retr_cnt %d\n",
+ 		jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
+ 		TIPC_SKB_CB(skb)->retr_cnt);
+ 
+ 	trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+ 	trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+ 	trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+ 
+ 	if (link_is_bc_sndlink(l)) {
+ 		r->state = LINK_RESET;
+ 		*rc = TIPC_LINK_DOWN_EVT;
+ 	} else {
  		*rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- 		return true;
  	}
  
- 	return false;
+ 	return true;
  }
  
  /* tipc_link_bc_retrans() - retransmit zero or more packets
@@@ -1129,7 -1112,7 +1131,7 @@@ static int tipc_link_bc_retrans(struct 
  
  	trace_tipc_link_retrans(r, from, to, &l->transmq);
  
- 	if (link_retransmit_failure(l, r, from, &rc))
+ 	if (link_retransmit_failure(l, r, &rc))
  		return rc;
  
  	skb_queue_walk(&l->transmq, skb) {
@@@ -1138,11 -1121,10 +1140,10 @@@
  			continue;
  		if (more(msg_seqno(hdr), to))
  			break;
- 		if (link_is_bc_sndlink(l)) {
- 			if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
- 				continue;
- 			TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- 		}
+ 
+ 		if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+ 			continue;
+ 		TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
  		_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
  		if (!_skb)
  			return 0;
@@@ -1152,6 -1134,10 +1153,10 @@@
  		_skb->priority = TC_PRIO_CONTROL;
  		__skb_queue_tail(xmitq, _skb);
  		l->stats.retransmitted++;
+ 
+ 		/* Increase actual retrans counter & mark first time */
+ 		if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ 			TIPC_SKB_CB(skb)->retr_stamp = jiffies;
  	}
  	return 0;
  }
@@@ -1252,7 -1238,6 +1257,7 @@@ static int tipc_link_tnl_rcv(struct tip
  			     struct sk_buff_head *inputq)
  {
  	struct sk_buff **reasm_skb = &l->failover_reasm_skb;
 +	struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg;
  	struct sk_buff_head *fdefq = &l->failover_deferdq;
  	struct tipc_msg *hdr = buf_msg(skb);
  	struct sk_buff *iskb;
@@@ -1260,56 -1245,40 +1265,56 @@@
  	int rc = 0;
  	u16 seqno;
  
 -	/* SYNCH_MSG */
 -	if (msg_type(hdr) == SYNCH_MSG)
 -		goto drop;
 +	if (msg_type(hdr) == SYNCH_MSG) {
 +		kfree_skb(skb);
 +		return 0;
 +	}
  
 -	/* FAILOVER_MSG */
 -	if (!tipc_msg_extract(skb, &iskb, &ipos)) {
 -		pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n",
 -				    skb_queue_len(fdefq));
 -		return rc;
 +	/* Not a fragment? */
 +	if (likely(!msg_nof_fragms(hdr))) {
 +		if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) {
 +			pr_warn_ratelimited("Unable to extract msg, defq: %d\n",
 +					    skb_queue_len(fdefq));
 +			return 0;
 +		}
 +		kfree_skb(skb);
 +	} else {
 +		/* Set fragment type for buf_append */
 +		if (msg_fragm_no(hdr) == 1)
 +			msg_set_type(hdr, FIRST_FRAGMENT);
 +		else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr))
 +			msg_set_type(hdr, FRAGMENT);
 +		else
 +			msg_set_type(hdr, LAST_FRAGMENT);
 +
 +		if (!tipc_buf_append(reasm_tnlmsg, &skb)) {
 +			/* Successful but non-complete reassembly? */
 +			if (*reasm_tnlmsg || link_is_bc_rcvlink(l))
 +				return 0;
 +			pr_warn_ratelimited("Unable to reassemble tunnel msg\n");
 +			return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 +		}
 +		iskb = skb;
  	}
  
  	do {
  		seqno = buf_seqno(iskb);
 -
  		if (unlikely(less(seqno, l->drop_point))) {
  			kfree_skb(iskb);
  			continue;
  		}
 -
  		if (unlikely(seqno != l->drop_point)) {
  			__tipc_skb_queue_sorted(fdefq, seqno, iskb);
  			continue;
  		}
  
  		l->drop_point++;
 -
  		if (!tipc_data_input(l, iskb, inputq))
  			rc |= tipc_link_input(l, iskb, inputq, reasm_skb);
  		if (unlikely(rc))
  			break;
  	} while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
  
 -drop:
 -	kfree_skb(skb);
  	return rc;
  }
  
@@@ -1393,12 -1362,10 +1398,10 @@@ static int tipc_link_advance_transmq(st
  	struct tipc_msg *hdr;
  	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
  	u16 ack = l->rcv_nxt - 1;
+ 	bool passed = false;
  	u16 seqno, n = 0;
  	int rc = 0;
  
- 	if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
- 		return rc;
- 
  	skb_queue_walk_safe(&l->transmq, skb, tmp) {
  		seqno = buf_seqno(skb);
  
@@@ -1408,12 -1375,17 +1411,17 @@@ next_gap_ack
  			__skb_unlink(skb, &l->transmq);
  			kfree_skb(skb);
  		} else if (less_eq(seqno, acked + gap)) {
- 			/* retransmit skb */
+ 			/* First, check for repeated retransmission failures */
+ 			if (!passed && link_retransmit_failure(l, l, &rc))
+ 				return rc;
+ 			passed = true;
+ 
+ 			/* retransmit skb if unrestricted */
  			if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
  				continue;
  			TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
- 
- 			_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+ 			_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
+ 					   GFP_ATOMIC);
  			if (!_skb)
  				continue;
  			hdr = buf_msg(_skb);
@@@ -1422,6 -1394,10 +1430,10 @@@
  			_skb->priority = TC_PRIO_CONTROL;
  			__skb_queue_tail(xmitq, _skb);
  			l->stats.retransmitted++;
+ 
+ 			/* Increase actual retrans counter & mark first time */
+ 			if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ 				TIPC_SKB_CB(skb)->retr_stamp = jiffies;
  		} else {
  			/* retry with Gap ACK blocks if any */
  			if (!ga || n >= ga->gack_cnt)
@@@ -1668,7 -1644,7 +1680,7 @@@ void tipc_link_create_dummy_tnl_msg(str
  	struct sk_buff *skb;
  	u32 dnode = l->addr;
  
 -	skb_queue_head_init(&tnlq);
 +	__skb_queue_head_init(&tnlq);
  	skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
  			      INT_H_SIZE, BASIC_H_SIZE,
  			      dnode, onode, 0, 0, 0);
@@@ -1699,18 -1675,14 +1711,18 @@@ void tipc_link_tnl_prepare(struct tipc_
  	struct sk_buff *skb, *tnlskb;
  	struct tipc_msg *hdr, tnlhdr;
  	struct sk_buff_head *queue = &l->transmq;
 -	struct sk_buff_head tmpxq, tnlq;
 +	struct sk_buff_head tmpxq, tnlq, frags;
  	u16 pktlen, pktcnt, seqno = l->snd_nxt;
 +	bool pktcnt_need_update = false;
 +	u16 syncpt;
 +	int rc;
  
  	if (!tnl)
  		return;
  
 -	skb_queue_head_init(&tnlq);
 -	skb_queue_head_init(&tmpxq);
 +	__skb_queue_head_init(&tnlq);
 +	__skb_queue_head_init(&tmpxq);
 +	__skb_queue_head_init(&frags);
  
  	/* At least one packet required for safe algorithm => add dummy */
  	skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
@@@ -1720,35 -1692,10 +1732,35 @@@
  		pr_warn("%sunable to create tunnel packet\n", link_co_err);
  		return;
  	}
 -	skb_queue_tail(&tnlq, skb);
 +	__skb_queue_tail(&tnlq, skb);
  	tipc_link_xmit(l, &tnlq, &tmpxq);
  	__skb_queue_purge(&tmpxq);
  
 +	/* Link Synching:
 +	 * From now on, send only one single ("dummy") SYNCH message
 +	 * to peer. The SYNCH message does not contain any data, just
 +	 * a header conveying the synch point to the peer.
 +	 */
 +	if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
 +		tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG,
 +					 INT_H_SIZE, 0, l->addr,
 +					 tipc_own_addr(l->net),
 +					 0, 0, 0);
 +		if (!tnlskb) {
 +			pr_warn("%sunable to create dummy SYNCH_MSG\n",
 +				link_co_err);
 +			return;
 +		}
 +
 +		hdr = buf_msg(tnlskb);
 +		syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
 +		msg_set_syncpt(hdr, syncpt);
 +		msg_set_bearer_id(hdr, l->peer_bearer_id);
 +		__skb_queue_tail(&tnlq, tnlskb);
 +		tipc_link_xmit(tnl, &tnlq, xmitq);
 +		return;
 +	}
 +
  	/* Initialize reusable tunnel packet header */
  	tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
  		      mtyp, INT_H_SIZE, l->addr);
@@@ -1766,39 -1713,6 +1778,39 @@@ tnl
  		if (queue == &l->backlogq)
  			msg_set_seqno(hdr, seqno++);
  		pktlen = msg_size(hdr);
 +
 +		/* Tunnel link MTU is not large enough? This could be
 +		 * due to:
 +		 * 1) Link MTU has just changed or set differently;
 +		 * 2) Or FAILOVER on the top of a SYNCH message
 +		 *
 +		 * The 2nd case should not happen if peer supports
 +		 * TIPC_TUNNEL_ENHANCED
 +		 */
 +		if (pktlen > tnl->mtu - INT_H_SIZE) {
 +			if (mtyp == FAILOVER_MSG &&
 +			    (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
 +				rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu,
 +						       &frags);
 +				if (rc) {
 +					pr_warn("%sunable to frag msg: rc %d\n",
 +						link_co_err, rc);
 +					return;
 +				}
 +				pktcnt += skb_queue_len(&frags) - 1;
 +				pktcnt_need_update = true;
 +				skb_queue_splice_tail_init(&frags, &tnlq);
 +				continue;
 +			}
 +			/* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED
 +			 * => Just warn it and return!
 +			 */
 +			pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n",
 +					    link_co_err, msg_user(hdr),
 +					    msg_type(hdr), msg_size(hdr));
 +			return;
 +		}
 +
  		msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
  		tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
  		if (!tnlskb) {
@@@ -1814,12 -1728,6 +1826,12 @@@
  		goto tnl;
  	}
  
 +	if (pktcnt_need_update)
 +		skb_queue_walk(&tnlq, skb) {
 +			hdr = buf_msg(skb);
 +			msg_set_msgcnt(hdr, pktcnt);
 +		}
 +
  	tipc_link_xmit(tnl, &tnlq, xmitq);
  
  	if (mtyp == FAILOVER_MSG) {
@@@ -2681,7 -2589,7 +2693,7 @@@ int tipc_link_dump(struct tipc_link *l
  	i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
  	i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
  	i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
- 	i += scnprintf(buf + i, sz - i, " %u", l->prev_from);
+ 	i += scnprintf(buf + i, sz - i, " %u", 0);
  	i += scnprintf(buf + i, sz - i, " %u", 0);
  	i += scnprintf(buf + i, sz - i, " %u", l->acked);
  
diff --combined net/tipc/msg.h
index 1c8c8dd32a4e,d7ebc9e955f6..0daa6f04ca81
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@@ -102,13 -102,15 +102,15 @@@ struct plist
  #define TIPC_MEDIA_INFO_OFFSET	5
  
  struct tipc_skb_cb {
- 	u32 bytes_read;
- 	u32 orig_member;
  	struct sk_buff *tail;
  	unsigned long nxt_retr;
- 	bool validated;
+ 	unsigned long retr_stamp;
+ 	u32 bytes_read;
+ 	u32 orig_member;
  	u16 chain_imp;
  	u16 ackers;
+ 	u16 retr_cnt;
+ 	bool validated;
  };
  
  #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@@ -721,26 -723,12 +723,26 @@@ static inline void msg_set_last_bcast(s
  	msg_set_bits(m, 4, 16, 0xffff, n);
  }
  
 +static inline u32 msg_nof_fragms(struct tipc_msg *m)
 +{
 +	return msg_bits(m, 4, 0, 0xffff);
 +}
 +
 +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n)
 +{
 +	msg_set_bits(m, 4, 0, 0xffff, n);
 +}
 +
 +static inline u32 msg_fragm_no(struct tipc_msg *m)
 +{
 +	return msg_bits(m, 4, 16, 0xffff);
 +}
 +
  static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
  {
  	msg_set_bits(m, 4, 16, 0xffff, n);
  }
  
 -
  static inline u16 msg_next_sent(struct tipc_msg *m)
  {
  	return msg_bits(m, 4, 0, 0xffff);
@@@ -891,16 -879,6 +893,16 @@@ static inline void msg_set_msgcnt(struc
  	msg_set_bits(m, 9, 16, 0xffff, n);
  }
  
 +static inline u16 msg_syncpt(struct tipc_msg *m)
 +{
 +	return msg_bits(m, 9, 16, 0xffff);
 +}
 +
 +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n)
 +{
 +	msg_set_bits(m, 9, 16, 0xffff, n);
 +}
 +
  static inline u32 msg_conn_ack(struct tipc_msg *m)
  {
  	return msg_bits(m, 9, 16, 0xffff);
@@@ -1059,8 -1037,6 +1061,8 @@@ bool tipc_msg_bundle(struct sk_buff *sk
  bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
  			  u32 mtu, u32 dnode);
  bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
 +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
 +		      int pktmax, struct sk_buff_head *frags);
  int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
  		   int offset, int dsz, int mtu, struct sk_buff_head *list);
  bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
diff --combined net/tls/tls_device.c
index d184230665eb,43922d86e510..a470df7ffcf9
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@@ -243,14 -243,14 +243,14 @@@ static void tls_append_frag(struct tls_
  	skb_frag_t *frag;
  
  	frag = &record->frags[record->num_frags - 1];
 -	if (frag->page.p == pfrag->page &&
 -	    frag->page_offset + frag->size == pfrag->offset) {
 -		frag->size += size;
 +	if (skb_frag_page(frag) == pfrag->page &&
 +	    skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) {
 +		skb_frag_size_add(frag, size);
  	} else {
  		++frag;
 -		frag->page.p = pfrag->page;
 -		frag->page_offset = pfrag->offset;
 -		frag->size = size;
 +		__skb_frag_set_page(frag, pfrag->page);
 +		skb_frag_off_set(frag, pfrag->offset);
 +		skb_frag_size_set(frag, size);
  		++record->num_frags;
  		get_page(pfrag->page);
  	}
@@@ -301,8 -301,8 +301,8 @@@ static int tls_push_record(struct sock 
  		frag = &record->frags[i];
  		sg_unmark_end(&offload_ctx->sg_tx_data[i]);
  		sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
 -			    frag->size, frag->page_offset);
 -		sk_mem_charge(sk, frag->size);
 +			    skb_frag_size(frag), skb_frag_off(frag));
 +		sk_mem_charge(sk, skb_frag_size(frag));
  		get_page(skb_frag_page(frag));
  	}
  	sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
@@@ -324,7 -324,7 +324,7 @@@ static int tls_create_new_record(struc
  
  	frag = &record->frags[0];
  	__skb_frag_set_page(frag, pfrag->page);
 -	frag->page_offset = pfrag->offset;
 +	skb_frag_off_set(frag, pfrag->offset);
  	skb_frag_size_set(frag, prepend_size);
  
  	get_page(pfrag->page);
@@@ -373,9 -373,9 +373,9 @@@ static int tls_push_data(struct sock *s
  	struct tls_context *tls_ctx = tls_get_ctx(sk);
  	struct tls_prot_info *prot = &tls_ctx->prot_info;
  	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
- 	int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
  	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
  	struct tls_record_info *record = ctx->open_record;
+ 	int tls_push_record_flags;
  	struct page_frag *pfrag;
  	size_t orig_size = size;
  	u32 max_open_record_len;
@@@ -390,6 -390,9 +390,9 @@@
  	if (sk->sk_err)
  		return -sk->sk_err;
  
+ 	flags |= MSG_SENDPAGE_DECRYPTED;
+ 	tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+ 
  	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
  	if (tls_is_partially_sent_record(tls_ctx)) {
  		rc = tls_push_partial_record(sk, tls_ctx, flags);
@@@ -576,7 -579,9 +579,9 @@@ void tls_device_write_space(struct soc
  		gfp_t sk_allocation = sk->sk_allocation;
  
  		sk->sk_allocation = GFP_ATOMIC;
- 		tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
+ 		tls_push_partial_record(sk, ctx,
+ 					MSG_DONTWAIT | MSG_NOSIGNAL |
+ 					MSG_SENDPAGE_DECRYPTED);
  		sk->sk_allocation = sk_allocation;
  	}
  }
diff --combined tools/include/uapi/linux/bpf.h
index 4393bd4b2419,a5aa7d3ac6a1..0e66371bea13
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@@ -134,7 -134,6 +134,7 @@@ enum bpf_map_type 
  	BPF_MAP_TYPE_QUEUE,
  	BPF_MAP_TYPE_STACK,
  	BPF_MAP_TYPE_SK_STORAGE,
 +	BPF_MAP_TYPE_DEVMAP_HASH,
  };
  
  /* Note that tracing related programs such as
@@@ -1467,8 -1466,8 +1467,8 @@@ union bpf_attr 
   * 		If no cookie has been set yet, generate a new cookie. Once
   * 		generated, the socket cookie remains stable for the life of the
   * 		socket. This helper can be useful for monitoring per socket
-  * 		networking traffic statistics as it provides a unique socket
-  * 		identifier per namespace.
+  * 		networking traffic statistics as it provides a global socket
+  * 		identifier that can be assumed unique.
   * 	Return
   * 		A 8-byte long non-decreasing number on success, or 0 if the
   * 		socket field is missing inside *skb*.
@@@ -2714,33 -2713,6 +2714,33 @@@
   *		**-EPERM** if no permission to send the *sig*.
   *
   *		**-EAGAIN** if bpf program can try again.
 + *
 + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
 + *	Description
 + *		Try to issue a SYN cookie for the packet with corresponding
 + *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
 + *
 + *		*iph* points to the start of the IPv4 or IPv6 header, while
 + *		*iph_len* contains **sizeof**\ (**struct iphdr**) or
 + *		**sizeof**\ (**struct ip6hdr**).
 + *
 + *		*th* points to the start of the TCP header, while *th_len*
 + *		contains the length of the TCP header.
 + *
 + *	Return
 + *		On success, lower 32 bits hold the generated SYN cookie,
 + *		followed by 16 bits which hold the MSS value for that cookie,
 + *		and the top 16 bits are unused.
 + *
 + *		On failure, the returned value is one of the following:
 + *
 + *		**-EINVAL** SYN cookie cannot be issued due to error
 + *
 + *		**-ENOENT** SYN cookie should not be issued (no SYN flood)
 + *
 + *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
 + *
 + *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
   */
  #define __BPF_FUNC_MAPPER(FN)		\
  	FN(unspec),			\
@@@ -2852,8 -2824,7 +2852,8 @@@
  	FN(strtoul),			\
  	FN(sk_storage_get),		\
  	FN(sk_storage_delete),		\
 -	FN(send_signal),
 +	FN(send_signal),		\
 +	FN(tcp_gen_syncookie),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@@ -3536,10 -3507,6 +3536,10 @@@ enum bpf_task_fd_type 
  	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
  };
  
 +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		(1U << 1)
 +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
 +
  struct bpf_flow_keys {
  	__u16	nhoff;
  	__u16	thoff;
@@@ -3561,8 -3528,6 +3561,8 @@@
  			__u32	ipv6_dst[4];	/* in6_addr; network order */
  		};
  	};
 +	__u32	flags;
 +	__be32	flow_label;
  };
  
  struct bpf_func_info {
diff --combined tools/lib/bpf/libbpf.c
index 2233f919dd88,2b57d7ea7836..e0276520171b
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@@ -39,7 -39,6 +39,7 @@@
  #include <sys/stat.h>
  #include <sys/types.h>
  #include <sys/vfs.h>
 +#include <sys/utsname.h>
  #include <tools/libc_compat.h>
  #include <libelf.h>
  #include <gelf.h>
@@@ -49,7 -48,6 +49,7 @@@
  #include "btf.h"
  #include "str_error.h"
  #include "libbpf_internal.h"
 +#include "hashmap.h"
  
  #ifndef EM_BPF
  #define EM_BPF 247
@@@ -77,12 -75,9 +77,12 @@@ static int __base_pr(enum libbpf_print_
  
  static libbpf_print_fn_t __libbpf_pr = __base_pr;
  
 -void libbpf_set_print(libbpf_print_fn_t fn)
 +libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
  {
 +	libbpf_print_fn_t old_print_fn = __libbpf_pr;
 +
  	__libbpf_pr = fn;
 +	return old_print_fn;
  }
  
  __printf(2, 3)
@@@ -187,7 -182,6 +187,6 @@@ struct bpf_program 
  	bpf_program_clear_priv_t clear_priv;
  
  	enum bpf_attach_type expected_attach_type;
- 	int btf_fd;
  	void *func_info;
  	__u32 func_info_rec_size;
  	__u32 func_info_cnt;
@@@ -318,7 -312,6 +317,6 @@@ void bpf_program__unload(struct bpf_pro
  	prog->instances.nr = -1;
  	zfree(&prog->instances.fds);
  
- 	zclose(prog->btf_fd);
  	zfree(&prog->func_info);
  	zfree(&prog->line_info);
  }
@@@ -397,7 -390,6 +395,6 @@@ bpf_program__init(void *data, size_t si
  	prog->instances.fds = NULL;
  	prog->instances.nr = -1;
  	prog->type = BPF_PROG_TYPE_UNSPEC;
- 	prog->btf_fd = -1;
  
  	return 0;
  errout:
@@@ -1018,21 -1010,23 +1015,21 @@@ static int bpf_object__init_user_maps(s
  	return 0;
  }
  
 -static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
 -						     __u32 id)
 +static const struct btf_type *
 +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
  {
  	const struct btf_type *t = btf__type_by_id(btf, id);
  
 -	while (true) {
 -		switch (BTF_INFO_KIND(t->info)) {
 -		case BTF_KIND_VOLATILE:
 -		case BTF_KIND_CONST:
 -		case BTF_KIND_RESTRICT:
 -		case BTF_KIND_TYPEDEF:
 -			t = btf__type_by_id(btf, t->type);
 -			break;
 -		default:
 -			return t;
 -		}
 +	if (res_id)
 +		*res_id = id;
 +
 +	while (btf_is_mod(t) || btf_is_typedef(t)) {
 +		if (res_id)
 +			*res_id = t->type;
 +		t = btf__type_by_id(btf, t->type);
  	}
 +
 +	return t;
  }
  
  /*
@@@ -1045,14 -1039,14 +1042,14 @@@
  static bool get_map_field_int(const char *map_name, const struct btf *btf,
  			      const struct btf_type *def,
  			      const struct btf_member *m, __u32 *res) {
 -	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
 +	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
  	const char *name = btf__name_by_offset(btf, m->name_off);
  	const struct btf_array *arr_info;
  	const struct btf_type *arr_t;
  
 -	if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
 +	if (!btf_is_ptr(t)) {
  		pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
 -			   map_name, name, BTF_INFO_KIND(t->info));
 +			   map_name, name, btf_kind(t));
  		return false;
  	}
  
@@@ -1062,12 -1056,12 +1059,12 @@@
  			   map_name, name, t->type);
  		return false;
  	}
 -	if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) {
 +	if (!btf_is_array(arr_t)) {
  		pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
 -			   map_name, name, BTF_INFO_KIND(arr_t->info));
 +			   map_name, name, btf_kind(arr_t));
  		return false;
  	}
 -	arr_info = (const void *)(arr_t + 1);
 +	arr_info = btf_array(arr_t);
  	*res = arr_info->nelems;
  	return true;
  }
@@@ -1085,11 -1079,11 +1082,11 @@@ static int bpf_object__init_user_btf_ma
  	struct bpf_map *map;
  	int vlen, i;
  
 -	vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
 +	vi = btf_var_secinfos(sec) + var_idx;
  	var = btf__type_by_id(obj->btf, vi->type);
 -	var_extra = (const void *)(var + 1);
 +	var_extra = btf_var(var);
  	map_name = btf__name_by_offset(obj->btf, var->name_off);
 -	vlen = BTF_INFO_VLEN(var->info);
 +	vlen = btf_vlen(var);
  
  	if (map_name == NULL || map_name[0] == '\0') {
  		pr_warning("map #%d: empty name.\n", var_idx);
@@@ -1099,9 -1093,9 +1096,9 @@@
  		pr_warning("map '%s' BTF data is corrupted.\n", map_name);
  		return -EINVAL;
  	}
 -	if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
 +	if (!btf_is_var(var)) {
  		pr_warning("map '%s': unexpected var kind %u.\n",
 -			   map_name, BTF_INFO_KIND(var->info));
 +			   map_name, btf_kind(var));
  		return -EINVAL;
  	}
  	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
@@@ -1111,10 -1105,10 +1108,10 @@@
  		return -EOPNOTSUPP;
  	}
  
 -	def = skip_mods_and_typedefs(obj->btf, var->type);
 -	if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
 +	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 +	if (!btf_is_struct(def)) {
  		pr_warning("map '%s': unexpected def kind %u.\n",
 -			   map_name, BTF_INFO_KIND(var->info));
 +			   map_name, btf_kind(var));
  		return -EINVAL;
  	}
  	if (def->size > vi->size) {
@@@ -1137,8 -1131,8 +1134,8 @@@
  	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
  		 map_name, map->sec_idx, map->sec_offset);
  
 -	vlen = BTF_INFO_VLEN(def->info);
 -	m = (const void *)(def + 1);
 +	vlen = btf_vlen(def);
 +	m = btf_members(def);
  	for (i = 0; i < vlen; i++, m++) {
  		const char *name = btf__name_by_offset(obj->btf, m->name_off);
  
@@@ -1188,9 -1182,9 +1185,9 @@@
  					   map_name, m->type);
  				return -EINVAL;
  			}
 -			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
 +			if (!btf_is_ptr(t)) {
  				pr_warning("map '%s': key spec is not PTR: %u.\n",
 -					   map_name, BTF_INFO_KIND(t->info));
 +					   map_name, btf_kind(t));
  				return -EINVAL;
  			}
  			sz = btf__resolve_size(obj->btf, t->type);
@@@ -1231,9 -1225,9 +1228,9 @@@
  					   map_name, m->type);
  				return -EINVAL;
  			}
 -			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
 +			if (!btf_is_ptr(t)) {
  				pr_warning("map '%s': value spec is not PTR: %u.\n",
 -					   map_name, BTF_INFO_KIND(t->info));
 +					   map_name, btf_kind(t));
  				return -EINVAL;
  			}
  			sz = btf__resolve_size(obj->btf, t->type);
@@@ -1294,7 -1288,7 +1291,7 @@@ static int bpf_object__init_user_btf_ma
  	nr_types = btf__get_nr_types(obj->btf);
  	for (i = 1; i <= nr_types; i++) {
  		t = btf__type_by_id(obj->btf, i);
 -		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
 +		if (!btf_is_datasec(t))
  			continue;
  		name = btf__name_by_offset(obj->btf, t->name_off);
  		if (strcmp(name, MAPS_ELF_SEC) == 0) {
@@@ -1308,7 -1302,7 +1305,7 @@@
  		return -ENOENT;
  	}
  
 -	vlen = BTF_INFO_VLEN(sec->info);
 +	vlen = btf_vlen(sec);
  	for (i = 0; i < vlen; i++) {
  		err = bpf_object__init_user_btf_map(obj, sec, i,
  						    obj->efile.btf_maps_shndx,
@@@ -1369,14 -1363,16 +1366,14 @@@ static void bpf_object__sanitize_btf(st
  	struct btf *btf = obj->btf;
  	struct btf_type *t;
  	int i, j, vlen;
 -	__u16 kind;
  
  	if (!obj->btf || (has_func && has_datasec))
  		return;
  
  	for (i = 1; i <= btf__get_nr_types(btf); i++) {
  		t = (struct btf_type *)btf__type_by_id(btf, i);
 -		kind = BTF_INFO_KIND(t->info);
  
 -		if (!has_datasec && kind == BTF_KIND_VAR) {
 +		if (!has_datasec && btf_is_var(t)) {
  			/* replace VAR with INT */
  			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
  			/*
@@@ -1385,11 -1381,11 +1382,11 @@@
  			 * original variable took less than 4 bytes
  			 */
  			t->size = 1;
 -			*(int *)(t+1) = BTF_INT_ENC(0, 0, 8);
 -		} else if (!has_datasec && kind == BTF_KIND_DATASEC) {
 +			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
 +		} else if (!has_datasec && btf_is_datasec(t)) {
  			/* replace DATASEC with STRUCT */
 -			struct btf_var_secinfo *v = (void *)(t + 1);
 -			struct btf_member *m = (void *)(t + 1);
 +			const struct btf_var_secinfo *v = btf_var_secinfos(t);
 +			struct btf_member *m = btf_members(t);
  			struct btf_type *vt;
  			char *name;
  
@@@ -1400,7 -1396,7 +1397,7 @@@
  				name++;
  			}
  
 -			vlen = BTF_INFO_VLEN(t->info);
 +			vlen = btf_vlen(t);
  			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
  			for (j = 0; j < vlen; j++, v++, m++) {
  				/* order of field assignments is important */
@@@ -1410,12 -1406,12 +1407,12 @@@
  				vt = (void *)btf__type_by_id(btf, v->type);
  				m->name_off = vt->name_off;
  			}
 -		} else if (!has_func && kind == BTF_KIND_FUNC_PROTO) {
 +		} else if (!has_func && btf_is_func_proto(t)) {
  			/* replace FUNC_PROTO with ENUM */
 -			vlen = BTF_INFO_VLEN(t->info);
 +			vlen = btf_vlen(t);
  			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
  			t->size = sizeof(__u32); /* kernel enforced */
 -		} else if (!has_func && kind == BTF_KIND_FUNC) {
 +		} else if (!has_func && btf_is_func(t)) {
  			/* replace FUNC with TYPEDEF */
  			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
  		}
@@@ -1773,22 -1769,15 +1770,22 @@@ bpf_program__collect_reloc(struct bpf_p
  			 (long long) sym.st_value, sym.st_name, name);
  
  		shdr_idx = sym.st_shndx;
 +		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
 +		pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n",
 +			 insn_idx, shdr_idx);
 +
 +		if (shdr_idx >= SHN_LORESERVE) {
 +			pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n",
 +				   name, shdr_idx, insn_idx,
 +				   insns[insn_idx].code);
 +			return -LIBBPF_ERRNO__RELOC;
 +		}
  		if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
  			pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
  				   prog->section_name, shdr_idx);
  			return -LIBBPF_ERRNO__RELOC;
  		}
  
 -		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
 -		pr_debug("relocation: insn_idx=%u\n", insn_idx);
 -
  		if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
  			if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
  				pr_warning("incorrect bpf_call opcode\n");
@@@ -2296,900 -2285,9 +2293,897 @@@ bpf_program_reloc_btf_ext(struct bpf_pr
  		prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
  	}
  
- 	if (!insn_offset)
- 		prog->btf_fd = btf__fd(obj->btf);
- 
  	return 0;
  }
  
 +#define BPF_CORE_SPEC_MAX_LEN 64
 +
 +/* represents BPF CO-RE field or array element accessor */
 +struct bpf_core_accessor {
 +	__u32 type_id;		/* struct/union type or array element type */
 +	__u32 idx;		/* field index or array index */
 +	const char *name;	/* field name or NULL for array accessor */
 +};
 +
 +struct bpf_core_spec {
 +	const struct btf *btf;
 +	/* high-level spec: named fields and array indices only */
 +	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
 +	/* high-level spec length */
 +	int len;
 +	/* raw, low-level spec: 1-to-1 with accessor spec string */
 +	int raw_spec[BPF_CORE_SPEC_MAX_LEN];
 +	/* raw spec length */
 +	int raw_len;
 +	/* field byte offset represented by spec */
 +	__u32 offset;
 +};
 +
 +static bool str_is_empty(const char *s)
 +{
 +	return !s || !s[0];
 +}
 +
 +/*
 + * Turn bpf_offset_reloc into a low- and high-level spec representation,
 + * validating correctness along the way, as well as calculating resulting
 + * field offset (in bytes), specified by accessor string. Low-level spec
 + * captures every single level of nestedness, including traversing anonymous
 + * struct/union members. High-level one only captures semantically meaningful
 + * "turning points": named fields and array indices.
 + * E.g., for this case:
 + *
 + *   struct sample {
 + *       int __unimportant;
 + *       struct {
 + *           int __1;
 + *           int __2;
 + *           int a[7];
 + *       };
 + *   };
 + *
 + *   struct sample *s = ...;
 + *
 + *   int *x = &s->a[3]; // access string = '0:1:2:3'
 + *
 + * Low-level spec has 1:1 mapping with each element of access string (it's
 + * just a parsed access string representation): [0, 1, 2, 3].
 + *
 + * High-level spec will capture only 3 points:
 + *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
 + *   - field 'a' access (corresponds to '2' in low-level spec);
 + *   - array element #3 access (corresponds to '3' in low-level spec).
 + *
 + */
 +static int bpf_core_spec_parse(const struct btf *btf,
 +			       __u32 type_id,
 +			       const char *spec_str,
 +			       struct bpf_core_spec *spec)
 +{
 +	int access_idx, parsed_len, i;
 +	const struct btf_type *t;
 +	const char *name;
 +	__u32 id;
 +	__s64 sz;
 +
 +	if (str_is_empty(spec_str) || *spec_str == ':')
 +		return -EINVAL;
 +
 +	memset(spec, 0, sizeof(*spec));
 +	spec->btf = btf;
 +
 +	/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
 +	while (*spec_str) {
 +		if (*spec_str == ':')
 +			++spec_str;
 +		if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
 +			return -EINVAL;
 +		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 +			return -E2BIG;
 +		spec_str += parsed_len;
 +		spec->raw_spec[spec->raw_len++] = access_idx;
 +	}
 +
 +	if (spec->raw_len == 0)
 +		return -EINVAL;
 +
 +	/* first spec value is always reloc type array index */
 +	t = skip_mods_and_typedefs(btf, type_id, &id);
 +	if (!t)
 +		return -EINVAL;
 +
 +	access_idx = spec->raw_spec[0];
 +	spec->spec[0].type_id = id;
 +	spec->spec[0].idx = access_idx;
 +	spec->len++;
 +
 +	sz = btf__resolve_size(btf, id);
 +	if (sz < 0)
 +		return sz;
 +	spec->offset = access_idx * sz;
 +
 +	for (i = 1; i < spec->raw_len; i++) {
 +		t = skip_mods_and_typedefs(btf, id, &id);
 +		if (!t)
 +			return -EINVAL;
 +
 +		access_idx = spec->raw_spec[i];
 +
 +		if (btf_is_composite(t)) {
 +			const struct btf_member *m;
 +			__u32 offset;
 +
 +			if (access_idx >= btf_vlen(t))
 +				return -EINVAL;
 +			if (btf_member_bitfield_size(t, access_idx))
 +				return -EINVAL;
 +
 +			offset = btf_member_bit_offset(t, access_idx);
 +			if (offset % 8)
 +				return -EINVAL;
 +			spec->offset += offset / 8;
 +
 +			m = btf_members(t) + access_idx;
 +			if (m->name_off) {
 +				name = btf__name_by_offset(btf, m->name_off);
 +				if (str_is_empty(name))
 +					return -EINVAL;
 +
 +				spec->spec[spec->len].type_id = id;
 +				spec->spec[spec->len].idx = access_idx;
 +				spec->spec[spec->len].name = name;
 +				spec->len++;
 +			}
 +
 +			id = m->type;
 +		} else if (btf_is_array(t)) {
 +			const struct btf_array *a = btf_array(t);
 +
 +			t = skip_mods_and_typedefs(btf, a->type, &id);
 +			if (!t || access_idx >= a->nelems)
 +				return -EINVAL;
 +
 +			spec->spec[spec->len].type_id = id;
 +			spec->spec[spec->len].idx = access_idx;
 +			spec->len++;
 +
 +			sz = btf__resolve_size(btf, id);
 +			if (sz < 0)
 +				return sz;
 +			spec->offset += access_idx * sz;
 +		} else {
 +			pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
 +				   type_id, spec_str, i, id, btf_kind(t));
 +			return -EINVAL;
 +		}
 +	}
 +
 +	return 0;
 +}
 +
 +static bool bpf_core_is_flavor_sep(const char *s)
 +{
 +	/* check X___Y name pattern, where X and Y are not underscores */
 +	return s[0] != '_' &&				      /* X */
 +	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
 +	       s[4] != '_';				      /* Y */
 +}
 +
 +/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 + * before last triple underscore. Struct name part after last triple
 + * underscore is ignored by BPF CO-RE relocation during relocation matching.
 + */
 +static size_t bpf_core_essential_name_len(const char *name)
 +{
 +	size_t n = strlen(name);
 +	int i;
 +
 +	for (i = n - 5; i >= 0; i--) {
 +		if (bpf_core_is_flavor_sep(name + i))
 +			return i + 1;
 +	}
 +	return n;
 +}
 +
 +/* dynamically sized list of type IDs */
 +struct ids_vec {
 +	__u32 *data;
 +	int len;
 +};
 +
 +static void bpf_core_free_cands(struct ids_vec *cand_ids)
 +{
 +	free(cand_ids->data);
 +	free(cand_ids);
 +}
 +
 +static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 +					   __u32 local_type_id,
 +					   const struct btf *targ_btf)
 +{
 +	size_t local_essent_len, targ_essent_len;
 +	const char *local_name, *targ_name;
 +	const struct btf_type *t;
 +	struct ids_vec *cand_ids;
 +	__u32 *new_ids;
 +	int i, err, n;
 +
 +	t = btf__type_by_id(local_btf, local_type_id);
 +	if (!t)
 +		return ERR_PTR(-EINVAL);
 +
 +	local_name = btf__name_by_offset(local_btf, t->name_off);
 +	if (str_is_empty(local_name))
 +		return ERR_PTR(-EINVAL);
 +	local_essent_len = bpf_core_essential_name_len(local_name);
 +
 +	cand_ids = calloc(1, sizeof(*cand_ids));
 +	if (!cand_ids)
 +		return ERR_PTR(-ENOMEM);
 +	/* collect IDs of target types sharing the local essential name */
 +	n = btf__get_nr_types(targ_btf);
 +	for (i = 1; i <= n; i++) {
 +		t = btf__type_by_id(targ_btf, i);
 +		targ_name = btf__name_by_offset(targ_btf, t->name_off);
 +		if (str_is_empty(targ_name))
 +			continue;
 +
 +		targ_essent_len = bpf_core_essential_name_len(targ_name);
 +		if (targ_essent_len != local_essent_len ||
 +		    strncmp(local_name, targ_name, local_essent_len) != 0)
 +			continue;
 +
 +		pr_debug("[%d] %s: found candidate [%d] %s\n",
 +			 local_type_id, local_name, i, targ_name);
 +		new_ids = realloc(cand_ids->data, /* size is in bytes */
 +				  (cand_ids->len + 1) * sizeof(*new_ids));
 +		if (!new_ids) {
 +			err = -ENOMEM;
 +			goto err_out;
 +		}
 +		cand_ids->data = new_ids;
 +		cand_ids->data[cand_ids->len++] = i;
 +	}
 +	return cand_ids;
 +err_out:
 +	bpf_core_free_cands(cand_ids);
 +	return ERR_PTR(err);
 +}
 +
 +/* Decide whether a local and a target type are compatible for the purpose of
 + * offset relocation. const/volatile/restrict modifiers and typedefs are
 + * skipped on both sides before comparing:
 + *   - any two STRUCTs/UNIONs are compatible and can be mixed;
 + *   - any two FWDs are compatible;
 + *   - any two PTRs are always compatible;
 + *   - for ENUMs, sizes are checked, names are ignored;
 + *   - for INT, size and bitness should match, signedness is ignored;
 + *   - for ARRAY, dimensionality is ignored, element types are checked for
 + *     compatibility in turn;
 + *   - everything else shouldn't be ever a target of relocation.
 + * These rules are not set in stone and probably will be adjusted as we get
 + * more experience with using BPF CO-RE relocations.
 + *
 + * Returns 1 if compatible, 0 if not, -EINVAL if a type can't be resolved.
 + */
 +static int bpf_core_fields_are_compat(const struct btf *local_btf,
 +				      __u32 local_id,
 +				      const struct btf *targ_btf,
 +				      __u32 targ_id)
 +{
 +	const struct btf_type *lt, *tt;
 +
 +	/* each iteration peels one ARRAY layer; every other kind is terminal */
 +	for (;;) {
 +		lt = skip_mods_and_typedefs(local_btf, local_id, &local_id);
 +		tt = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
 +		if (!lt || !tt)
 +			return -EINVAL;
 +
 +		if (btf_is_composite(lt) && btf_is_composite(tt))
 +			return 1;
 +		if (btf_kind(lt) != btf_kind(tt))
 +			return 0;
 +
 +		switch (btf_kind(lt)) {
 +		case BTF_KIND_FWD:
 +		case BTF_KIND_PTR:
 +			return 1;
 +		case BTF_KIND_ENUM:
 +			return lt->size == tt->size;
 +		case BTF_KIND_INT:
 +			return btf_int_offset(lt) == 0 &&
 +			       btf_int_offset(tt) == 0 &&
 +			       lt->size == tt->size &&
 +			       btf_int_bits(lt) == btf_int_bits(tt);
 +		case BTF_KIND_ARRAY:
 +			/* compare element types on the next pass */
 +			local_id = btf_array(lt)->type;
 +			targ_id = btf_array(tt)->type;
 +			continue;
 +		default:
 +			pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n",
 +				   btf_kind(lt), local_id, targ_id);
 +			return 0;
 +		}
 +	}
 +}
 +
 +/*
 + * Given single high-level named field accessor in local type, find
 + * corresponding high-level accessor for a target type. Along the way,
 + * maintain low-level spec for target as well. Also keep updating target
 + * offset.
 + *
 + * Searching is performed through recursive exhaustive enumeration of all
 + * fields of a struct/union. If there are any anonymous (embedded)
 + * structs/unions, they are recursively searched as well. If field with
 + * desired name is found, check compatibility between local and target types,
 + * before returning result.
 + *
 + * local_acc identifies the local member (type ID plus member index) whose
 + * name is looked up among the members of target type targ_id. On a match,
 + * a new accessor is appended to spec and *next_targ_id is set to the type ID
 + * of the matched target member.
 + *
 + * 1 is returned, if field is found.
 + * 0 is returned if no compatible field is found.
 + * <0 is returned on error.
 + */
 +static int bpf_core_match_member(const struct btf *local_btf,
 +				 const struct bpf_core_accessor *local_acc,
 +				 const struct btf *targ_btf,
 +				 __u32 targ_id,
 +				 struct bpf_core_spec *spec,
 +				 __u32 *next_targ_id)
 +{
 +	const struct btf_type *local_type, *targ_type;
 +	const struct btf_member *local_member, *m;
 +	const char *local_name, *targ_name;
 +	__u32 local_id;
 +	int i, n, found;
 +
 +	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
 +	if (!targ_type)
 +		return -EINVAL;
 +	/* only structs/unions have members to search through */
 +	if (!btf_is_composite(targ_type))
 +		return 0;
 +
 +	local_id = local_acc->type_id;
 +	local_type = btf__type_by_id(local_btf, local_id);
 +	local_member = btf_members(local_type) + local_acc->idx;
 +	local_name = btf__name_by_offset(local_btf, local_member->name_off);
 +
 +	n = btf_vlen(targ_type);
 +	m = btf_members(targ_type);
 +	for (i = 0; i < n; i++, m++) {
 +		__u32 offset;
 +
 +		/* bitfield relocations not supported */
 +		if (btf_member_bitfield_size(targ_type, i))
 +			continue;
 +		offset = btf_member_bit_offset(targ_type, i);
 +		/* members not starting on a byte boundary are skipped too */
 +		if (offset % 8)
 +			continue;
 +
 +		/* too deep struct/union/array nesting */
 +		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 +			return -E2BIG;
 +
 +		/* speculate this member will be the good one */
 +		spec->offset += offset / 8;
 +		spec->raw_spec[spec->raw_len++] = i;
 +
 +		targ_name = btf__name_by_offset(targ_btf, m->name_off);
 +		if (str_is_empty(targ_name)) {
 +			/* embedded struct/union, we need to go deeper */
 +			found = bpf_core_match_member(local_btf, local_acc,
 +						      targ_btf, m->type,
 +						      spec, next_targ_id);
 +			if (found) /* either found or error */
 +				return found;
 +		} else if (strcmp(local_name, targ_name) == 0) {
 +			/* matching named field */
 +			struct bpf_core_accessor *targ_acc;
 +
 +			targ_acc = &spec->spec[spec->len++];
 +			targ_acc->type_id = targ_id;
 +			targ_acc->idx = i;
 +			targ_acc->name = targ_name;
 +
 +			*next_targ_id = m->type;
 +			found = bpf_core_fields_are_compat(local_btf,
 +							   local_member->type,
 +							   targ_btf, m->type);
 +			if (!found)
 +				spec->len--; /* pop accessor */
 +			/* NOTE(review): when found == 0 the speculative
 +			 * spec->offset/raw_len update above is not undone
 +			 * on this path; confirm callers don't rely on spec
 +			 * after a 0 result
 +			 */
 +			return found;
 +		}
 +		/* member turned out not to be what we looked for */
 +		spec->offset -= offset / 8;
 +		spec->raw_len--;
 +	}
 +
 +	return 0;
 +}
 +
 +/*
 + * Try to match local spec to a target type and, if successful, produce full
 + * target spec (high-level, low-level + offset).
 + *
 + * targ_spec is zeroed first and then filled in accessor by accessor, walking
 + * local_spec in order and starting from target type targ_id.
 + *
 + * Returns 1 if the whole local spec was matched, 0 if the target type doesn't
 + * match, <0 on error (-EINVAL for unresolvable types, -E2BIG for too deep
 + * nesting, or an error from btf__resolve_size()).
 + */
 +static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 +			       const struct btf *targ_btf, __u32 targ_id,
 +			       struct bpf_core_spec *targ_spec)
 +{
 +	const struct btf_type *targ_type;
 +	const struct bpf_core_accessor *local_acc;
 +	struct bpf_core_accessor *targ_acc;
 +	int i, sz, matched;
 +
 +	memset(targ_spec, 0, sizeof(*targ_spec));
 +	targ_spec->btf = targ_btf;
 +
 +	local_acc = &local_spec->spec[0];
 +	targ_acc = &targ_spec->spec[0];
 +
 +	for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
 +		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
 +						   &targ_id);
 +		if (!targ_type)
 +			return -EINVAL;
 +
 +		if (local_acc->name) {
 +			/* named member access: on success targ_id is advanced
 +			 * to the type of the matched target member
 +			 */
 +			matched = bpf_core_match_member(local_spec->btf,
 +							local_acc,
 +							targ_btf, targ_id,
 +							targ_spec, &targ_id);
 +			if (matched <= 0)
 +				return matched;
 +		} else {
 +			/* for i=0, targ_id is already treated as array element
 +			 * type (because it's the original struct), for others
 +			 * we should find array element type first
 +			 */
 +			if (i > 0) {
 +				const struct btf_array *a;
 +
 +				if (!btf_is_array(targ_type))
 +					return 0;
 +
 +				a = btf_array(targ_type);
 +				/* index must be within target array bounds */
 +				if (local_acc->idx >= a->nelems)
 +					return 0;
 +				if (!skip_mods_and_typedefs(targ_btf, a->type,
 +							    &targ_id))
 +					return -EINVAL;
 +			}
 +
 +			/* too deep struct/union/array nesting */
 +			if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 +				return -E2BIG;
 +
 +			targ_acc->type_id = targ_id;
 +			targ_acc->idx = local_acc->idx;
 +			targ_acc->name = NULL;
 +			targ_spec->len++;
 +			targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
 +			targ_spec->raw_len++;
 +
 +			/* array access moves offset by idx whole elements */
 +			sz = btf__resolve_size(targ_btf, targ_id);
 +			if (sz < 0)
 +				return sz;
 +			targ_spec->offset += local_acc->idx * sz;
 +		}
 +	}
 +
 +	return 1;
 +}
 +
 +/*
 + * Patch relocatable BPF instruction.
 + * Expected insn->imm value is provided for validation, as well as the new
 + * relocated value.
 + *
 + * insn_off is a byte offset into the program's instructions; it has to be
 + * a multiple of sizeof(struct bpf_insn), otherwise -EINVAL is returned.
 + *
 + * The code below only patches ALU/ALU64 instructions with an immediate
 + * (BPF_K) source operand, e.g.:
 + * 1. rX = <imm> (assignment with immediate operand);
 + * 2. rX += <imm> (arithmetic operations with immediate operand).
 + * Instructions of any other class, such as *(rX) = <imm> (indirect memory
 + * assignment with immediate operand), are rejected with -EINVAL.
 + *
 + * If actual insn->imm value is wrong, bail out.
 + */
 +static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,
 +			       __u32 orig_off, __u32 new_off)
 +{
 +	struct bpf_insn *insn;
 +	int insn_idx;
 +	__u8 class;
 +
 +	if (insn_off % sizeof(struct bpf_insn))
 +		return -EINVAL;
 +	insn_idx = insn_off / sizeof(struct bpf_insn);
 +
 +	/* NOTE(review): insn_idx is not range-checked against the number of
 +	 * instructions in prog before indexing; confirm callers guarantee it
 +	 */
 +	insn = &prog->insns[insn_idx];
 +	class = BPF_CLASS(insn->code);
 +
 +	if (class == BPF_ALU || class == BPF_ALU64) {
 +		if (BPF_SRC(insn->code) != BPF_K)
 +			return -EINVAL;
 +		/* immediate has to hold the offset recorded at compile time */
 +		if (insn->imm != orig_off)
 +			return -EINVAL;
 +		insn->imm = new_off;
 +		pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n",
 +			 bpf_program__title(prog, false),
 +			 insn_idx, orig_off, new_off);
 +	} else {
 +		pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
 +			   bpf_program__title(prog, false),
 +			   insn_idx, insn->code, insn->src_reg, insn->dst_reg,
 +			   insn->off, insn->imm);
 +		return -EINVAL;
 +	}
 +	return 0;
 +}
 +
 +/* Read the whole file at path into memory and parse it as raw BTF data.
 + * Returns the result of btf__new() on the file contents, or an ERR_PTR():
 + * -errno if stat()/fopen() fail, -ENOMEM if the buffer can't be allocated,
 + * -EBADF if fewer bytes than the file size reported by stat() were read.
 + */
 +static struct btf *btf_load_raw(const char *path)
 +{
 +	struct btf *result;
 +	struct stat sb;
 +	size_t nread;
 +	void *buf;
 +	FILE *fp;
 +
 +	if (stat(path, &sb))
 +		return ERR_PTR(-errno);
 +
 +	buf = malloc(sb.st_size);
 +	if (!buf)
 +		return ERR_PTR(-ENOMEM);
 +
 +	fp = fopen(path, "rb");
 +	if (!fp) {
 +		result = ERR_PTR(-errno);
 +	} else {
 +		nread = fread(buf, 1, sb.st_size, fp);
 +		fclose(fp);
 +		if (nread < sb.st_size)
 +			result = ERR_PTR(-EBADF);
 +		else
 +			result = btf__new(buf, nread);
 +	}
 +
 +	/* the temporary buffer is released on every path */
 +	free(buf);
 +	return result;
 +}
 +
 +/*
 + * Probe few well-known locations for vmlinux kernel image and try to load BTF
 + * data out of it to use for target BTF.
 + *
 + * Locations are tried in the order listed below; "%1$s" in a path is
 + * substituted with the kernel release string reported by uname(). The first
 + * location that is readable and loads successfully wins. If none does,
 + * ERR_PTR(-ESRCH) is returned.
 + */
 +static struct btf *bpf_core_find_kernel_btf(void)
 +{
 +	struct {
 +		const char *path_fmt;
 +		bool raw_btf;
 +	} locations[] = {
 +		/* try canonical vmlinux BTF through sysfs first */
 +		{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
 +		/* fall back to trying to find vmlinux ELF on disk otherwise */
 +		{ "/boot/vmlinux-%1$s" },
 +		{ "/lib/modules/%1$s/vmlinux-%1$s" },
 +		{ "/lib/modules/%1$s/build/vmlinux" },
 +		{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
 +		{ "/usr/lib/debug/boot/vmlinux-%1$s" },
 +		{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
 +		{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
 +	};
 +	char path[PATH_MAX + 1];
 +	struct utsname buf;
 +	struct btf *btf;
 +	int i;
 +
 +	/* NOTE(review): uname() return value is not checked here */
 +	uname(&buf);
 +
 +	for (i = 0; i < ARRAY_SIZE(locations); i++) {
 +		snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
 +
 +		/* skip locations that don't exist or aren't readable */
 +		if (access(path, R_OK))
 +			continue;
 +
 +		if (locations[i].raw_btf)
 +			btf = btf_load_raw(path);
 +		else
 +			btf = btf__parse_elf(path, NULL);
 +
 +		pr_debug("loading kernel BTF '%s': %ld\n",
 +			 path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
 +		/* a failed load is not fatal, just try the next location */
 +		if (IS_ERR(btf))
 +			continue;
 +
 +		return btf;
 +	}
 +
 +	pr_warning("failed to find valid kernel BTF\n");
 +	return ERR_PTR(-ESRCH);
 +}
 +
 +/* Print spec at the given log level, formatted as:
 + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
 + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
 + */
 +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
 +{
 +	__u32 root_id = spec->spec[0].type_id;
 +	const struct btf_type *root = btf__type_by_id(spec->btf, root_id);
 +	const char *root_name = btf__name_by_offset(spec->btf, root->name_off);
 +	const struct bpf_core_accessor *acc;
 +	int k;
 +
 +	libbpf_print(level, "[%u] %s + ", root_id, root_name);
 +
 +	/* raw spec: colon-separated indices, the last one followed by " => " */
 +	for (k = 0; k < spec->raw_len; k++) {
 +		const char *sep = k == spec->raw_len - 1 ? " => " : ":";
 +
 +		libbpf_print(level, "%d%s", spec->raw_spec[k], sep);
 +	}
 +
 +	libbpf_print(level, "%u @ &x", spec->offset);
 +
 +	/* high-level spec: a member name or an array index per accessor */
 +	for (k = 0; k < spec->len; k++) {
 +		acc = &spec->spec[k];
 +		if (acc->name)
 +			libbpf_print(level, ".%s", acc->name);
 +		else
 +			libbpf_print(level, "[%u]", acc->idx);
 +	}
 +}
 +
 +/* Hash callback for the candidate cache: the key value itself is the hash. */
 +static size_t bpf_core_hash_fn(const void *key, void *ctx)
 +{
 +	return (size_t)key;
 +}
 +
 +/* Equality callback for the candidate cache: keys are compared by value. */
 +static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
 +{
 +	return k1 == k2;
 +}
 +
 +/* Encode a 32-bit value (e.g., a type ID) as a pointer-sized hashmap key. */
 +static void *u32_as_hash_key(__u32 x)
 +{
 +	return (void *)(uintptr_t)x;
 +}
 +
 +/*
 + * CO-RE relocate single instruction.
 + *
 + * The outline and important points of the algorithm:
 + * 1. For given local type, find corresponding candidate target types.
 + *    Candidate type is a type with the same "essential" name, ignoring
 + *    everything after last triple underscore (___). E.g., `sample`,
 + *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
 + *    for each other. Names with triple underscore are referred to as
 + *    "flavors" and are useful, among other things, to allow to
 + *    specify/support incompatible variations of the same kernel struct, which
 + *    might differ between different kernel versions and/or build
 + *    configurations.
 + *
 + *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
 + *    converter, when deduplicated BTF of a kernel still contains more than
 + *    one different types with the same name. In that case, ___2, ___3, etc
 + *    are appended starting from second name conflict. But start flavors are
 + *    also useful to be defined "locally", in BPF program, to extract same
 + *    data from incompatible changes between different kernel
 + *    versions/configurations. For instance, to handle field renames between
 + *    kernel versions, one can use two flavors of the struct name with the
 + *    same common name and use conditional relocations to extract that field,
 + *    depending on target kernel version.
 + * 2. For each candidate type, try to match local specification to this
 + *    candidate target type. Matching involves finding corresponding
 + *    high-level spec accessors, meaning that all named fields should match,
 + *    as well as all array accesses should be within the actual bounds. Also,
 + *    types should be compatible (see bpf_core_fields_are_compat for details).
 + * 3. It is supported and expected that there might be multiple flavors
 + *    matching the spec. As long as all the specs resolve to the same set of
 + *    offsets across all candidates, there is no error. If there is any
 + *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
 + *    imperfection of BTF deduplication, which can cause slight duplication of
 + *    the same BTF type, if some directly or indirectly referenced (by
 + *    pointer) type gets resolved to different actual types in different
 + *    object files. If such situation occurs, deduplicated BTF will end up
 + *    with two (or more) structurally identical types, which differ only in
 + *    types they refer to through pointer. This should be OK in most cases and
 + *    is not an error.
 + * 4. Candidate types search is performed by linearly scanning through all
 + *    types in target BTF. It is anticipated that this is overall more
 + *    efficient memory-wise and not significantly worse (if not better)
 + *    CPU-wise compared to prebuilding a map from all local type names to
 + *    a list of candidate type names. It's also sped up by caching resolved
 + *    list of matching candidates per each local "root" type ID, that has at
 + *    least one bpf_offset_reloc associated with it. This list is shared
 + *    between multiple relocations for the same type ID and is updated as some
 + *    of the candidates are pruned due to structural incompatibility.
 + */
 +static int bpf_core_reloc_offset(struct bpf_program *prog,
 +				 const struct bpf_offset_reloc *relo,
 +				 int relo_idx,
 +				 const struct btf *local_btf,
 +				 const struct btf *targ_btf,
 +				 struct hashmap *cand_cache)
 +{
 +	const char *prog_name = bpf_program__title(prog, false);
 +	struct bpf_core_spec local_spec, cand_spec, targ_spec;
 +	const void *type_key = u32_as_hash_key(relo->type_id);
 +	const struct btf_type *local_type, *cand_type;
 +	const char *local_name, *cand_name;
 +	struct ids_vec *cand_ids;
 +	__u32 local_id, cand_id;
 +	const char *spec_str;
 +	int i, j, err;
 +
 +	local_id = relo->type_id;
 +	local_type = btf__type_by_id(local_btf, local_id);
 +	if (!local_type)
 +		return -EINVAL;
 +
 +	/* anonymous root types can't be matched by name */
 +	local_name = btf__name_by_offset(local_btf, local_type->name_off);
 +	if (str_is_empty(local_name))
 +		return -EINVAL;
 +
 +	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
 +	if (str_is_empty(spec_str))
 +		return -EINVAL;
 +
 +	err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
 +	if (err) {
 +		pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
 +			   prog_name, relo_idx, local_id, local_name, spec_str,
 +			   err);
 +		return -EINVAL;
 +	}
 +
 +	pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx);
 +	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
 +	libbpf_print(LIBBPF_DEBUG, "\n");
 +
 +	/* candidates are looked up once per local type ID and then cached;
 +	 * once stored, the list is owned by cand_cache
 +	 */
 +	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
 +		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
 +		if (IS_ERR(cand_ids)) {
 +			pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld\n",
 +				   prog_name, relo_idx, local_id, local_name,
 +				   PTR_ERR(cand_ids));
 +			return PTR_ERR(cand_ids);
 +		}
 +		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
 +		if (err) {
 +			bpf_core_free_cands(cand_ids);
 +			return err;
 +		}
 +	}
 +
 +	/* i walks all cached candidates, j counts the ones that matched;
 +	 * matched candidates are compacted to the front of the list
 +	 */
 +	for (i = 0, j = 0; i < cand_ids->len; i++) {
 +		cand_id = cand_ids->data[i];
 +		cand_type = btf__type_by_id(targ_btf, cand_id);
 +		cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
 +
 +		err = bpf_core_spec_match(&local_spec, targ_btf,
 +					  cand_id, &cand_spec);
 +		pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
 +			 prog_name, relo_idx, i, cand_name);
 +		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
 +		libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
 +		if (err < 0) {
 +			pr_warning("prog '%s': relo #%d: matching error: %d\n",
 +				   prog_name, relo_idx, err);
 +			return err;
 +		}
 +		if (err == 0)
 +			continue;
 +
 +		if (j == 0) {
 +			/* first match defines the offset others must agree on */
 +			targ_spec = cand_spec;
 +		} else if (cand_spec.offset != targ_spec.offset) {
 +			/* if there are many candidates, they should all
 +			 * resolve to the same offset
 +			 */
 +			pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
 +				   prog_name, relo_idx, cand_spec.offset,
 +				   targ_spec.offset);
 +			return -EINVAL;
 +		}
 +
 +		cand_ids->data[j++] = cand_spec.spec[0].type_id;
 +	}
 +
 +	/* prune candidates that didn't match from the cached list */
 +	cand_ids->len = j;
 +	if (cand_ids->len == 0) {
 +		pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
 +			   prog_name, relo_idx, local_id, local_name, spec_str);
 +		return -ESRCH;
 +	}
 +
 +	err = bpf_core_reloc_insn(prog, relo->insn_off,
 +				  local_spec.offset, targ_spec.offset);
 +	if (err) {
 +		pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
 +			   prog_name, relo_idx, relo->insn_off, err);
 +		return -EINVAL;
 +	}
 +
 +	return 0;
 +}
 +
 +/* Apply all CO-RE offset relocations recorded in obj->btf_ext.
 + *
 + * Target BTF is parsed from the ELF file at targ_btf_path, if given;
 + * otherwise it is looked up with bpf_core_find_kernel_btf(). Each relocation
 + * section is mapped by its name to a BPF program of obj and every record in
 + * it is applied with bpf_core_reloc_offset().
 + *
 + * Returns 0 on success, negative error code on the first failure.
 + */
 +static int
 +bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
 +{
 +	const struct btf_ext_info_sec *sec;
 +	const struct bpf_offset_reloc *rec;
 +	const struct btf_ext_info *seg;
 +	struct hashmap_entry *entry;
 +	struct hashmap *cand_cache = NULL;
 +	struct bpf_program *prog;
 +	struct btf *targ_btf;
 +	const char *sec_name;
 +	int i, err = 0;
 +
 +	if (targ_btf_path)
 +		targ_btf = btf__parse_elf(targ_btf_path, NULL);
 +	else
 +		targ_btf = bpf_core_find_kernel_btf();
 +	if (IS_ERR(targ_btf)) {
 +		pr_warning("failed to get target BTF: %ld\n",
 +			   PTR_ERR(targ_btf));
 +		return PTR_ERR(targ_btf);
 +	}
 +
 +	/* candidate lists are cached by local type ID across relocations */
 +	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 +	if (IS_ERR(cand_cache)) {
 +		err = PTR_ERR(cand_cache);
 +		goto out;
 +	}
 +
 +	seg = &obj->btf_ext->offset_reloc_info;
 +	for_each_btf_ext_sec(seg, sec) {
 +		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 +		if (str_is_empty(sec_name)) {
 +			err = -EINVAL;
 +			goto out;
 +		}
 +		/* section name doubles as the title of the program */
 +		prog = bpf_object__find_program_by_title(obj, sec_name);
 +		if (!prog) {
 +			pr_warning("failed to find program '%s' for CO-RE offset relocation\n",
 +				   sec_name);
 +			err = -EINVAL;
 +			goto out;
 +		}
 +
 +		pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
 +			 sec_name, sec->num_info);
 +
 +		for_each_btf_ext_rec(seg, sec, i, rec) {
 +			err = bpf_core_reloc_offset(prog, rec, i, obj->btf,
 +						    targ_btf, cand_cache);
 +			if (err) {
 +				pr_warning("prog '%s': relo #%d: failed to relocate: %d\n",
 +					   sec_name, i, err);
 +				goto out;
 +			}
 +		}
 +	}
 +
 +out:
 +	btf__free(targ_btf);
 +	/* cached candidate lists are owned by the cache, free them with it */
 +	if (!IS_ERR_OR_NULL(cand_cache)) {
 +		hashmap__for_each_entry(cand_cache, entry, i) {
 +			bpf_core_free_cands(entry->value);
 +		}
 +		hashmap__free(cand_cache);
 +	}
 +	return err;
 +}
 +
 +/* Run CO-RE relocations for obj, if its .BTF.ext has any offset relocation
 + * records. Returns 0 when there is nothing to do, otherwise whatever
 + * bpf_core_reloc_offsets() returns.
 + */
 +static int
 +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 +{
 +	if (!obj->btf_ext->offset_reloc_info.len)
 +		return 0;
 +
 +	return bpf_core_reloc_offsets(obj, targ_btf_path);
 +}
 +
  static int
  bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
  			struct reloc_desc *relo)
@@@ -3297,21 -2395,14 +3291,21 @@@ bpf_program__relocate(struct bpf_progra
  	return 0;
  }
  
 -
  static int
 -bpf_object__relocate(struct bpf_object *obj)
 +bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
  {
  	struct bpf_program *prog;
  	size_t i;
  	int err;
  
 +	if (obj->btf_ext) {
 +		err = bpf_object__relocate_core(obj, targ_btf_path);
 +		if (err) {
 +			pr_warning("failed to perform CO-RE relocations: %d\n",
 +				   err);
 +			return err;
 +		}
 +	}
  	for (i = 0; i < obj->nr_programs; i++) {
  		prog = &obj->programs[i];
  
@@@ -3366,7 -2457,7 +3360,7 @@@ load_program(struct bpf_program *prog, 
  	char *cp, errmsg[STRERR_BUFSIZE];
  	int log_buf_size = BPF_LOG_BUF_SIZE;
  	char *log_buf;
- 	int ret;
+ 	int btf_fd, ret;
  
  	if (!insns || !insns_cnt)
  		return -EINVAL;
@@@ -3381,7 -2472,12 +3375,12 @@@
  	load_attr.license = license;
  	load_attr.kern_version = kern_version;
  	load_attr.prog_ifindex = prog->prog_ifindex;
- 	load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
+ 	/* if .BTF.ext was loaded, kernel supports associated BTF for prog */
+ 	if (prog->obj->btf_ext)
+ 		btf_fd = bpf_object__btf_fd(prog->obj);
+ 	else
+ 		btf_fd = -1;
+ 	load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
  	load_attr.func_info = prog->func_info;
  	load_attr.func_info_rec_size = prog->func_info_rec_size;
  	load_attr.func_info_cnt = prog->func_info_cnt;
@@@ -3712,7 -2808,7 +3711,7 @@@ int bpf_object__load_xattr(struct bpf_o
  	obj->loaded = true;
  
  	CHECK_ERR(bpf_object__create_maps(obj), err, out);
 -	CHECK_ERR(bpf_object__relocate(obj), err, out);
 +	CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
  	CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
  
  	return 0;
@@@ -5903,13 -4999,15 +5902,15 @@@ int libbpf_num_possible_cpus(void
  	static const char *fcpu = "/sys/devices/system/cpu/possible";
  	int len = 0, n = 0, il = 0, ir = 0;
  	unsigned int start = 0, end = 0;
+ 	int tmp_cpus = 0;
  	static int cpus;
  	char buf[128];
  	int error = 0;
  	int fd = -1;
  
- 	if (cpus > 0)
- 		return cpus;
+ 	tmp_cpus = READ_ONCE(cpus);
+ 	if (tmp_cpus > 0)
+ 		return tmp_cpus;
  
  	fd = open(fcpu, O_RDONLY);
  	if (fd < 0) {
@@@ -5932,7 -5030,7 +5933,7 @@@
  	}
  	buf[len] = '\0';
  
- 	for (ir = 0, cpus = 0; ir <= len; ir++) {
+ 	for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
  		/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
  		if (buf[ir] == ',' || buf[ir] == '\0') {
  			buf[ir] = '\0';
@@@ -5944,13 -5042,15 +5945,15 @@@
  			} else if (n == 1) {
  				end = start;
  			}
- 			cpus += end - start + 1;
+ 			tmp_cpus += end - start + 1;
  			il = ir + 1;
  		}
  	}
- 	if (cpus <= 0) {
- 		pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu);
+ 	if (tmp_cpus <= 0) {
+ 		pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
  		return -EINVAL;
  	}
- 	return cpus;
+ 
+ 	WRITE_ONCE(cpus, tmp_cpus);
+ 	return tmp_cpus;
  }