M: Heiner Kallweit <hkallweit1@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
- F: drivers/net/ethernet/realtek/r8169.c
+ F: drivers/net/ethernet/realtek/r8169*
8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
F: drivers/mux/adgs1408.c
F: Documentation/devicetree/bindings/mux/adi,adgs1408.txt
+ANALOG DEVICES INC ADIN DRIVER
+M: Alexandru Ardelean <alexaundru.ardelean@analog.com>
+L: netdev@vger.kernel.org
+W: http://ez.analog.com/community/linux-device-drivers
+S: Supported
+F: drivers/net/phy/adin.c
+F: Documentation/devicetree/bindings/net/adi,adin.yaml
+
ANALOG DEVICES INC ADIS DRIVER LIBRARY
M: Alexandru Ardelean <alexandru.ardelean@analog.com>
S: Supported
F: Documentation/devicetree/bindings/net/can/
F: drivers/net/can/
F: include/linux/can/dev.h
+F: include/linux/can/led.h
+F: include/linux/can/rx-offload.h
F: include/linux/can/platform/
F: include/uapi/linux/can/error.h
F: include/uapi/linux/can/netlink.h
+F: include/uapi/linux/can/vxcan.h
CAN NETWORK LAYER
M: Oliver Hartkopp <socketcan@hartkopp.net>
F: Documentation/networking/can.rst
F: net/can/
F: include/linux/can/core.h
+F: include/linux/can/skb.h
+F: include/net/netns/can.h
F: include/uapi/linux/can.h
F: include/uapi/linux/can/bcm.h
F: include/uapi/linux/can/raw.h
M: Heiner Kallweit <hkallweit1@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
- F: Documentation/ABI/testing/sysfs-bus-mdio
+ F: Documentation/ABI/testing/sysfs-class-net-phydev
F: Documentation/devicetree/bindings/net/ethernet-phy.yaml
F: Documentation/devicetree/bindings/net/mdio*
F: Documentation/networking/phy.rst
M: Moritz Fischer <mdf@kernel.org>
L: linux-fpga@vger.kernel.org
S: Maintained
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git
Q: http://patchwork.kernel.org/project/linux-fpga/list/
F: Documentation/fpga/
F: Documentation/driver-api/fpga/
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: dri-devel@lists.freedesktop.org
L: linux-fbdev@vger.kernel.org
- T: git git://github.com/bzolnier/linux.git
+ T: git git://anongit.freedesktop.org/drm/drm-misc
Q: http://patchwork.kernel.org/project/linux-fbdev/list/
S: Maintained
F: Documentation/fb/
F: drivers/perf/fsl_imx8_ddr_perf.c
F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+ FREESCALE IMX I2C DRIVER
+ M: Oleksij Rempel <o.rempel@pengutronix.de>
+ R: Pengutronix Kernel Team <kernel@pengutronix.de>
+ L: linux-i2c@vger.kernel.org
+ S: Maintained
+ F: drivers/i2c/busses/i2c-imx.c
+ F: Documentation/devicetree/bindings/i2c/i2c-imx.txt
+
FREESCALE IMX LPI2C DRIVER
M: Dong Aisheng <aisheng.dong@nxp.com>
L: linux-i2c@vger.kernel.org
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
F: drivers/video/fbdev/hyperv_fb.c
- F: drivers/iommu/hyperv_iommu.c
+ F: drivers/iommu/hyperv-iommu.c
F: net/vmw_vsock/hyperv_transport.c
F: include/clocksource/hyperv_timer.h
F: include/linux/hyperv.h
F: drivers/video/fbdev/i810/
INTEL ASoC DRIVERS
+ M: Cezary Rojewski <cezary.rojewski@intel.com>
M: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
M: Liam Girdwood <liam.r.girdwood@linux.intel.com>
M: Jie Yang <yang.jie@linux.intel.com>
S: Supported
F: drivers/scsi/isci/
+ INTEL CPU family model numbers
+ M: Tony Luck <tony.luck@intel.com>
+ M: x86@kernel.org
+ L: linux-kernel@vger.kernel.org
+ S: Supported
+ F: arch/x86/include/asm/intel-family.h
+
INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
M: Jani Nikula <jani.nikula@linux.intel.com>
M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
L: linux-fsdevel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
S: Supported
- F: fs/iomap.c
F: fs/iomap/
F: include/linux/iomap.h
W: https://fedorahosted.org/dropwatch/
F: net/core/drop_monitor.c
F: include/uapi/linux/net_dropmon.h
+F: include/net/drop_monitor.h
NETWORKING DRIVERS
M: "David S. Miller" <davem@davemloft.net>
F: include/uapi/linux/nfc.h
F: drivers/nfc/
F: include/linux/platform_data/nfcmrvl.h
-F: include/linux/platform_data/nxp-nci.h
F: Documentation/devicetree/bindings/net/nfc/
NFS, SUNRPC, AND LOCKD CLIENTS
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
-F: drivers/net/ethernet/qlogic/qlge/
+F: drivers/staging/qlge/
QM1D1B0004 MEDIA DRIVER
M: Akihiro Tsukada <tskd08@gmail.com>
F: drivers/net/ethernet/ti/netcp*
TI PCM3060 ASoC CODEC DRIVER
- M: Kirill Marinushkin <kmarinushkin@birdec.tech>
+ M: Kirill Marinushkin <kmarinushkin@birdec.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Maintained
F: Documentation/devicetree/bindings/sound/pcm3060.txt
BCM57508,
BCM57504,
BCM57502,
+ BCM57508_NPAR,
+ BCM57504_NPAR,
+ BCM57502_NPAR,
BCM58802,
BCM58804,
BCM58808,
[BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
[BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
[BCM57502] = { "Broadcom BCM57502 NetXtreme-E 10Gb/25Gb/50Gb Ethernet" },
+ [BCM57508_NPAR] = { "Broadcom BCM57508 NetXtreme-E Ethernet Partition" },
+ [BCM57504_NPAR] = { "Broadcom BCM57504 NetXtreme-E Ethernet Partition" },
+ [BCM57502_NPAR] = { "Broadcom BCM57502 NetXtreme-E Ethernet Partition" },
[BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
[BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
[BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
{ PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 },
{ PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 },
{ PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 },
+ { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR },
{ PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
#ifdef CONFIG_BNXT_SRIOV
return 0;
}
-static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 cp_cons,
- u32 agg_bufs)
+static struct rx_agg_cmp *bnxt_get_agg(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ u16 cp_cons, u16 curr)
+{
+ struct rx_agg_cmp *agg;
+
+ cp_cons = RING_CMP(ADV_RAW_CMP(cp_cons, curr));
+ agg = (struct rx_agg_cmp *)
+ &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+ return agg;
+}
+
+static struct rx_agg_cmp *bnxt_get_tpa_agg_p5(struct bnxt *bp,
+ struct bnxt_rx_ring_info *rxr,
+ u16 agg_id, u16 curr)
+{
+ struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[agg_id];
+
+ return &tpa_info->agg_arr[curr];
+}
+
+static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
+ u16 start, u32 agg_bufs, bool tpa)
{
struct bnxt_napi *bnapi = cpr->bnapi;
struct bnxt *bp = bnapi->bp;
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
u16 prod = rxr->rx_agg_prod;
u16 sw_prod = rxr->rx_sw_agg_prod;
+ bool p5_tpa = false;
u32 i;
+ if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
+ p5_tpa = true;
+
for (i = 0; i < agg_bufs; i++) {
u16 cons;
struct rx_agg_cmp *agg;
struct rx_bd *prod_bd;
struct page *page;
- agg = (struct rx_agg_cmp *)
- &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+ if (p5_tpa)
+ agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i);
+ else
+ agg = bnxt_get_agg(bp, cpr, idx, start + i);
cons = agg->rx_agg_cmp_opaque;
__clear_bit(cons, rxr->rx_agg_bmap);
prod = NEXT_RX_AGG(prod);
sw_prod = NEXT_RX_AGG(sw_prod);
- cp_cons = NEXT_CMP(cp_cons);
}
rxr->rx_agg_prod = prod;
rxr->rx_sw_agg_prod = sw_prod;
{
unsigned int payload = offset_and_len >> 16;
unsigned int len = offset_and_len & 0xffff;
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
struct page *page = data;
u16 prod = rxr->rx_prod;
struct sk_buff *skb;
frag = &skb_shinfo(skb)->frags[0];
skb_frag_size_sub(frag, payload);
- frag->page_offset += payload;
+ skb_frag_off_add(frag, payload);
skb->data_len -= payload;
skb->tail += payload;
static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
struct bnxt_cp_ring_info *cpr,
- struct sk_buff *skb, u16 cp_cons,
- u32 agg_bufs)
+ struct sk_buff *skb, u16 idx,
+ u32 agg_bufs, bool tpa)
{
struct bnxt_napi *bnapi = cpr->bnapi;
struct pci_dev *pdev = bp->pdev;
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
u16 prod = rxr->rx_agg_prod;
+ bool p5_tpa = false;
u32 i;
+ if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
+ p5_tpa = true;
+
for (i = 0; i < agg_bufs; i++) {
u16 cons, frag_len;
struct rx_agg_cmp *agg;
struct page *page;
dma_addr_t mapping;
- agg = (struct rx_agg_cmp *)
- &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+ if (p5_tpa)
+ agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i);
+ else
+ agg = bnxt_get_agg(bp, cpr, idx, i);
cons = agg->rx_agg_cmp_opaque;
frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) &
RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
* allocated already.
*/
rxr->rx_agg_prod = prod;
- bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs - i);
+ bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa);
return NULL;
}
skb->truesize += PAGE_SIZE;
prod = NEXT_RX_AGG(prod);
- cp_cons = NEXT_CMP(cp_cons);
}
rxr->rx_agg_prod = prod;
return skb;
} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
struct rx_tpa_end_cmp *tpa_end = cmp;
- agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
- RX_TPA_END_CMP_AGG_BUFS) >>
- RX_TPA_END_CMP_AGG_BUFS_SHIFT;
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ return 0;
+
+ agg_bufs = TPA_END_AGG_BUFS(tpa_end);
}
if (agg_bufs) {
rxr->rx_next_cons = 0xffff;
}
+static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
+{
+ struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
+ u16 idx = agg_id & MAX_TPA_P5_MASK;
+
+ if (test_bit(idx, map->agg_idx_bmap))
+ idx = find_first_zero_bit(map->agg_idx_bmap,
+ BNXT_AGG_IDX_BMAP_SIZE);
+ __set_bit(idx, map->agg_idx_bmap);
+ map->agg_id_tbl[agg_id] = idx;
+ return idx;
+}
+
+static void bnxt_free_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx)
+{
+ struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
+
+ __clear_bit(idx, map->agg_idx_bmap);
+}
+
+static u16 bnxt_lookup_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
+{
+ struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
+
+ return map->agg_id_tbl[agg_id];
+}
+
static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
struct rx_tpa_start_cmp *tpa_start,
struct rx_tpa_start_cmp_ext *tpa_start1)
{
- u8 agg_id = TPA_START_AGG_ID(tpa_start);
- u16 cons, prod;
- struct bnxt_tpa_info *tpa_info;
struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
+ struct bnxt_tpa_info *tpa_info;
+ u16 cons, prod, agg_id;
struct rx_bd *prod_bd;
dma_addr_t mapping;
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ agg_id = TPA_START_AGG_ID_P5(tpa_start);
+ agg_id = bnxt_alloc_agg_idx(rxr, agg_id);
+ } else {
+ agg_id = TPA_START_AGG_ID(tpa_start);
+ }
cons = tpa_start->rx_tpa_start_cmp_opaque;
prod = rxr->rx_prod;
cons_rx_buf = &rxr->rx_buf_ring[cons];
prod_rx_buf = &rxr->rx_buf_ring[prod];
tpa_info = &rxr->rx_tpa[agg_id];
- if (unlikely(cons != rxr->rx_next_cons)) {
- netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n",
- cons, rxr->rx_next_cons);
+ if (unlikely(cons != rxr->rx_next_cons ||
+ TPA_START_ERROR(tpa_start))) {
+ netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
+ cons, rxr->rx_next_cons,
+ TPA_START_ERROR_CODE(tpa_start1));
bnxt_sched_reset(bp, rxr);
return;
}
tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2);
tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata);
tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info);
+ tpa_info->agg_count = 0;
rxr->rx_prod = NEXT_RX(prod);
cons = NEXT_RX(cons);
cons_rx_buf->data = NULL;
}
-static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 cp_cons,
- u32 agg_bufs)
+static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 idx, u32 agg_bufs)
{
if (agg_bufs)
- bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
+ bnxt_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true);
}
+#ifdef CONFIG_INET
+static void bnxt_gro_tunnel(struct sk_buff *skb, __be16 ip_proto)
+{
+ struct udphdr *uh = NULL;
+
+ if (ip_proto == htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)skb->data;
+
+ if (iph->protocol == IPPROTO_UDP)
+ uh = (struct udphdr *)(iph + 1);
+ } else {
+ struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+
+ if (iph->nexthdr == IPPROTO_UDP)
+ uh = (struct udphdr *)(iph + 1);
+ }
+ if (uh) {
+ if (uh->check)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ }
+}
+#endif
+
static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info,
int payload_off, int tcp_ts,
struct sk_buff *skb)
}
if (inner_mac_off) { /* tunnel */
- struct udphdr *uh = NULL;
__be16 proto = *((__be16 *)(skb->data + outer_ip_off -
ETH_HLEN - 2));
- if (proto == htons(ETH_P_IP)) {
- struct iphdr *iph = (struct iphdr *)skb->data;
+ bnxt_gro_tunnel(skb, proto);
+ }
+#endif
+ return skb;
+}
- if (iph->protocol == IPPROTO_UDP)
- uh = (struct udphdr *)(iph + 1);
- } else {
- struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+static struct sk_buff *bnxt_gro_func_5750x(struct bnxt_tpa_info *tpa_info,
+ int payload_off, int tcp_ts,
+ struct sk_buff *skb)
+{
+#ifdef CONFIG_INET
+ u16 outer_ip_off, inner_ip_off, inner_mac_off;
+ u32 hdr_info = tpa_info->hdr_info;
+ int iphdr_len, nw_off;
- if (iph->nexthdr == IPPROTO_UDP)
- uh = (struct udphdr *)(iph + 1);
- }
- if (uh) {
- if (uh->check)
- skb_shinfo(skb)->gso_type |=
- SKB_GSO_UDP_TUNNEL_CSUM;
- else
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
+ inner_ip_off = BNXT_TPA_INNER_L3_OFF(hdr_info);
+ inner_mac_off = BNXT_TPA_INNER_L2_OFF(hdr_info);
+ outer_ip_off = BNXT_TPA_OUTER_L3_OFF(hdr_info);
+
+ nw_off = inner_ip_off - ETH_HLEN;
+ skb_set_network_header(skb, nw_off);
+ iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ?
+ sizeof(struct ipv6hdr) : sizeof(struct iphdr);
+ skb_set_transport_header(skb, nw_off + iphdr_len);
+
+ if (inner_mac_off) { /* tunnel */
+ __be16 proto = *((__be16 *)(skb->data + outer_ip_off -
+ ETH_HLEN - 2));
+
+ bnxt_gro_tunnel(skb, proto);
}
#endif
return skb;
return NULL;
}
- if (nw_off) { /* tunnel */
- struct udphdr *uh = NULL;
-
- if (skb->protocol == htons(ETH_P_IP)) {
- struct iphdr *iph = (struct iphdr *)skb->data;
-
- if (iph->protocol == IPPROTO_UDP)
- uh = (struct udphdr *)(iph + 1);
- } else {
- struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
-
- if (iph->nexthdr == IPPROTO_UDP)
- uh = (struct udphdr *)(iph + 1);
- }
- if (uh) {
- if (uh->check)
- skb_shinfo(skb)->gso_type |=
- SKB_GSO_UDP_TUNNEL_CSUM;
- else
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
- }
+ if (nw_off) /* tunnel */
+ bnxt_gro_tunnel(skb, skb->protocol);
#endif
return skb;
}
skb_shinfo(skb)->gso_size =
le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
skb_shinfo(skb)->gso_type = tpa_info->gso_type;
- payload_off = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
- RX_TPA_END_CMP_PAYLOAD_OFFSET) >>
- RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT;
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1);
+ else
+ payload_off = TPA_END_PAYLOAD_OFF(tpa_end);
skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb);
if (likely(skb))
tcp_gro_complete(skb);
{
struct bnxt_napi *bnapi = cpr->bnapi;
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
- u8 agg_id = TPA_END_AGG_ID(tpa_end);
u8 *data_ptr, agg_bufs;
- u16 cp_cons = RING_CMP(*raw_cons);
unsigned int len;
struct bnxt_tpa_info *tpa_info;
dma_addr_t mapping;
struct sk_buff *skb;
+ u16 idx = 0, agg_id;
void *data;
+ bool gro;
if (unlikely(bnapi->in_reset)) {
int rc = bnxt_discard_rx(bp, cpr, raw_cons, tpa_end);
return NULL;
}
- tpa_info = &rxr->rx_tpa[agg_id];
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ agg_id = TPA_END_AGG_ID_P5(tpa_end);
+ agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
+ agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1);
+ tpa_info = &rxr->rx_tpa[agg_id];
+ if (unlikely(agg_bufs != tpa_info->agg_count)) {
+ netdev_warn(bp->dev, "TPA end agg_buf %d != expected agg_bufs %d\n",
+ agg_bufs, tpa_info->agg_count);
+ agg_bufs = tpa_info->agg_count;
+ }
+ tpa_info->agg_count = 0;
+ *event |= BNXT_AGG_EVENT;
+ bnxt_free_agg_idx(rxr, agg_id);
+ idx = agg_id;
+ gro = !!(bp->flags & BNXT_FLAG_GRO);
+ } else {
+ agg_id = TPA_END_AGG_ID(tpa_end);
+ agg_bufs = TPA_END_AGG_BUFS(tpa_end);
+ tpa_info = &rxr->rx_tpa[agg_id];
+ idx = RING_CMP(*raw_cons);
+ if (agg_bufs) {
+ if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
+ return ERR_PTR(-EBUSY);
+
+ *event |= BNXT_AGG_EVENT;
+ idx = NEXT_CMP(idx);
+ }
+ gro = !!TPA_END_GRO(tpa_end);
+ }
data = tpa_info->data;
data_ptr = tpa_info->data_ptr;
prefetch(data_ptr);
len = tpa_info->len;
mapping = tpa_info->mapping;
- agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
- RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT;
-
- if (agg_bufs) {
- if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
- return ERR_PTR(-EBUSY);
-
- *event |= BNXT_AGG_EVENT;
- cp_cons = NEXT_CMP(cp_cons);
- }
-
if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
- bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
+ bnxt_abort_tpa(cpr, idx, agg_bufs);
if (agg_bufs > MAX_SKB_FRAGS)
netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
agg_bufs, (int)MAX_SKB_FRAGS);
if (len <= bp->rx_copy_thresh) {
skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
if (!skb) {
- bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
+ bnxt_abort_tpa(cpr, idx, agg_bufs);
return NULL;
}
} else {
new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC);
if (!new_data) {
- bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
+ bnxt_abort_tpa(cpr, idx, agg_bufs);
return NULL;
}
if (!skb) {
kfree(data);
- bnxt_abort_tpa(cpr, cp_cons, agg_bufs);
+ bnxt_abort_tpa(cpr, idx, agg_bufs);
return NULL;
}
skb_reserve(skb, bp->rx_offset);
}
if (agg_bufs) {
- skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs);
+ skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
if (!skb) {
/* Page reuse already handled by bnxt_rx_pages(). */
return NULL;
(tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3;
}
- if (TPA_END_GRO(tpa_end))
+ if (gro)
skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb);
return skb;
}
+static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+ struct rx_agg_cmp *rx_agg)
+{
+ u16 agg_id = TPA_AGG_AGG_ID(rx_agg);
+ struct bnxt_tpa_info *tpa_info;
+
+ agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
+ tpa_info = &rxr->rx_tpa[agg_id];
+ BUG_ON(tpa_info->agg_count >= MAX_SKB_FRAGS);
+ tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg;
+}
+
static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
struct sk_buff *skb)
{
rxcmp = (struct rx_cmp *)
&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+ cmp_type = RX_CMP_TYPE(rxcmp);
+
+ if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) {
+ bnxt_tpa_agg(bp, rxr, (struct rx_agg_cmp *)rxcmp);
+ goto next_rx_no_prod_no_len;
+ }
+
tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
cp_cons = RING_CMP(tmp_raw_cons);
rxcmp1 = (struct rx_cmp_ext *)
if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
return -EBUSY;
- cmp_type = RX_CMP_TYPE(rxcmp);
-
prod = rxr->rx_prod;
if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
bnxt_reuse_rx_data(rxr, cons, data);
if (agg_bufs)
- bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
+ bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs,
+ false);
rc = -EIO;
if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
bnxt_reuse_rx_data(rxr, cons, data);
if (!skb) {
if (agg_bufs)
- bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
+ bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
+ agg_bufs, false);
rc = -ENOMEM;
goto next_rx;
}
}
if (agg_bufs) {
- skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs);
+ skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
if (!skb) {
rc = -ENOMEM;
goto next_rx;
if (bnapi->events & BNXT_RX_EVENT) {
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
- bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
if (bnapi->events & BNXT_AGG_EVENT)
bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+ bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
}
bnapi->events = 0;
}
max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT;
for (i = 0; i < bp->rx_nr_rings; i++) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+ struct bnxt_tpa_idx_map *map;
int j;
if (rxr->rx_tpa) {
- for (j = 0; j < MAX_TPA; j++) {
+ for (j = 0; j < bp->max_tpa; j++) {
struct bnxt_tpa_info *tpa_info =
&rxr->rx_tpa[j];
u8 *data = tpa_info->data;
__free_page(rxr->rx_page);
rxr->rx_page = NULL;
}
+ map = rxr->rx_tpa_idx_map;
+ if (map)
+ memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap));
}
}
return 0;
}
+static void bnxt_free_tpa_info(struct bnxt *bp)
+{
+ int i;
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+
+ kfree(rxr->rx_tpa_idx_map);
+ rxr->rx_tpa_idx_map = NULL;
+ if (rxr->rx_tpa) {
+ kfree(rxr->rx_tpa[0].agg_arr);
+ rxr->rx_tpa[0].agg_arr = NULL;
+ }
+ kfree(rxr->rx_tpa);
+ rxr->rx_tpa = NULL;
+ }
+}
+
+static int bnxt_alloc_tpa_info(struct bnxt *bp)
+{
+ int i, j, total_aggs = 0;
+
+ bp->max_tpa = MAX_TPA;
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (!bp->max_tpa_v2)
+ return 0;
+ bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+ total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
+ }
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+ struct rx_agg_cmp *agg;
+
+ rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info),
+ GFP_KERNEL);
+ if (!rxr->rx_tpa)
+ return -ENOMEM;
+
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ continue;
+ agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
+ rxr->rx_tpa[0].agg_arr = agg;
+ if (!agg)
+ return -ENOMEM;
+ for (j = 1; j < bp->max_tpa; j++)
+ rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
+ rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
+ GFP_KERNEL);
+ if (!rxr->rx_tpa_idx_map)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
static void bnxt_free_rx_rings(struct bnxt *bp)
{
int i;
if (!bp->rx_ring)
return;
+ bnxt_free_tpa_info(bp);
for (i = 0; i < bp->rx_nr_rings; i++) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
struct bnxt_ring_struct *ring;
page_pool_destroy(rxr->page_pool);
rxr->page_pool = NULL;
- kfree(rxr->rx_tpa);
- rxr->rx_tpa = NULL;
-
kfree(rxr->rx_agg_bmap);
rxr->rx_agg_bmap = NULL;
static int bnxt_alloc_rx_rings(struct bnxt *bp)
{
- int i, rc, agg_rings = 0, tpa_rings = 0;
+ int i, rc = 0, agg_rings = 0;
if (!bp->rx_ring)
return -ENOMEM;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
agg_rings = 1;
- if (bp->flags & BNXT_FLAG_TPA)
- tpa_rings = 1;
-
for (i = 0; i < bp->rx_nr_rings; i++) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
struct bnxt_ring_struct *ring;
rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL);
if (!rxr->rx_agg_bmap)
return -ENOMEM;
-
- if (tpa_rings) {
- rxr->rx_tpa = kcalloc(MAX_TPA,
- sizeof(struct bnxt_tpa_info),
- GFP_KERNEL);
- if (!rxr->rx_tpa)
- return -ENOMEM;
- }
}
}
- return 0;
+ if (bp->flags & BNXT_FLAG_TPA)
+ rc = bnxt_alloc_tpa_info(bp);
+ return rc;
}
static void bnxt_free_tx_rings(struct bnxt *bp)
u8 *data;
dma_addr_t mapping;
- for (i = 0; i < MAX_TPA; i++) {
+ for (i = 0; i < bp->max_tpa; i++) {
data = __bnxt_alloc_rx_data(bp, &mapping,
GFP_KERNEL);
if (!data)
if (!bp->bnapi)
return;
- size = sizeof(struct ctx_hw_stats);
+ size = bp->hw_ring_stats_size;
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
u32 size, i;
struct pci_dev *pdev = bp->pdev;
- size = sizeof(struct ctx_hw_stats);
+ size = bp->hw_ring_stats_size;
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
{
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+ u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX;
struct hwrm_vnic_tpa_cfg_input req = {0};
if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
nsegs = (MAX_SKB_FRAGS - n) / n;
}
- segs = ilog2(nsegs);
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ segs = MAX_TPA_SEGS_P5;
+ max_aggs = bp->max_tpa;
+ } else {
+ segs = ilog2(nsegs);
+ }
req.max_agg_segs = cpu_to_le16(segs);
- req.max_aggs = cpu_to_le16(VNIC_TPA_CFG_REQ_MAX_AGGS_MAX);
+ req.max_aggs = cpu_to_le16(max_aggs);
req.min_agg_len = cpu_to_le32(512);
}
if (flags &
VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP)
bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP;
+ bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported);
+ if (bp->max_tpa_v2)
+ bp->hw_ring_stats_size =
+ sizeof(struct ctx_hw_stats_ext);
+ else
+ bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats);
}
mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
{
+ bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS);
int i, rc = 0;
u32 type;
if (rc)
goto err_out;
bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id);
- bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
+ /* If we have agg rings, post agg buffers first. */
+ if (!agg_rings)
+ bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id;
if (bp->flags & BNXT_FLAG_CHIP_P5) {
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
}
}
- if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+ if (agg_rings) {
type = HWRM_RING_ALLOC_AGG;
for (i = 0; i < bp->rx_nr_rings; i++) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx,
ring->fw_ring_id);
bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+ bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id;
}
}
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
mutex_lock(&bp->hwrm_cmd_lock);
bnxt_hwrm_vnic_set_rss(bp, i, false);
}
- static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
- bool irq_re_init)
+ static void bnxt_clear_vnic(struct bnxt *bp)
{
- if (bp->vnic_info) {
- bnxt_hwrm_clear_vnic_filter(bp);
+ if (!bp->vnic_info)
+ return;
+
+ bnxt_hwrm_clear_vnic_filter(bp);
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5)) {
/* clear all RSS setting before free vnic ctx */
bnxt_hwrm_clear_vnic_rss(bp);
bnxt_hwrm_vnic_ctx_free(bp);
- /* before free the vnic, undo the vnic tpa settings */
- if (bp->flags & BNXT_FLAG_TPA)
- bnxt_set_tpa(bp, false);
- bnxt_hwrm_vnic_free(bp);
}
+ /* before free the vnic, undo the vnic tpa settings */
+ if (bp->flags & BNXT_FLAG_TPA)
+ bnxt_set_tpa(bp, false);
+ bnxt_hwrm_vnic_free(bp);
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ bnxt_hwrm_vnic_ctx_free(bp);
+ }
+
+ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
+ bool irq_re_init)
+ {
+ bnxt_clear_vnic(bp);
bnxt_hwrm_ring_free(bp, close_path);
bnxt_hwrm_ring_grp_free(bp);
if (irq_re_init) {
if (changes & BNXT_FLAG_TPA) {
update_tpa = true;
if ((bp->flags & BNXT_FLAG_TPA) == 0 ||
- (flags & BNXT_FLAG_TPA) == 0)
+ (flags & BNXT_FLAG_TPA) == 0 ||
+ (bp->flags & BNXT_FLAG_CHIP_P5))
re_init = true;
}
if (flags != bp->flags) {
u32 old_flags = bp->flags;
- bp->flags = flags;
-
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+ bp->flags = flags;
if (update_tpa)
bnxt_set_ring_params(bp);
return rc;
if (re_init) {
bnxt_close_nic(bp, false, false);
+ bp->flags = flags;
if (update_tpa)
bnxt_set_ring_params(bp);
return bnxt_open_nic(bp, false, false);
}
if (update_tpa) {
+ bp->flags = flags;
rc = bnxt_set_tpa(bp,
(flags & BNXT_FLAG_TPA) ?
true : false);
bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
}
+static int bnxt_fw_init_one_p1(struct bnxt *bp)
+{
+ int rc;
+
+ bp->fw_cap = 0;
+ rc = bnxt_hwrm_ver_get(bp);
+ if (rc)
+ return rc;
+
+ if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
+ rc = bnxt_alloc_kong_hwrm_resources(bp);
+ if (rc)
+ bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
+ }
+
+ if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
+ bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
+ rc = bnxt_alloc_hwrm_short_cmd_req(bp);
+ if (rc)
+ return rc;
+ }
+ rc = bnxt_hwrm_func_reset(bp);
+ if (rc)
+ return -ENODEV;
+
+ bnxt_hwrm_fw_set_time(bp);
+ return 0;
+}
+
+static int bnxt_fw_init_one_p2(struct bnxt *bp)
+{
+ int rc;
+
+ /* Get the MAX capabilities for this function */
+ rc = bnxt_hwrm_func_qcaps(bp);
+ if (rc) {
+ netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
+ rc);
+ return -ENODEV;
+ }
+
+ rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
+ if (rc)
+ netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
+ rc);
+
+ rc = bnxt_hwrm_func_drv_rgtr(bp);
+ if (rc)
+ return -ENODEV;
+
+ rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
+ if (rc)
+ return -ENODEV;
+
+ bnxt_hwrm_func_qcfg(bp);
+ bnxt_hwrm_vnic_qcaps(bp);
+ bnxt_hwrm_port_led_qcaps(bp);
+ bnxt_ethtool_init(bp);
+ bnxt_dcb_init(bp);
+ return 0;
+}
+
static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
{
int rc;
goto init_err_pci_clean;
mutex_init(&bp->hwrm_cmd_lock);
- rc = bnxt_hwrm_ver_get(bp);
+
+ rc = bnxt_fw_init_one_p1(bp);
if (rc)
goto init_err_pci_clean;
- if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
- rc = bnxt_alloc_kong_hwrm_resources(bp);
- if (rc)
- bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
- }
-
- if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
- bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
- rc = bnxt_alloc_hwrm_short_cmd_req(bp);
- if (rc)
- goto init_err_pci_clean;
- }
-
if (BNXT_CHIP_P5(bp))
bp->flags |= BNXT_FLAG_CHIP_P5;
- rc = bnxt_hwrm_func_reset(bp);
+ rc = bnxt_fw_init_one_p2(bp);
if (rc)
goto init_err_pci_clean;
- bnxt_hwrm_fw_set_time(bp);
-
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_TSO6 |
NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
bp->gro_func = bnxt_gro_func_5730x;
if (BNXT_CHIP_P4(bp))
bp->gro_func = bnxt_gro_func_5731x;
+ else if (BNXT_CHIP_P5(bp))
+ bp->gro_func = bnxt_gro_func_5750x;
}
if (!BNXT_CHIP_P4_PLUS(bp))
bp->flags |= BNXT_FLAG_DOUBLE_DB;
- rc = bnxt_hwrm_func_drv_rgtr(bp);
- if (rc)
- goto init_err_pci_clean;
-
- rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
- if (rc)
- goto init_err_pci_clean;
-
bp->ulp_probe = bnxt_ulp_probe;
- rc = bnxt_hwrm_queue_qportcfg(bp);
- if (rc) {
- netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n",
- rc);
- rc = -1;
- goto init_err_pci_clean;
- }
- /* Get the MAX capabilities for this function */
- rc = bnxt_hwrm_func_qcaps(bp);
- if (rc) {
- netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
- rc);
- rc = -1;
- goto init_err_pci_clean;
- }
-
- rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
- if (rc)
- netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
- rc);
-
rc = bnxt_init_mac_addr(bp);
if (rc) {
dev_err(&pdev->dev, "Unable to initialize mac address.\n");
if (rc)
goto init_err_pci_clean;
}
- bnxt_hwrm_func_qcfg(bp);
- bnxt_hwrm_vnic_qcaps(bp);
- bnxt_hwrm_port_led_qcaps(bp);
- bnxt_ethtool_init(bp);
- bnxt_dcb_init(bp);
/* MTU range: 60 - FW defined max */
dev->min_mtu = ETH_ZLEN;
#ifdef CONFIG_PM_SLEEP
static int bnxt_suspend(struct device *device)
{
- struct pci_dev *pdev = to_pci_dev(device);
- struct net_device *dev = pci_get_drvdata(pdev);
+ struct net_device *dev = dev_get_drvdata(device);
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
static int bnxt_resume(struct device *device)
{
- struct pci_dev *pdev = to_pci_dev(device);
- struct net_device *dev = pci_get_drvdata(pdev);
+ struct net_device *dev = dev_get_drvdata(device);
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
return rc;
}
-#define BNXT_NUM_STATS 22
+static const char * const bnxt_ring_stats_str[] = {
+ "rx_ucast_packets",
+ "rx_mcast_packets",
+ "rx_bcast_packets",
+ "rx_discards",
+ "rx_drops",
+ "rx_ucast_bytes",
+ "rx_mcast_bytes",
+ "rx_bcast_bytes",
+ "tx_ucast_packets",
+ "tx_mcast_packets",
+ "tx_bcast_packets",
+ "tx_discards",
+ "tx_drops",
+ "tx_ucast_bytes",
+ "tx_mcast_bytes",
+ "tx_bcast_bytes",
+};
+
+static const char * const bnxt_ring_tpa_stats_str[] = {
+ "tpa_packets",
+ "tpa_bytes",
+ "tpa_events",
+ "tpa_aborts",
+};
+
+static const char * const bnxt_ring_tpa2_stats_str[] = {
+ "rx_tpa_eligible_pkt",
+ "rx_tpa_eligible_bytes",
+ "rx_tpa_pkt",
+ "rx_tpa_bytes",
+ "rx_tpa_errors",
+};
+
+static const char * const bnxt_ring_sw_stats_str[] = {
+ "rx_l4_csum_errors",
+ "missed_irqs",
+};
#define BNXT_RX_STATS_ENTRY(counter) \
{ BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
BNXT_TX_STATS_EXT_COS_ENTRY(6), \
BNXT_TX_STATS_EXT_COS_ENTRY(7) \
+#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(n) \
+ BNXT_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n), \
+ BNXT_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n)
+
+#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(0), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(1), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(2), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(3), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(4), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(5), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(6), \
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(7)
+
#define BNXT_RX_STATS_PRI_ENTRY(counter, n) \
{ BNXT_RX_STATS_EXT_OFFSET(counter##_cos0), \
__stringify(counter##_pri##n) }
BNXT_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold),
BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err),
BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits),
+ BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES,
};
static const struct {
ARRAY_SIZE(bnxt_tx_pkts_pri_arr))
#define BNXT_NUM_PCIE_STATS ARRAY_SIZE(bnxt_pcie_stats_arr)
+static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
+{
+ if (BNXT_SUPPORTS_TPA(bp)) {
+ if (bp->max_tpa_v2)
+ return ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
+ return ARRAY_SIZE(bnxt_ring_tpa_stats_str);
+ }
+ return 0;
+}
+
+static int bnxt_get_num_ring_stats(struct bnxt *bp)
+{
+ int num_stats;
+
+ num_stats = ARRAY_SIZE(bnxt_ring_stats_str) +
+ ARRAY_SIZE(bnxt_ring_sw_stats_str) +
+ bnxt_get_num_tpa_ring_stats(bp);
+ return num_stats * bp->cp_nr_rings;
+}
+
static int bnxt_get_num_stats(struct bnxt *bp)
{
- int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+ int num_stats = bnxt_get_num_ring_stats(bp);
num_stats += BNXT_NUM_SW_FUNC_STATS;
{
u32 i, j = 0;
struct bnxt *bp = netdev_priv(dev);
- u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
+ u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) +
+ bnxt_get_num_tpa_ring_stats(bp);
if (!bp->bnapi) {
- j += BNXT_NUM_STATS * bp->cp_nr_rings + BNXT_NUM_SW_FUNC_STATS;
+ j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS;
goto skip_ring_stats;
}
static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
struct bnxt *bp = netdev_priv(dev);
- u32 i;
+ static const char * const *str;
+ u32 i, j, num_str;
switch (stringset) {
- /* The number of strings must match BNXT_NUM_STATS defined above. */
case ETH_SS_STATS:
for (i = 0; i < bp->cp_nr_rings; i++) {
- sprintf(buf, "[%d]: rx_ucast_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_mcast_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_bcast_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_discards", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_drops", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_ucast_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_mcast_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_bcast_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_ucast_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_mcast_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_bcast_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_discards", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_drops", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_ucast_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_mcast_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tx_bcast_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tpa_packets", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tpa_bytes", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tpa_events", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: tpa_aborts", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: rx_l4_csum_errors", i);
- buf += ETH_GSTRING_LEN;
- sprintf(buf, "[%d]: missed_irqs", i);
- buf += ETH_GSTRING_LEN;
+ num_str = ARRAY_SIZE(bnxt_ring_stats_str);
+ for (j = 0; j < num_str; j++) {
+ sprintf(buf, "[%d]: %s", i,
+ bnxt_ring_stats_str[j]);
+ buf += ETH_GSTRING_LEN;
+ }
+ if (!BNXT_SUPPORTS_TPA(bp))
+ goto skip_tpa_stats;
+
+ if (bp->max_tpa_v2) {
+ num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
+ str = bnxt_ring_tpa2_stats_str;
+ } else {
+ num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str);
+ str = bnxt_ring_tpa_stats_str;
+ }
+ for (j = 0; j < num_str; j++) {
+ sprintf(buf, "[%d]: %s", i, str[j]);
+ buf += ETH_GSTRING_LEN;
+ }
+skip_tpa_stats:
+ num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str);
+ for (j = 0; j < num_str; j++) {
+ sprintf(buf, "[%d]: %s", i,
+ bnxt_ring_sw_stats_str[j]);
+ buf += ETH_GSTRING_LEN;
+ }
}
for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
strcpy(buf, bnxt_sw_func_stats[i].string);
mutex_lock(&bp->hwrm_cmd_lock);
hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
INSTALL_PACKAGE_TIMEOUT);
- if (hwrm_err)
- goto flash_pkg_exit;
-
- if (resp->error_code) {
+ if (hwrm_err) {
u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
- if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
+ if (resp->error_code && error_code ==
+ NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
install.flags |= cpu_to_le16(
NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
hwrm_err = _hwrm_send_message(bp, &install,
sizeof(install),
INSTALL_PACKAGE_TIMEOUT);
- if (hwrm_err)
- goto flash_pkg_exit;
}
+ if (hwrm_err)
+ goto flash_pkg_exit;
}
if (resp->result) {
return -ENOMEM;
err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz);
- if (err)
+ if (err) {
+ kvfree(t);
return err;
+ }
bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz);
kvfree(t);
{
int i;
u32 size = 0;
- struct dentry *de;
static struct t4_debugfs_entry t4_debugfs_files[] = {
{ "cim_la", &cim_la_fops, 0400, 0 },
}
}
- de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
- &flash_debugfs_fops, adap->params.sf_size);
+ debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
+ &flash_debugfs_fops, adap->params.sf_size);
debugfs_create_bool("use_backdoor", 0600,
adap->debugfs_root, &adap->use_bd);
debugfs_create_bool("trace_rss", 0600,
memcpy(dst + cur,
page_address(skb_frag_page(frag)) +
- frag->page_offset, skb_frag_size(frag));
+ skb_frag_off(frag), skb_frag_size(frag));
cur += skb_frag_size(frag);
}
} else {
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
(u64)tx_buff->indir_dma,
(u64)num_entries);
+ dma_unmap_single(dev, tx_buff->indir_dma,
+ sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
} else {
tx_buff->num_entries = num_entries;
lpar_rc = send_subcrq(adapter, handle_array[queue_num],
union sub_crq *next;
int index;
int i, j;
- u8 *first;
restart_loop:
while (pending_scrq(adapter, scrq)) {
txbuff->data_dma[j] = 0;
}
- /* if sub_crq was sent indirectly */
- first = &txbuff->indir_arr[0].generic.first;
- if (*first == IBMVNIC_CRQ_CMD) {
- dma_unmap_single(dev, txbuff->indir_dma,
- sizeof(txbuff->indir_arr),
- DMA_TO_DEVICE);
- *first = 0;
- }
if (txbuff->last_frag) {
dev_kfree_skb_any(txbuff->skb);
static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
struct sk_buff *skb)
{
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
unsigned char *va;
unsigned int pull_len;
/* update all of the pointers */
skb_frag_size_sub(frag, pull_len);
- frag->page_offset += pull_len;
+ skb_frag_off_add(frag, pull_len);
skb->data_len -= pull_len;
skb->tail += pull_len;
}
skb_headlen(skb),
DMA_FROM_DEVICE);
} else {
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
dma_sync_single_range_for_cpu(rx_ring->dev,
IXGBE_CB(skb)->dma,
- frag->page_offset,
+ skb_frag_off(frag),
skb_frag_size(frag),
DMA_FROM_DEVICE);
}
return;
}
if (ixgbe_check_fw_error(adapter)) {
- if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
- rtnl_lock();
+ if (!test_bit(__IXGBE_DOWN, &adapter->state))
unregister_netdev(adapter->netdev);
- rtnl_unlock();
- }
ixgbe_service_event_complete(adapter);
return;
}
struct sk_buff *skb = first->skb;
struct ixgbe_tx_buffer *tx_buffer;
union ixgbe_adv_tx_desc *tx_desc;
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
dma_addr_t dma;
unsigned int data_len, size;
u32 tx_flags = first->tx_flags;
* otherwise try next time
*/
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
- count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+ count += TXD_USE_COUNT(skb_frag_size(
+ &skb_shinfo(skb)->frags[f]));
if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
tx_ring->tx_stats.tx_busy++;
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
- struct mlx5_wqe_eth_seg eth;
- struct mlx5_wqe_data_seg data[0];
+ union {
+ struct {
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[0];
+ };
+ u8 tls_progress_params_ctx[0];
+ };
};
struct mlx5e_rx_wqe_ll {
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
MLX5E_SQ_STATE_TLS,
+ MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
};
struct mlx5e_sq_wqe_info {
struct mlx5e_tx_wqe *wqe;
u8 ds_count;
u8 pkt_count;
- u8 max_ds_count;
- u8 complete;
u8 inline_on;
};
u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info);
+ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash);
void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
struct ethtool_pauseparam *pauseparam);
int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
struct mlx5e_params *params);
void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
u16 num_channels);
-u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */
-#include <net/devlink.h>
#include "reporter.h"
#include "lib/eq.h"
u8 state;
int err;
- if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
- return 0;
-
err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
if (err) {
netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
sq->sqn, err);
- return err;
+ goto out;
}
- if (state != MLX5_SQC_STATE_ERR) {
- netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
- return -EINVAL;
- }
+ if (state != MLX5_SQC_STATE_ERR)
+ goto out;
mlx5e_tx_disable_queue(sq->txq);
err = mlx5e_wait_for_sq_flush(sq);
if (err)
- return err;
+ goto out;
/* At this point, no new packets will arrive from the stack as TXQ is
* marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
err = mlx5e_sq_to_ready(sq, state);
if (err)
- return err;
+ goto out;
mlx5e_reset_txqsq_cc_pc(sq);
sq->stats->recover++;
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
mlx5e_activate_txqsq(sq);
return 0;
+ out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ return err;
}
static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
char *err_str,
struct mlx5e_tx_err_ctx *err_ctx)
{
- if (IS_ERR_OR_NULL(tx_reporter)) {
+ if (!tx_reporter) {
netdev_err(err_ctx->sq->channel->netdev, err_str);
return err_ctx->recover(err_ctx->sq);
}
int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
{
+ struct devlink_health_reporter *reporter;
struct mlx5_core_dev *mdev = priv->mdev;
struct devlink *devlink = priv_to_devlink(mdev);
- priv->tx_reporter =
+ reporter =
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD,
true, priv);
- if (IS_ERR(priv->tx_reporter))
+ if (IS_ERR(reporter)) {
netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n",
- PTR_ERR(priv->tx_reporter));
- return IS_ERR_OR_NULL(priv->tx_reporter);
+ PTR_ERR(reporter));
+ return PTR_ERR(reporter);
+ }
+ priv->tx_reporter = reporter;
+ return 0;
}
void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
{
- if (IS_ERR_OR_NULL(priv->tx_reporter))
+ if (!priv->tx_reporter)
return;
devlink_health_reporter_destroy(priv->tx_reporter);
struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
struct mlx5e_channel *c)
{
- struct mlx5e_channel_param cparam = {};
+ struct mlx5e_channel_param *cparam;
struct dim_cq_moder icocq_moder = {};
int err;
if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
return -EINVAL;
- mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
+ cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
+ if (!cparam)
+ return -ENOMEM;
- err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
+ mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
+
+ err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq);
if (unlikely(err))
- return err;
+ goto err_free_cparam;
- err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
+ err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
if (unlikely(err))
goto err_close_rx_cq;
- err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
+ err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq);
if (unlikely(err))
goto err_close_rq;
* is disabled and then reenabled, but the SQ continues receiving CQEs
* from the old UMEM.
*/
- err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
if (unlikely(err))
goto err_close_tx_cq;
- err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
+ err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
if (unlikely(err))
goto err_close_sq;
/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
* triggered and NAPI to be called on the correct CPU.
*/
- err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
if (unlikely(err))
goto err_close_icocq;
+ kvfree(cparam);
+
spin_lock_init(&c->xskicosq_lock);
set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
err_close_rx_cq:
mlx5e_close_cq(&c->xskrq.cq);
+err_free_cparam:
+ kvfree(cparam);
+
return err;
}
{
set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
/* TX queue is created active. */
+
+ spin_lock(&c->xskicosq_lock);
mlx5e_trigger_irq(&c->xskicosq);
+ spin_unlock(&c->xskicosq_lock);
}
void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
mlx5e_port_speed2linkmodes(mdev, speed, !ext);
+ if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+ autoneg != AUTONEG_ENABLE) {
+ netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n",
+ __func__);
+ err = -EINVAL;
+ goto out;
+ }
+
link_modes = link_modes & eproto.cap;
if (!link_modes) {
netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EOPNOTSUPP;
+
if (pauseparam->autoneg)
return -EINVAL;
return 0;
}
+ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash)
+ {
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->netdev;
+ const struct firmware *fw;
+ int err;
+
+ if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ return -EOPNOTSUPP;
+
+ err = request_firmware_direct(&fw, flash->data, &dev->dev);
+ if (err)
+ return err;
+
+ dev_hold(dev);
+ rtnl_unlock();
+
+ err = mlx5_firmware_flash(mdev, fw, NULL);
+ release_firmware(fw);
+
+ rtnl_lock();
+ dev_put(dev);
+ return err;
+ }
+
+ static int mlx5e_flash_device(struct net_device *dev,
+ struct ethtool_flash *flash)
+ {
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+ }
+
static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
return priv->channels.params.pflags;
}
-#ifndef CONFIG_MLX5_EN_RXNFC
-/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS
- * otherwise this function will be defined from en_fs_ethtool.c
- */
static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- if (info->cmd != ETHTOOL_GRXRINGS)
- return -EOPNOTSUPP;
- /* ring_count is needed by ethtool -x */
- info->data = priv->channels.params.num_channels;
- return 0;
+ /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
+ * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
+ * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+ * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+ */
+ if (info->cmd == ETHTOOL_GRXRINGS) {
+ info->data = priv->channels.params.num_channels;
+ return 0;
+ }
+
+ return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs);
+}
+
+static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ return mlx5e_ethtool_set_rxnfc(dev, cmd);
}
-#endif
const struct ethtool_ops mlx5e_ethtool_ops = {
.get_drvinfo = mlx5e_get_drvinfo,
.get_rxfh = mlx5e_get_rxfh,
.set_rxfh = mlx5e_set_rxfh,
.get_rxnfc = mlx5e_get_rxnfc,
-#ifdef CONFIG_MLX5_EN_RXNFC
.set_rxnfc = mlx5e_set_rxnfc,
-#endif
.get_tunable = mlx5e_get_tunable,
.set_tunable = mlx5e_set_tunable,
.get_pauseparam = mlx5e_get_pauseparam,
.set_wol = mlx5e_set_wol,
.get_module_info = mlx5e_get_module_info,
.get_module_eeprom = mlx5e_get_module_eeprom,
+ .flash_device = mlx5e_flash_device,
.get_priv_flags = mlx5e_get_priv_flags,
.set_priv_flags = mlx5e_set_priv_flags,
.self_test = mlx5e_self_test,
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
sq->stop_room = MLX5E_SQ_STOP_ROOM;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+ if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev)) {
void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
{
sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
- clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
netdev_tx_reset_queue(sq->txq);
netif_tx_start_queue(sq->txq);
goto err_close_channels;
}
- if (!IS_ERR_OR_NULL(priv->tx_reporter))
+ if (priv->tx_reporter)
devlink_health_reporter_state_update(priv->tx_reporter,
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
#ifdef CONFIG_MLX5_ESWITCH
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct flow_cls_offload *cls_flower,
- int flags)
+ unsigned long flags)
{
switch (cls_flower->command) {
case FLOW_CLS_REPLACE:
static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
void *cb_priv)
{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
struct mlx5e_priv *priv = cb_priv;
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
- MLX5E_TC_NIC_OFFLOAD);
+ return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
default:
return -EOPNOTSUPP;
}
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) {
+ if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
}
if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
- features &= ~NETIF_F_LRO;
- if (params->lro_en)
+ if (features & NETIF_F_LRO) {
netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
+ features &= ~NETIF_F_LRO;
+ }
}
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
case HWTSTAMP_FILTER_NTP_ALL:
/* Disable CQE compression */
- netdev_warn(priv->netdev, "Disabling cqe compression");
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
if (err) {
netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
/* TX inline */
- params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
+ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
/* RSS */
mlx5e_build_rss_params(rss_params, params->num_channels);
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
struct mlx5_fc *counter;
};
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
enum {
- MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
- MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
- MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD,
- MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD,
- MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE),
- MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1),
- MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
- MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3),
- MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4),
- MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5),
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
};
#define MLX5E_TC_MAX_SPLITS 1
* container_of(helper item, containing struct type, helper field[index])
*/
struct encap_flow_item {
+ struct mlx5e_encap_entry *e; /* attached encap instance */
struct list_head list;
int index;
};
struct rhash_head node;
struct mlx5e_priv *priv;
u64 cookie;
- u16 flags;
+ unsigned long flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
/* Flow can be associated with multiple encap IDs.
* The number of encaps is bounded by the number of supported
*/
struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5e_tc_flow *peer_flow;
+ struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
struct list_head hairpin; /* flows sharing the same hairpin */
struct list_head peer; /* flows with peer flow */
struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
/* a node of a hash table which keeps all the hairpin entries */
struct hlist_node hairpin_hlist;
+ /* protects flows list */
+ spinlock_t flows_lock;
/* flows sharing the same hairpin */
struct list_head flows;
+ /* hpe's that were not fully initialized when dead peer update event
+ * function traversed them.
+ */
+ struct list_head dead_peer_wait_list;
u16 peer_vhca_id;
u8 prio;
struct mlx5e_hairpin *hp;
+ refcount_t refcnt;
+ struct completion res_ready;
};
struct mod_hdr_key {
/* a node of a hash table which keeps all the mod_hdr entries */
struct hlist_node mod_hdr_hlist;
+ /* protects flows list */
+ spinlock_t flows_lock;
/* flows sharing the same mod_hdr entry */
struct list_head flows;
struct mod_hdr_key key;
u32 mod_hdr_id;
+
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
};
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+{
+ if (!flow || !refcount_inc_not_zero(&flow->refcnt))
+ return ERR_PTR(-EINVAL);
+ return flow;
+}
+
+static void mlx5e_flow_put(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (refcount_dec_and_test(&flow->refcnt)) {
+ mlx5e_tc_del_flow(priv, flow);
+ kfree_rcu(flow, rcu_head);
+ }
+}
+
+static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before setting bit. */
+ smp_mb__before_atomic();
+ set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+ unsigned long flag)
+{
+ /* test_and_set_bit() provides all necessary barriers */
+ return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag) \
+ __flow_flag_test_and_set(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before clearing bit. */
+ smp_mb__before_atomic();
+ clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ bool ret = test_bit(flag, &flow->flags);
+
+ /* Read fields of flow structure only after checking flags. */
+ smp_mb__after_atomic();
+ return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, ESWITCH);
+}
+
+static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+{
+ return flow_flag_test(flow, OFFLOADED);
+}
+
static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
return jhash(key->actions,
return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}
+static struct mod_hdr_tbl *
+get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
+ &priv->fs.tc.mod_hdr;
+}
+
+static struct mlx5e_mod_hdr_entry *
+mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
+{
+ struct mlx5e_mod_hdr_entry *mh, *found = NULL;
+
+ hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, key)) {
+ refcount_inc(&mh->refcnt);
+ found = mh;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
+ struct mlx5e_mod_hdr_entry *mh,
+ int namespace)
+{
+ struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
+
+ if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
+ return;
+ hash_del(&mh->mod_hdr_hlist);
+ mutex_unlock(&tbl->lock);
+
+ WARN_ON(!list_empty(&mh->flows));
+ if (mh->compl_result > 0)
+ mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
+
+ kfree(mh);
+}
+
+static int get_flow_name_space(struct mlx5e_tc_flow *flow)
+{
+ return mlx5e_is_eswitch_flow(flow) ?
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+}
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5e_tc_flow_parse_attr *parse_attr)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int num_actions, actions_size, namespace, err;
struct mlx5e_mod_hdr_entry *mh;
+ struct mod_hdr_tbl *tbl;
struct mod_hdr_key key;
- bool found = false;
u32 hash_key;
num_actions = parse_attr->num_mod_hdr_actions;
hash_key = hash_mod_hdr_info(&key);
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
- namespace = MLX5_FLOW_NAMESPACE_FDB;
- hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
- mod_hdr_hlist, hash_key) {
- if (!cmp_mod_hdr_info(&mh->key, &key)) {
- found = true;
- break;
- }
- }
- } else {
- namespace = MLX5_FLOW_NAMESPACE_KERNEL;
- hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
- mod_hdr_hlist, hash_key) {
- if (!cmp_mod_hdr_info(&mh->key, &key)) {
- found = true;
- break;
- }
- }
- }
+ namespace = get_flow_name_space(flow);
+ tbl = get_mod_hdr_table(priv, namespace);
+
+ mutex_lock(&tbl->lock);
+ mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
+ if (mh) {
+ mutex_unlock(&tbl->lock);
+ wait_for_completion(&mh->res_ready);
- if (found)
+ if (mh->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto attach_header_err;
+ }
goto attach_flow;
+ }
mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
- if (!mh)
+ if (!mh) {
+ mutex_unlock(&tbl->lock);
return -ENOMEM;
+ }
mh->key.actions = (void *)mh + sizeof(*mh);
memcpy(mh->key.actions, key.actions, actions_size);
mh->key.num_actions = num_actions;
+ spin_lock_init(&mh->flows_lock);
INIT_LIST_HEAD(&mh->flows);
+ refcount_set(&mh->refcnt, 1);
+ init_completion(&mh->res_ready);
+
+ hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
+ mutex_unlock(&tbl->lock);
err = mlx5_modify_header_alloc(priv->mdev, namespace,
mh->key.num_actions,
mh->key.actions,
&mh->mod_hdr_id);
- if (err)
- goto out_err;
-
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
- hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
- else
- hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+ if (err) {
+ mh->compl_result = err;
+ goto alloc_header_err;
+ }
+ mh->compl_result = 1;
+ complete_all(&mh->res_ready);
attach_flow:
+ flow->mh = mh;
+ spin_lock(&mh->flows_lock);
list_add(&flow->mod_hdr, &mh->flows);
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ spin_unlock(&mh->flows_lock);
+ if (mlx5e_is_eswitch_flow(flow))
flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
else
flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
return 0;
-out_err:
- kfree(mh);
+alloc_header_err:
+ complete_all(&mh->res_ready);
+attach_header_err:
+ mlx5e_mod_hdr_put(priv, mh, namespace);
return err;
}
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
- struct list_head *next = flow->mod_hdr.next;
+ /* flow wasn't fully initialized */
+ if (!flow->mh)
+ return;
+ spin_lock(&flow->mh->flows_lock);
list_del(&flow->mod_hdr);
+ spin_unlock(&flow->mh->flows_lock);
- if (list_empty(next)) {
- struct mlx5e_mod_hdr_entry *mh;
-
- mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
-
- mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
- hash_del(&mh->mod_hdr_hlist);
- kfree(mh);
- }
+ mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
+ flow->mh = NULL;
}
static
hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
hairpin_hlist, hash_key) {
- if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
+ if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
+ refcount_inc(&hpe->refcnt);
return hpe;
+ }
}
return NULL;
}
+static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
+ struct mlx5e_hairpin_entry *hpe)
+{
+ /* no more hairpin flows for us, release the hairpin pair */
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
+ return;
+ hash_del(&hpe->hairpin_hlist);
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+
+ if (!IS_ERR_OR_NULL(hpe->hp)) {
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+ dev_name(hpe->hp->pair->peer_mdev->device));
+
+ mlx5e_hairpin_destroy(hpe->hp);
+ }
+
+ WARN_ON(!list_empty(&hpe->flows));
+ kfree(hpe);
+}
+
#define UNKNOWN_MATCH_PRIO 8
static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
extack);
if (err)
return err;
+
+ mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
- if (hpe)
+ if (hpe) {
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+ wait_for_completion(&hpe->res_ready);
+
+ if (IS_ERR(hpe->hp)) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
goto attach_flow;
+ }
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
- if (!hpe)
+ if (!hpe) {
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
return -ENOMEM;
+ }
+ spin_lock_init(&hpe->flows_lock);
INIT_LIST_HEAD(&hpe->flows);
+ INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
hpe->peer_vhca_id = peer_id;
hpe->prio = match_prio;
+ refcount_set(&hpe->refcnt, 1);
+ init_completion(&hpe->res_ready);
+
+ hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
+ hash_hairpin_info(peer_id, match_prio));
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
params.log_data_size = 15;
params.log_data_size = min_t(u8, params.log_data_size,
params.num_channels = link_speed64;
hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex);
+ hpe->hp = hp;
+ complete_all(&hpe->res_ready);
if (IS_ERR(hp)) {
err = PTR_ERR(hp);
- goto create_hairpin_err;
+ goto out_err;
}
netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
dev_name(hp->pair->peer_mdev->device),
hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
- hpe->hp = hp;
- hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
- hash_hairpin_info(peer_id, match_prio));
-
attach_flow:
if (hpe->hp->num_channels > 1) {
- flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
+ flow_flag_set(flow, HAIRPIN_RSS);
flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
} else {
flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
}
+
+ flow->hpe = hpe;
+ spin_lock(&hpe->flows_lock);
list_add(&flow->hairpin, &hpe->flows);
+ spin_unlock(&hpe->flows_lock);
return 0;
-create_hairpin_err:
- kfree(hpe);
+out_err:
+ mlx5e_hairpin_put(priv, hpe);
return err;
}
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
- struct list_head *next = flow->hairpin.next;
+ /* flow wasn't fully initialized */
+ if (!flow->hpe)
+ return;
+ spin_lock(&flow->hpe->flows_lock);
list_del(&flow->hairpin);
+ spin_unlock(&flow->hpe->flows_lock);
- /* no more hairpin flows for us, release the hairpin pair */
- if (list_empty(next)) {
- struct mlx5e_hairpin_entry *hpe;
-
- hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
-
- netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
- dev_name(hpe->hp->pair->peer_mdev->device));
-
- mlx5e_hairpin_destroy(hpe->hp);
- hash_del(&hpe->hairpin_hlist);
- kfree(hpe);
- }
+ mlx5e_hairpin_put(priv, flow->hpe);
+ flow->hpe = NULL;
}
static int
.flags = FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
- bool table_created = false;
int err, dest_ix = 0;
flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
flow_context->flow_tag = attr->flow_tag;
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
+ if (flow_flag_test(flow, HAIRPIN)) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
- if (err) {
- goto err_add_hairpin_flow;
- }
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
+ if (err)
+ return err;
+
+ if (flow_flag_test(flow, HAIRPIN_RSS)) {
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[dest_ix].ft = attr->hairpin_ft;
} else {
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
- if (IS_ERR(counter)) {
- err = PTR_ERR(counter);
- goto err_fc_create;
- }
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[dest_ix].counter_id = mlx5_fc_id(counter);
dest_ix++;
flow_act.modify_id = attr->mod_hdr_id;
kfree(parse_attr->mod_hdr_actions);
if (err)
- goto err_create_mod_hdr_id;
+ return err;
}
+ mutex_lock(&priv->fs.tc.t_lock);
if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
int tc_grp_size, tc_tbl_size;
u32 max_flow_counter;
MLX5E_TC_TABLE_NUM_GROUPS,
MLX5E_TC_FT_LEVEL, 0);
if (IS_ERR(priv->fs.tc.t)) {
+ mutex_unlock(&priv->fs.tc.t_lock);
NL_SET_ERR_MSG_MOD(extack,
"Failed to create tc offload table\n");
netdev_err(priv->netdev,
"Failed to create tc offload table\n");
- err = PTR_ERR(priv->fs.tc.t);
- goto err_create_ft;
+ return PTR_ERR(priv->fs.tc.t);
}
-
- table_created = true;
}
if (attr->match_level != MLX5_MATCH_NONE)
flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
&flow_act, dest, dest_ix);
+ mutex_unlock(&priv->fs.tc.t_lock);
- if (IS_ERR(flow->rule[0])) {
- err = PTR_ERR(flow->rule[0]);
- goto err_add_rule;
- }
+ if (IS_ERR(flow->rule[0]))
+ return PTR_ERR(flow->rule[0]);
return 0;
-
-err_add_rule:
- if (table_created) {
- mlx5_destroy_flow_table(priv->fs.tc.t);
- priv->fs.tc.t = NULL;
- }
-err_create_ft:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- mlx5e_detach_mod_hdr(priv, flow);
-err_create_mod_hdr_id:
- mlx5_fc_destroy(dev, counter);
-err_fc_create:
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
- mlx5e_hairpin_flow_del(priv, flow);
-err_add_hairpin_flow:
- return err;
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5_fc *counter = NULL;
counter = attr->counter;
- mlx5_del_flow_rules(flow->rule[0]);
+ if (!IS_ERR_OR_NULL(flow->rule[0]))
+ mlx5_del_flow_rules(flow->rule[0]);
mlx5_fc_destroy(priv->mdev, counter);
- if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) {
+ mutex_lock(&priv->fs.tc.t_lock);
+ if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
}
+ mutex_unlock(&priv->fs.tc.t_lock);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ if (flow_flag_test(flow, HAIRPIN))
mlx5e_hairpin_flow_del(priv, flow);
}
}
}
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
return rule;
}
struct mlx5e_tc_flow *flow,
struct mlx5_esw_flow_attr *attr)
{
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+ flow_flag_clear(flow, OFFLOADED);
if (attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
if (!IS_ERR(rule))
- flow->flags |= MLX5E_TC_FLOW_SLOW;
+ flow_flag_set(flow, SLOW);
return rule;
}
slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
- flow->flags &= ~MLX5E_TC_FLOW_SLOW;
+ flow_flag_clear(flow, SLOW);
+}
+
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
+ * function.
+ */
+static void unready_flow_add(struct mlx5e_tc_flow *flow,
+ struct list_head *unready_flows)
+{
+ flow_flag_set(flow, NOT_READY);
+ list_add_tail(&flow->unready, unready_flows);
+}
+
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
+ * function.
+ */
+static void unready_flow_del(struct mlx5e_tc_flow *flow)
+{
+ list_del(&flow->unready);
+ flow_flag_clear(flow, NOT_READY);
}
static void add_unready_flow(struct mlx5e_tc_flow *flow)
rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &rpriv->uplink_priv;
- flow->flags |= MLX5E_TC_FLOW_NOT_READY;
- list_add_tail(&flow->unready, &uplink_priv->unready_flows);
+ mutex_lock(&uplink_priv->unready_flows_lock);
+ unready_flow_add(flow, &uplink_priv->unready_flows);
+ mutex_unlock(&uplink_priv->unready_flows_lock);
}
static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
- list_del(&flow->unready);
- flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5_eswitch *esw;
+
+ esw = flow->priv->mdev->priv.eswitch;
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &rpriv->uplink_priv;
+
+ mutex_lock(&uplink_priv->unready_flows_lock);
+ unready_flow_del(flow);
+ mutex_unlock(&uplink_priv->unready_flows_lock);
}
static int
if (attr->chain > max_chain) {
NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
- err = -EOPNOTSUPP;
- goto err_max_prio_chain;
+ return -EOPNOTSUPP;
}
if (attr->prio > max_prio) {
NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
- err = -EOPNOTSUPP;
- goto err_max_prio_chain;
+ return -EOPNOTSUPP;
}
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
extack, &encap_dev, &encap_valid);
if (err)
- goto err_attach_encap;
+ return err;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
- goto err_add_vlan;
+ return err;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
kfree(parse_attr->mod_hdr_actions);
if (err)
- goto err_mod_hdr;
+ return err;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(attr->counter_dev, true);
- if (IS_ERR(counter)) {
- err = PTR_ERR(counter);
- goto err_create_counter;
- }
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
attr->counter = counter;
}
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
}
- if (IS_ERR(flow->rule[0])) {
- err = PTR_ERR(flow->rule[0]);
- goto err_add_rule;
- }
+ if (IS_ERR(flow->rule[0]))
+ return PTR_ERR(flow->rule[0]);
+ else
+ flow_flag_set(flow, OFFLOADED);
return 0;
-
-err_add_rule:
- mlx5_fc_destroy(attr->counter_dev, counter);
-err_create_counter:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- mlx5e_detach_mod_hdr(priv, flow);
-err_mod_hdr:
- mlx5_eswitch_del_vlan_action(esw, attr);
-err_add_vlan:
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
- mlx5e_detach_encap(priv, flow, out_index);
-err_attach_encap:
-err_max_prio_chain:
- return err;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
struct mlx5_esw_flow_attr slow_attr;
int out_index;
- if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
+ if (flow_flag_test(flow, NOT_READY)) {
remove_unready_flow(flow);
kvfree(attr->parse_attr);
return;
}
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- if (flow->flags & MLX5E_TC_FLOW_SLOW)
+ if (mlx5e_is_offloaded_flow(flow)) {
+ if (flow_flag_test(flow, SLOW))
mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
else
mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr slow_attr, *esw_attr;
+ struct encap_flow_item *efi, *tmp;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(priv);
- list_for_each_entry(efi, &e->flows, list) {
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
bool all_flow_encaps_valid = true;
int i;
flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+
esw_attr = flow->esw_attr;
spec = &esw_attr->parse_attr->spec;
}
/* Do not offload flows with unresolved neighbors */
if (!all_flow_encaps_valid)
- continue;
+ goto loop_cont;
/* update from slow path rule to encap rule */
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
- continue;
+ goto loop_cont;
}
mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */
flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+
+loop_cont:
+ mlx5e_flow_put(priv, flow);
}
}
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr slow_attr;
+ struct encap_flow_item *efi, *tmp;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- list_for_each_entry(efi, &e->flows, list) {
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+
spec = &flow->esw_attr->parse_attr->spec;
/* update from encap rule to slow path rule */
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
err);
- continue;
+ goto loop_cont;
}
mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */
flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+
+loop_cont:
+ mlx5e_flow_put(priv, flow);
}
/* we know that the encap is valid */
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ if (mlx5e_is_eswitch_flow(flow))
return flow->esw_attr->counter;
else
return flow->nic_attr->counter;
return;
list_for_each_entry(e, &nhe->encap_list, encap_list) {
- struct encap_flow_item *efi;
- if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
+ struct encap_flow_item *efi, *tmp;
+
+ if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) ||
+ !mlx5e_encap_take(e))
continue;
- list_for_each_entry(efi, &e->flows, list) {
+
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
flow = container_of(efi, struct mlx5e_tc_flow,
encaps[efi->index]);
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+
+ if (mlx5e_is_offloaded_flow(flow)) {
counter = mlx5e_tc_get_counter(flow);
lastuse = mlx5_fc_query_lastuse(counter);
if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ mlx5e_flow_put(netdev_priv(e->out_dev), flow);
neigh_used = true;
break;
}
}
+
+ mlx5e_flow_put(netdev_priv(e->out_dev), flow);
}
+
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
if (neigh_used)
break;
}
}
}
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
- struct list_head *next = flow->encaps[out_index].list.next;
+ WARN_ON(!list_empty(&e->flows));
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
- list_del(&flow->encaps[out_index].list);
- if (list_empty(next)) {
- struct mlx5e_encap_entry *e;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
- e = list_entry(next, struct mlx5e_encap_entry, flows);
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+ kfree(e->encap_header);
+ kfree(e);
+}
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- hash_del_rcu(&e->encap_hlist);
- kfree(e->encap_header);
- kfree(e);
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow, int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
}
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
}
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
- !(flow->flags & MLX5E_TC_FLOW_DUP))
+ if (!flow_flag_test(flow, ESWITCH) ||
+ !flow_flag_test(flow, DUP))
return;
mutex_lock(&esw->offloads.peer_mutex);
list_del(&flow->peer);
mutex_unlock(&esw->offloads.peer_mutex);
- flow->flags &= ~MLX5E_TC_FLOW_DUP;
+ flow_flag_clear(flow, DUP);
mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
kvfree(flow->peer_flow);
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ if (mlx5e_is_eswitch_flow(flow)) {
mlx5e_tc_del_fdb_peer_flow(flow);
mlx5e_tc_del_fdb_flow(priv, flow);
} else {
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
struct net_device *filter_dev,
- u8 *match_level, u8 *tunnel_match_level)
+ u8 *inner_match_level, u8 *outer_match_level)
{
struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
struct flow_dissector *dissector = rule->match.dissector;
u16 addr_type = 0;
u8 ip_proto = 0;
+ u8 *match_level;
- *match_level = MLX5_MATCH_NONE;
+ match_level = outer_match_level;
if (dissector->used_keys &
~(BIT(FLOW_DISSECTOR_KEY_META) |
}
if (mlx5e_get_tc_tun(filter_dev)) {
- if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
+ if (parse_tunnel_attr(priv, spec, f, filter_dev,
+ outer_match_level))
return -EOPNOTSUPP;
- /* In decap flow, header pointers should point to the inner
+ /* At this point, header pointers should point to the inner
* headers, outer header were already set by parse_tunnel_attr
*/
+ match_level = inner_match_level;
headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
spec);
headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
struct flow_cls_offload *f,
struct net_device *filter_dev)
{
+ u8 inner_match_level, outer_match_level, non_tunnel_match_level;
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_eswitch *esw = dev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
- u8 match_level, tunnel_match_level = MLX5_MATCH_NONE;
struct mlx5_eswitch_rep *rep;
+ bool is_eswitch_flow;
int err;
- err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);
+ inner_match_level = MLX5_MATCH_NONE;
+ outer_match_level = MLX5_MATCH_NONE;
+
+ err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
+ &outer_match_level);
+ non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
+ outer_match_level : inner_match_level;
- if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
+ is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
+ if (!err && is_eswitch_flow) {
rep = rpriv->rep;
if (rep->vport != MLX5_VPORT_UPLINK &&
(esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
- esw->offloads.inline_mode < match_level)) {
+ esw->offloads.inline_mode < non_tunnel_match_level)) {
NL_SET_ERR_MSG_MOD(extack,
"Flow is not offloaded due to min inline setting");
netdev_warn(priv->netdev,
"Flow is not offloaded due to min inline setting, required %d actual %d\n",
- match_level, esw->offloads.inline_mode);
+ non_tunnel_match_level, esw->offloads.inline_mode);
return -EOPNOTSUPP;
}
}
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ if (is_eswitch_flow) {
- flow->esw_attr->match_level = match_level;
- flow->esw_attr->tunnel_match_level = tunnel_match_level;
+ flow->esw_attr->inner_match_level = inner_match_level;
+ flow->esw_attr->outer_match_level = outer_match_level;
} else {
- flow->nic_attr->match_level = match_level;
+ flow->nic_attr->match_level = non_tunnel_match_level;
}
return err;
{
u32 actions;
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ if (mlx5e_is_eswitch_flow(flow))
actions = flow->esw_attr->action;
else
actions = flow->nic_attr->action;
- if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
+ if (flow_flag_test(flow, EGRESS) &&
!((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
- (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
+ (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+ (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
same_hw_devs(priv, netdev_priv(peer_dev))) {
parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
- flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
+ flow_flag_set(flow, HAIRPIN);
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
} else {
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e;
+ struct encap_key e_key;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (!cmp_encap_info(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
const struct ip_tunnel_info *tun_info;
- struct encap_key key, e_key;
+ struct encap_key key;
struct mlx5e_encap_entry *e;
unsigned short family;
uintptr_t hash_key;
- bool found = false;
int err = 0;
parse_attr = attr->parse_attr;
hash_key = hash_encap_info(&key);
- hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
- encap_hlist, hash_key) {
- e_key.ip_tun_key = &e->tun_info->key;
- e_key.tc_tunnel = e->tunnel;
- if (!cmp_encap_info(&e_key, &key)) {
- found = true;
- break;
- }
- }
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
/* must verify if encap is valid or not */
- if (found)
+ if (e) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
goto attach_flow;
+ }
e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e)
- return -ENOMEM;
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
e->tun_info = tun_info;
err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
- if (err)
+ if (err) {
+ kfree(e);
+ e = NULL;
goto out_err;
+ }
INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
if (family == AF_INET)
err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
else if (family == AF_INET6)
err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
- if (err)
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
goto out_err;
-
- hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ }
attach_flow:
+ flow->encaps[out_index].e = e;
list_add(&flow->encaps[out_index].list, &e->flows);
flow->encaps[out_index].index = out_index;
*encap_dev = e->out_dev;
} else {
*encap_valid = false;
}
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
return err;
out_err:
- kfree(e);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
return err;
}
if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ struct net_device *uplink_upper;
+ rcu_read_lock();
+ uplink_upper =
+ netdev_master_upper_dev_get_rcu(uplink_dev);
if (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
uplink_upper == out_dev)
out_dev = uplink_dev;
+ rcu_read_unlock();
if (is_vlan_dev(out_dev)) {
err = add_vlan_push_action(priv, attr,
return 0;
}
-static void get_flags(int flags, u16 *flow_flags)
+static void get_flags(int flags, unsigned long *flow_flags)
{
- u16 __flow_flags = 0;
+ unsigned long __flow_flags = 0;
- if (flags & MLX5E_TC_INGRESS)
- __flow_flags |= MLX5E_TC_FLOW_INGRESS;
- if (flags & MLX5E_TC_EGRESS)
- __flow_flags |= MLX5E_TC_FLOW_EGRESS;
+ if (flags & MLX5_TC_FLAG(INGRESS))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
+ if (flags & MLX5_TC_FLAG(EGRESS))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
- if (flags & MLX5E_TC_ESW_OFFLOAD)
- __flow_flags |= MLX5E_TC_FLOW_ESWITCH;
- if (flags & MLX5E_TC_NIC_OFFLOAD)
- __flow_flags |= MLX5E_TC_FLOW_NIC;
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
+ if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
+ __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
*flow_flags = __flow_flags;
}
.automatic_shrinking = true,
};
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
+ unsigned long flags)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *uplink_rpriv;
- if (flags & MLX5E_TC_ESW_OFFLOAD) {
+ if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
return &uplink_rpriv->uplink_priv.tc_ht;
} else /* NIC offload */
{
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
- flow->flags & MLX5E_TC_FLOW_INGRESS;
+ flow_flag_test(flow, INGRESS);
bool act_is_encap = !!(attr->action &
MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
- struct flow_cls_offload *f, u16 flow_flags,
+ struct flow_cls_offload *f, unsigned long flow_flags,
struct mlx5e_tc_flow_parse_attr **__parse_attr,
struct mlx5e_tc_flow **__flow)
{
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
- int err;
+ int out_index, err;
flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
flow->cookie = f->cookie;
flow->flags = flow_flags;
flow->priv = priv;
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ INIT_LIST_HEAD(&flow->encaps[out_index].list);
+ INIT_LIST_HEAD(&flow->mod_hdr);
+ INIT_LIST_HEAD(&flow->hairpin);
+ refcount_set(&flow->refcnt, 1);
*__flow = flow;
*__parse_attr = parse_attr;
esw_attr->parse_attr = parse_attr;
esw_attr->chain = f->common.chain_index;
- esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+ esw_attr->prio = f->common.prio;
esw_attr->in_rep = in_rep;
esw_attr->in_mdev = in_mdev;
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct flow_cls_offload *f,
- u16 flow_flags,
+ unsigned long flow_flags,
struct net_device *filter_dev,
struct mlx5_eswitch_rep *in_rep,
struct mlx5_core_dev *in_mdev)
struct mlx5e_tc_flow *flow;
int attr_size, err;
- flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
attr_size = sizeof(struct mlx5_esw_flow_attr);
err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
&parse_attr, &flow);
return flow;
err_free:
- kfree(flow);
- kvfree(parse_attr);
+ mlx5e_flow_put(priv, flow);
out:
return ERR_PTR(err);
}
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
struct mlx5e_tc_flow *flow,
- u16 flow_flags)
+ unsigned long flow_flags)
{
struct mlx5e_priv *priv = flow->priv, *peer_priv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
}
flow->peer_flow = peer_flow;
- flow->flags |= MLX5E_TC_FLOW_DUP;
+ flow_flag_set(flow, DUP);
mutex_lock(&esw->offloads.peer_mutex);
list_add_tail(&flow->peer, &esw->offloads.peer_flows);
mutex_unlock(&esw->offloads.peer_mutex);
static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct flow_cls_offload *f,
- u16 flow_flags,
+ unsigned long flow_flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct flow_cls_offload *f,
- u16 flow_flags,
+ unsigned long flow_flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
return -EOPNOTSUPP;
- flow_flags |= MLX5E_TC_FLOW_NIC;
+ flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
attr_size = sizeof(struct mlx5_nic_flow_attr);
err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
&parse_attr, &flow);
if (err)
goto err_free;
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ flow_flag_set(flow, OFFLOADED);
kvfree(parse_attr);
*__flow = flow;
return 0;
err_free:
- kfree(flow);
+ mlx5e_flow_put(priv, flow);
kvfree(parse_attr);
out:
return err;
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
struct flow_cls_offload *f,
- int flags,
+ unsigned long flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- u16 flow_flags;
+ unsigned long flow_flags;
int err;
get_flags(flags, &flow_flags);
}
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct flow_cls_offload *f, int flags)
+ struct flow_cls_offload *f, unsigned long flags)
{
struct netlink_ext_ack *extack = f->common.extack;
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
int err = 0;
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ rcu_read_lock();
+ flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
+ rcu_read_unlock();
if (flow) {
NL_SET_ERR_MSG_MOD(extack,
"flow cookie already exists, ignoring");
if (err)
goto out;
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
+ err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
if (err)
goto err_free;
return 0;
err_free:
- mlx5e_tc_del_flow(priv, flow);
- kfree(flow);
+ mlx5e_flow_put(priv, flow);
out:
return err;
}
-#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
-#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
-
static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
- if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
- return true;
+ bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
+ bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
- return false;
+ return flow_flag_test(flow, INGRESS) == dir_ingress &&
+ flow_flag_test(flow, EGRESS) == dir_egress;
}
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct flow_cls_offload *f, int flags)
+ struct flow_cls_offload *f, unsigned long flags)
{
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
+ int err;
+ rcu_read_lock();
flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
- if (!flow || !same_flow_direction(flow, flags))
- return -EINVAL;
+ if (!flow || !same_flow_direction(flow, flags)) {
+ err = -EINVAL;
+ goto errout;
+ }
+ /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
+ * set.
+ */
+ if (flow_flag_test_and_set(flow, DELETED)) {
+ err = -EINVAL;
+ goto errout;
+ }
rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
+ rcu_read_unlock();
- mlx5e_tc_del_flow(priv, flow);
-
- kfree(flow);
+ mlx5e_flow_put(priv, flow);
return 0;
+
+errout:
+ rcu_read_unlock();
+ return err;
}
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct flow_cls_offload *f, int flags)
+ struct flow_cls_offload *f, unsigned long flags)
{
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
u64 lastuse = 0;
u64 packets = 0;
u64 bytes = 0;
+ int err = 0;
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
- if (!flow || !same_flow_direction(flow, flags))
- return -EINVAL;
+ rcu_read_lock();
+ flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
+ tc_ht_params));
+ rcu_read_unlock();
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ if (!same_flow_direction(flow, flags)) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (mlx5e_is_offloaded_flow(flow)) {
counter = mlx5e_tc_get_counter(flow);
if (!counter)
- return 0;
+ goto errout;
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
}
if (!peer_esw)
goto out;
- if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
- (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+ if (flow_flag_test(flow, DUP) &&
+ flow_flag_test(flow->peer_flow, OFFLOADED)) {
u64 bytes2;
u64 packets2;
u64 lastuse2;
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
flow_stats_update(&f->stats, bytes, packets, lastuse);
+errout:
+ mlx5e_flow_put(priv, flow);
+ return err;
+}
+
+static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch *esw;
+ u16 vport_num;
+ u32 rate_mbps;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ /* rate is given in bytes/sec.
+ * First convert to bits/sec and then round to the nearest mbit/secs.
+ * mbit means million bits.
+ * Moreover, if rate is non zero we choose to configure to a minimum of
+ * 1 mbit/sec.
+ */
+ rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
+ vport_num = rpriv->rep->vport;
+
+ err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
+
+ return err;
+}
+
+static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ const struct flow_action_entry *act;
+ int err;
+ int i;
+
+ if (!flow_action_has_entries(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
+ return -EINVAL;
+ }
+
+ if (!flow_offload_has_one_action(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_POLICE:
+ err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
+ if (err)
+ return err;
+
+ rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
+ return -EOPNOTSUPP;
+ }
+ }
return 0;
}
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct netlink_ext_ack *extack = ma->common.extack;
+ int prio = TC_H_MAJ(ma->common.prio) >> 16;
+
+ if (prio != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
+ return -EINVAL;
+ }
+
+ return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
+}
+
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct netlink_ext_ack *extack = ma->common.extack;
+
+ return apply_police_params(priv, 0, extack);
+}
+
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct rtnl_link_stats64 cur_stats;
+ u64 dbytes;
+ u64 dpkts;
+
+ cur_stats = priv->stats.vf_vport;
+ dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
+ dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
+ rpriv->prev_vf_vport_stats = cur_stats;
+ flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
+}
+
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
struct mlx5e_priv *peer_priv)
{
struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
- struct mlx5e_hairpin_entry *hpe;
+ struct mlx5e_hairpin_entry *hpe, *tmp;
+ LIST_HEAD(init_wait_list);
u16 peer_vhca_id;
int bkt;
peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
- if (hpe->peer_vhca_id == peer_vhca_id)
+ mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
+ hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
+ if (refcount_inc_not_zero(&hpe->refcnt))
+ list_add(&hpe->dead_peer_wait_list, &init_wait_list);
+ mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+
+ list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
+ wait_for_completion(&hpe->res_ready);
+ if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
hpe->hp->pair->peer_gone = true;
+
+ mlx5e_hairpin_put(priv, hpe);
}
}
struct mlx5e_tc_table *tc = &priv->fs.tc;
int err;
- hash_init(tc->mod_hdr_tbl);
+ mutex_init(&tc->t_lock);
+ mutex_init(&tc->mod_hdr.lock);
+ hash_init(tc->mod_hdr.hlist);
+ mutex_init(&tc->hairpin_tbl_lock);
hash_init(tc->hairpin_tbl);
err = rhashtable_init(&tc->ht, &tc_ht_params);
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier(&tc->netdevice_nb);
+ mutex_destroy(&tc->mod_hdr.lock);
+ mutex_destroy(&tc->hairpin_tbl_lock);
+
rhashtable_destroy(&tc->ht);
if (!IS_ERR_OR_NULL(tc->t)) {
mlx5_destroy_flow_table(tc->t);
tc->t = NULL;
}
+ mutex_destroy(&tc->t_lock);
}
int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
reoffload_flows_work);
struct mlx5e_tc_flow *flow, *tmp;
- rtnl_lock();
+ mutex_lock(&rpriv->unready_flows_lock);
list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
- remove_unready_flow(flow);
+ unready_flow_del(flow);
}
- rtnl_unlock();
+ mutex_unlock(&rpriv->unready_flows_lock);
}
#include <linux/if_ether.h>
#include <linux/if_link.h>
+#include <linux/atomic.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/eswitch.h>
bool trusted;
};
+/* Vport context events */
+enum mlx5_eswitch_vport_event {
+ MLX5_VPORT_UC_ADDR_CHANGE = BIT(0),
+ MLX5_VPORT_MC_ADDR_CHANGE = BIT(1),
+ MLX5_VPORT_PROMISC_CHANGE = BIT(3),
+};
+
struct mlx5_vport {
struct mlx5_core_dev *dev;
int vport;
} qos;
bool enabled;
- u16 enabled_events;
+ enum mlx5_eswitch_vport_event enabled_events;
};
enum offloads_fdb_flags {
struct mlx5_eswitch_rep *vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
+ struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
- DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ struct mod_hdr_tbl mod_hdr;
DECLARE_HASHTABLE(termtbl_tbl, 8);
struct mutex termtbl_mutex; /* protects termtbl hash */
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
- u64 num_flows;
+ atomic64_t num_flows;
enum devlink_eswitch_encap_mode encap;
};
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
+ /* legacy data structures */
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
+ struct esw_mc_addr mc_promisc;
+ /* end of legacy */
struct workqueue_struct *work_queue;
struct mlx5_vport *vports;
u32 flags;
* and async SRIOV admin state changes
*/
struct mutex state_lock;
- struct esw_mc_addr mc_promisc;
struct {
bool enabled;
struct mlx5_esw_functions esw_funcs;
};
-void esw_offloads_cleanup(struct mlx5_eswitch *esw);
-int esw_offloads_init(struct mlx5_eswitch *esw);
+void esw_offloads_disable(struct mlx5_eswitch *esw);
+int esw_offloads_enable(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 rate_mbps);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
struct mlx5_termtbl_handle *termtbl;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
u32 mod_hdr_id;
- u8 match_level;
- u8 tunnel_match_level;
+ u8 inner_match_level;
+ u8 outer_match_level;
struct mlx5_fc *counter;
u32 chain;
u16 prio;
(vport) = &(esw)->vports[i], \
(i) < (esw)->total_vports; (i)++)
+#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \
+ for ((i) = (esw)->total_vports - 1; \
+ (vport) = &(esw)->vports[i], \
+ (i) >= MLX5_VPORT_PF; (i)--)
+
#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \
for ((i) = MLX5_VPORT_FIRST_VF; \
(vport) = &(esw)->vports[(i)], \
void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
+void
+mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+ enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
- if (attr->tunnel_match_level != MLX5_MATCH_NONE)
- spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
- if (attr->match_level != MLX5_MATCH_NONE)
- spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
- } else if (attr->match_level != MLX5_MATCH_NONE) {
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
- }
+ if (attr->inner_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
if (IS_ERR(rule))
goto err_add_rule;
else
- esw->offloads.num_flows++;
+ atomic64_inc(&esw->offloads.num_flows);
return rule;
mlx5_eswitch_set_rule_source_port(esw, spec, attr);
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
- if (attr->match_level != MLX5_MATCH_NONE)
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
if (IS_ERR(rule))
goto add_err;
- esw->offloads.num_flows++;
+ atomic64_inc(&esw->offloads.num_flows);
return rule;
add_err:
mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
}
- esw->offloads.num_flows--;
+ atomic64_dec(&esw->offloads.num_flows);
if (fwd_rule) {
esw_put_prio_table(esw, attr->chain, attr->prio, 1);
fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
!attr->dest_chain);
+ mutex_lock(&esw->state_lock);
+
err = esw_add_vlan_action_check(attr, push, pop, fwd);
if (err)
- return err;
+ goto unlock;
attr->vlan_handled = false;
attr->vlan_handled = true;
}
- return 0;
+ goto unlock;
}
if (!push && !pop)
- return 0;
+ goto unlock;
if (!(offloads->vlan_push_pop_refcount)) {
/* it's the 1st vlan rule, apply global vlan pop policy */
out:
if (!err)
attr->vlan_handled = true;
+unlock:
+ mutex_unlock(&esw->state_lock);
return err;
}
pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ mutex_lock(&esw->state_lock);
+
vport = esw_vlan_action_get_vport(attr, push, pop);
if (!push && !pop && fwd) {
if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
vport->vlan_refcount--;
- return 0;
+ goto out;
}
if (push) {
skip_unset_push:
offloads->vlan_push_pop_refcount--;
if (offloads->vlan_push_pop_refcount)
- return 0;
+ goto out;
/* no more vlan rules, stop global vlan pop policy */
err = esw_set_global_vlan_pop(esw, 0);
out:
+ mutex_unlock(&esw->state_lock);
return err;
}
mlx5_del_flow_rules(rule);
}
-static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
{
u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
u8 fdb_to_vport_reg_c_id;
int err;
- err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
- out, sizeof(out));
- if (err)
- return err;
-
- fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
- esw_vport_context.fdb_to_vport_reg_c_id);
-
- fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
- MLX5_SET(modify_esw_vport_context_in, in,
- esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
-
- MLX5_SET(modify_esw_vport_context_in, in,
- field_select.fdb_to_vport_reg_c_id, 1);
-
- return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
- in, sizeof(in));
-}
-
-static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
-{
- u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
- u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
- u8 fdb_to_vport_reg_c_id;
- int err;
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return 0;
err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
out, sizeof(out));
fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
esw_vport_context.fdb_to_vport_reg_c_id);
- fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+ if (enable)
+ fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+ else
+ fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
MLX5_SET(modify_esw_vport_context_in, in,
esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
int esw_offloads_init_reps(struct mlx5_eswitch *esw)
{
int total_vports = esw->total_vports;
- struct mlx5_core_dev *dev = esw->dev;
struct mlx5_eswitch_rep *rep;
- u8 hw_id[ETH_ALEN], rep_type;
int vport_index;
+ u8 rep_type;
esw->offloads.vport_reps = kcalloc(total_vports,
sizeof(struct mlx5_eswitch_rep),
if (!esw->offloads.vport_reps)
return -ENOMEM;
- mlx5_query_mac_address(dev, hw_id);
-
mlx5_esw_for_all_reps(esw, vport_index, rep) {
rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
rep->vport_index = vport_index;
- ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
atomic_set(&rep->rep_data[rep_type].state,
return NOTIFY_OK;
}
-int esw_offloads_init(struct mlx5_eswitch *esw)
+int esw_offloads_enable(struct mlx5_eswitch *esw)
{
int err;
if (err)
return err;
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- err = mlx5_eswitch_enable_passing_vport_metadata(esw);
- if (err)
- goto err_vport_metadata;
- }
+ err = esw_set_passing_vport_metadata(esw, true);
+ if (err)
+ goto err_vport_metadata;
+
+ mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
err = esw_offloads_load_all_reps(esw);
if (err)
return 0;
err_reps:
- if (mlx5_eswitch_vport_match_metadata_enabled(esw))
- mlx5_eswitch_disable_passing_vport_metadata(esw);
+ mlx5_eswitch_disable_pf_vf_vports(esw);
+ esw_set_passing_vport_metadata(esw, false);
err_vport_metadata:
esw_offloads_steering_cleanup(esw);
return err;
return err;
}
-void esw_offloads_cleanup(struct mlx5_eswitch *esw)
+void esw_offloads_disable(struct mlx5_eswitch *esw)
{
mlx5_rdma_disable_roce(esw->dev);
esw_offloads_devcom_cleanup(esw);
esw_offloads_unload_all_reps(esw);
- if (mlx5_eswitch_vport_match_metadata_enabled(esw))
- mlx5_eswitch_disable_passing_vport_metadata(esw);
+ mlx5_eswitch_disable_pf_vf_vports(esw);
+ esw_set_passing_vport_metadata(esw, false);
esw_offloads_steering_cleanup(esw);
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
}
break;
}
- if (esw->offloads.num_flows > 0) {
+ if (atomic64_read(&esw->offloads.num_flows) > 0) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set inline mode when flows are configured");
return -EOPNOTSUPP;
if (esw->offloads.encap == encap)
return 0;
- if (esw->offloads.num_flows > 0) {
+ if (atomic64_read(&esw->offloads.num_flows) > 0) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set encapsulation when flows are configured");
return -EOPNOTSUPP;
int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
struct mlxsw_sp_port *mlxsw_sp_port,
- bool ingress)
+ bool ingress,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_acl_block_binding *binding;
int err;
if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
return -EEXIST;
+ if (!ingress && block->egress_blocker_rule_count) {
+ NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
+ return -EOPNOTSUPP;
+ }
+
binding = kzalloc(sizeof(*binding), GFP_KERNEL);
if (!binding)
return -ENOMEM;
void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
unsigned int priority)
{
- rulei->priority = priority >> 16;
+ rulei->priority = priority;
}
void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
{
struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+ struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
int err;
err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
* one, to be directly bound to device. The rest of the
* rulesets are bound by "Goto action set".
*/
- err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
- ruleset->ht_key.block);
+ err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
if (err)
goto err_ruleset_block_bind;
}
list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
- ruleset->ht_key.block->rule_count++;
+ block->rule_count++;
+ block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker;
return 0;
err_ruleset_block_bind:
{
struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+ struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
+ block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
ruleset->ht_key.block->rule_count--;
list_del(&rule->list);
if (!ruleset->ht_key.chain_index &&
{
u8 *va;
struct vlan_ethhdr *veh;
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
__wsum vsum;
va = addr;
skb->len -= VLAN_HLEN;
skb->data_len -= VLAN_HLEN;
frag = skb_shinfo(skb)->frags;
- frag->page_offset += VLAN_HLEN;
- skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
+ skb_frag_off_add(frag, VLAN_HLEN);
+ skb_frag_size_sub(frag, VLAN_HLEN);
}
}
{
struct myri10ge_priv *mgp = ss->mgp;
struct sk_buff *skb;
- struct skb_frag_struct *rx_frags;
+ skb_frag_t *rx_frags;
struct myri10ge_rx_buf *rx;
int i, idx, remainder, bytes;
struct pci_dev *pdev = mgp->pdev;
return 0;
}
rx_frags = skb_shinfo(skb)->frags;
- /* Fill skb_frag_struct(s) with data from our receive */
+ /* Fill skb_frag_t(s) with data from our receive */
for (i = 0, remainder = len; remainder > 0; i++) {
myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
skb_fill_page_desc(skb, i, rx->info[idx].page,
}
/* remove padding */
- rx_frags[0].page_offset += MXGEFW_PAD;
- rx_frags[0].size -= MXGEFW_PAD;
+ skb_frag_off_add(&rx_frags[0], MXGEFW_PAD);
+ skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD);
len -= MXGEFW_PAD;
skb->len = len;
struct myri10ge_slice_state *ss;
struct mcp_kreq_ether_send *req;
struct myri10ge_tx_buf *tx;
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
struct netdev_queue *netdev_queue;
dma_addr_t bus;
u32 low;
static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
{
struct myri10ge_priv *mgp = netdev_priv(dev);
- int error = 0;
netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
if (mgp->running) {
} else
dev->mtu = new_mtu;
- return error;
+ return 0;
}
/*
* setup (if available). */
status = myri10ge_request_irq(mgp);
if (status != 0)
- goto abort_with_firmware;
+ goto abort_with_slices;
myri10ge_free_irq(mgp);
/* Save configuration space to be restored if the
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
slots_used += fill_pg_buf(skb_frag_page(frag),
- frag->page_offset,
+ skb_frag_off(frag),
skb_frag_size(frag), &pb[slots_used]);
}
return slots_used;
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
unsigned long size = skb_frag_size(frag);
- unsigned long offset = frag->page_offset;
+ unsigned long offset = skb_frag_off(frag);
/* Skip unused frames from start of page */
offset &= ~PAGE_MASK;
struct rtnl_link_stats64 *t)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+ struct netvsc_device *nvdev;
struct netvsc_vf_pcpu_stats vf_tot;
int i;
+ rcu_read_lock();
+
+ nvdev = rcu_dereference(ndev_ctx->nvdev);
if (!nvdev)
- return;
+ goto out;
netdev_stats_to_stats64(t, &net->stats);
t->rx_packets += packets;
t->multicast += multicast;
}
+ out:
+ rcu_read_unlock();
}
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
#include <linux/debugfs.h>
#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/inet.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
#include <net/devlink.h>
+#include <net/ip.h>
+#include <uapi/linux/devlink.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/udp.h>
#include "netdevsim.h"
static struct dentry *nsim_dev_ddir;
+#define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32)
+
+static ssize_t nsim_dev_take_snapshot_write(struct file *file,
+ const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct nsim_dev *nsim_dev = file->private_data;
+ void *dummy_data;
+ int err;
+ u32 id;
+
+ dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
+ if (!dummy_data)
+ return -ENOMEM;
+
+ get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
+
+ id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev));
+ err = devlink_region_snapshot_create(nsim_dev->dummy_region,
+ dummy_data, id, kfree);
+ if (err) {
+ pr_err("Failed to create region snapshot\n");
+ kfree(dummy_data);
+ return err;
+ }
+
+ return count;
+}
+
+static const struct file_operations nsim_dev_take_snapshot_fops = {
+ .open = simple_open,
+ .write = nsim_dev_take_snapshot_write,
+ .llseek = generic_file_llseek,
+};
+
static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
{
char dev_ddir_name[16];
return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL;
debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir,
&nsim_dev->fw_update_status);
+ debugfs_create_u32("max_macs", 0600, nsim_dev->ddir,
+ &nsim_dev->max_macs);
+ debugfs_create_bool("test1", 0600, nsim_dev->ddir,
+ &nsim_dev->test1);
+ debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev,
+ &nsim_dev_take_snapshot_fops);
return 0;
}
debugfs_remove_recursive(nsim_dev_port->ddir);
}
+ static struct net *nsim_devlink_net(struct devlink *devlink)
+ {
+ return &init_net;
+ }
+
static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
{
- struct nsim_dev *nsim_dev = priv;
+ struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV4_FIB, false);
+ return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
}
static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv)
{
- struct nsim_dev *nsim_dev = priv;
+ struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV4_FIB_RULES, false);
+ return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
}
static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv)
{
- struct nsim_dev *nsim_dev = priv;
+ struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV6_FIB, false);
+ return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
}
static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv)
{
- struct nsim_dev *nsim_dev = priv;
+ struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV6_FIB_RULES, false);
+ return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
}
static int nsim_dev_resources_register(struct devlink *devlink)
{
- struct nsim_dev *nsim_dev = devlink_priv(devlink);
struct devlink_resource_size_params params = {
.size_max = (u64)-1,
.size_granularity = 1,
.unit = DEVLINK_RESOURCE_UNIT_ENTRY
};
+ struct net *net = nsim_devlink_net(devlink);
int err;
u64 n;
goto out;
}
- n = nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV4_FIB, true);
+ n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
err = devlink_resource_register(devlink, "fib", n,
NSIM_RESOURCE_IPV4_FIB,
NSIM_RESOURCE_IPV4, ¶ms);
return err;
}
- n = nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV4_FIB_RULES, true);
+ n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
err = devlink_resource_register(devlink, "fib-rules", n,
NSIM_RESOURCE_IPV4_FIB_RULES,
NSIM_RESOURCE_IPV4, ¶ms);
goto out;
}
- n = nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV6_FIB, true);
+ n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
err = devlink_resource_register(devlink, "fib", n,
NSIM_RESOURCE_IPV6_FIB,
NSIM_RESOURCE_IPV6, ¶ms);
return err;
}
- n = nsim_fib_get_val(nsim_dev->fib_data,
- NSIM_RESOURCE_IPV6_FIB_RULES, true);
+ n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
err = devlink_resource_register(devlink, "fib-rules", n,
NSIM_RESOURCE_IPV6_FIB_RULES,
NSIM_RESOURCE_IPV6, ¶ms);
devlink_resource_occ_get_register(devlink,
NSIM_RESOURCE_IPV4_FIB,
nsim_dev_ipv4_fib_resource_occ_get,
- nsim_dev);
+ net);
devlink_resource_occ_get_register(devlink,
NSIM_RESOURCE_IPV4_FIB_RULES,
nsim_dev_ipv4_fib_rules_res_occ_get,
- nsim_dev);
+ net);
devlink_resource_occ_get_register(devlink,
NSIM_RESOURCE_IPV6_FIB,
nsim_dev_ipv6_fib_resource_occ_get,
- nsim_dev);
+ net);
devlink_resource_occ_get_register(devlink,
NSIM_RESOURCE_IPV6_FIB_RULES,
nsim_dev_ipv6_fib_rules_res_occ_get,
- nsim_dev);
+ net);
out:
return err;
}
+enum nsim_devlink_param_id {
+ NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ NSIM_DEVLINK_PARAM_ID_TEST1,
+};
+
+static const struct devlink_param nsim_devlink_params[] = {
+ DEVLINK_PARAM_GENERIC(MAX_MACS,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL),
+ DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1,
+ "test1", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL),
+};
+
+static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev,
+ struct devlink *devlink)
+{
+ union devlink_param_value value;
+
+ value.vu32 = nsim_dev->max_macs;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ value);
+ value.vbool = nsim_dev->test1;
+ devlink_param_driverinit_value_set(devlink,
+ NSIM_DEVLINK_PARAM_ID_TEST1,
+ value);
+}
+
+static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ union devlink_param_value saved_value;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ &saved_value);
+ if (!err)
+ nsim_dev->max_macs = saved_value.vu32;
+ err = devlink_param_driverinit_value_get(devlink,
+ NSIM_DEVLINK_PARAM_ID_TEST1,
+ &saved_value);
+ if (!err)
+ nsim_dev->test1 = saved_value.vbool;
+}
+
+#define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16
+
+static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev,
+ struct devlink *devlink)
+{
+ nsim_dev->dummy_region =
+ devlink_region_create(devlink, "dummy",
+ NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX,
+ NSIM_DEV_DUMMY_REGION_SIZE);
+ return PTR_ERR_OR_ZERO(nsim_dev->dummy_region);
+}
+
+static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
+{
+ devlink_region_destroy(nsim_dev->dummy_region);
+}
+
+struct nsim_trap_item {
+ void *trap_ctx;
+ enum devlink_trap_action action;
+};
+
+struct nsim_trap_data {
+ struct delayed_work trap_report_dw;
+ struct nsim_trap_item *trap_items_arr;
+ struct nsim_dev *nsim_dev;
+ spinlock_t trap_lock; /* Protects trap_items_arr */
+};
+
+/* All driver-specific traps must be documented in
+ * Documentation/networking/devlink-trap-netdevsim.rst
+ */
+enum {
+ NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
+ NSIM_TRAP_ID_FID_MISS,
+};
+
+#define NSIM_TRAP_NAME_FID_MISS "fid_miss"
+
+#define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
+
+#define NSIM_TRAP_DROP(_id, _group_id) \
+ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \
+ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \
+ NSIM_TRAP_METADATA)
+#define NSIM_TRAP_EXCEPTION(_id, _group_id) \
+ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \
+ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \
+ NSIM_TRAP_METADATA)
+#define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id) \
+ DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id, \
+ NSIM_TRAP_NAME_##_id, \
+ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \
+ NSIM_TRAP_METADATA)
+
+static const struct devlink_trap nsim_traps_arr[] = {
+ NSIM_TRAP_DROP(SMAC_MC, L2_DROPS),
+ NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
+ NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
+ NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
+ NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
+ NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
+ NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS),
+ NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
+ NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
+ NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS),
+};
+
+#define NSIM_TRAP_L4_DATA_LEN 100
+
+static struct sk_buff *nsim_dev_trap_skb_build(void)
+{
+ int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN;
+ struct sk_buff *skb;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+ tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
+
+ eth = skb_put(skb, sizeof(struct ethhdr));
+ eth_random_addr(eth->h_dest);
+ eth_random_addr(eth->h_source);
+ eth->h_proto = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IP);
+
+ iph = skb_put(skb, sizeof(struct iphdr));
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = in_aton("192.0.2.1");
+ iph->daddr = in_aton("198.51.100.1");
+ iph->version = 0x4;
+ iph->frag_off = 0;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(tot_len);
+ iph->ttl = 100;
+ ip_send_check(iph);
+
+ udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
+ get_random_bytes(&udph->source, sizeof(u16));
+ get_random_bytes(&udph->dest, sizeof(u16));
+ udph->len = htons(sizeof(struct udphdr) + data_len);
+
+ return skb;
+}
+
+static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port)
+{
+ struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev;
+ struct devlink *devlink = priv_to_devlink(nsim_dev);
+ struct nsim_trap_data *nsim_trap_data;
+ int i;
+
+ nsim_trap_data = nsim_dev->trap_data;
+
+ spin_lock(&nsim_trap_data->trap_lock);
+ for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
+ struct nsim_trap_item *nsim_trap_item;
+ struct sk_buff *skb;
+
+ nsim_trap_item = &nsim_trap_data->trap_items_arr[i];
+ if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP)
+ continue;
+
+ skb = nsim_dev_trap_skb_build();
+ if (!skb)
+ continue;
+ skb->dev = nsim_dev_port->ns->netdev;
+
+ /* Trapped packets are usually passed to devlink in softIRQ,
+ * but in this case they are generated in a workqueue. Disable
+ * softIRQs to prevent lockdep from complaining about
+ * "incosistent lock state".
+ */
+ local_bh_disable();
+ devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx,
+ &nsim_dev_port->devlink_port);
+ local_bh_enable();
+ consume_skb(skb);
+ }
+ spin_unlock(&nsim_trap_data->trap_lock);
+}
+
+#define NSIM_TRAP_REPORT_INTERVAL_MS 100
+
+static void nsim_dev_trap_report_work(struct work_struct *work)
+{
+ struct nsim_trap_data *nsim_trap_data;
+ struct nsim_dev_port *nsim_dev_port;
+ struct nsim_dev *nsim_dev;
+
+ nsim_trap_data = container_of(work, struct nsim_trap_data,
+ trap_report_dw.work);
+ nsim_dev = nsim_trap_data->nsim_dev;
+
+ /* For each running port and enabled packet trap, generate a UDP
+ * packet with a random 5-tuple and report it.
+ */
+ mutex_lock(&nsim_dev->port_list_lock);
+ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
+ if (!netif_running(nsim_dev_port->ns->netdev))
+ continue;
+
+ nsim_dev_trap_report(nsim_dev_port);
+ }
+ mutex_unlock(&nsim_dev->port_list_lock);
+
+ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
+ msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
+}
+
+static int nsim_dev_traps_init(struct devlink *devlink)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_data *nsim_trap_data;
+ int err;
+
+ nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL);
+ if (!nsim_trap_data)
+ return -ENOMEM;
+
+ nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr),
+ sizeof(struct nsim_trap_item),
+ GFP_KERNEL);
+ if (!nsim_trap_data->trap_items_arr) {
+ err = -ENOMEM;
+ goto err_trap_data_free;
+ }
+
+ /* The lock is used to protect the action state of the registered
+ * traps. The value is written by user and read in delayed work when
+ * iterating over all the traps.
+ */
+ spin_lock_init(&nsim_trap_data->trap_lock);
+ nsim_trap_data->nsim_dev = nsim_dev;
+ nsim_dev->trap_data = nsim_trap_data;
+
+ err = devlink_traps_register(devlink, nsim_traps_arr,
+ ARRAY_SIZE(nsim_traps_arr), NULL);
+ if (err)
+ goto err_trap_items_free;
+
+ INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw,
+ nsim_dev_trap_report_work);
+ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
+ msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
+
+ return 0;
+
+err_trap_items_free:
+ kfree(nsim_trap_data->trap_items_arr);
+err_trap_data_free:
+ kfree(nsim_trap_data);
+ return err;
+}
+
+static void nsim_dev_traps_exit(struct devlink *devlink)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+
+ cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw);
+ devlink_traps_unregister(devlink, nsim_traps_arr,
+ ARRAY_SIZE(nsim_traps_arr));
+ kfree(nsim_dev->trap_data->trap_items_arr);
+ kfree(nsim_dev->trap_data);
+}
+
static int nsim_dev_reload(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
- struct nsim_dev *nsim_dev = devlink_priv(devlink);
enum nsim_resource_id res_ids[] = {
NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
};
+ struct net *net = nsim_devlink_net(devlink);
int i;
for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
err = devlink_resource_size_get(devlink, res_ids[i], &val);
if (!err) {
- err = nsim_fib_set_max(nsim_dev->fib_data,
- res_ids[i], val, extack);
+ err = nsim_fib_set_max(net, res_ids[i], val, extack);
if (err)
return err;
}
}
+ nsim_devlink_param_load_driverinit_values(devlink);
return 0;
}
return 0;
}
+static struct nsim_trap_item *
+nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id)
+{
+ struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
+ if (nsim_traps_arr[i].id == trap_id)
+ return &nsim_trap_data->trap_items_arr[i];
+ }
+
+ return NULL;
+}
+
+static int nsim_dev_devlink_trap_init(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ void *trap_ctx)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_item *nsim_trap_item;
+
+ nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
+ if (WARN_ON(!nsim_trap_item))
+ return -ENOENT;
+
+ nsim_trap_item->trap_ctx = trap_ctx;
+ nsim_trap_item->action = trap->init_action;
+
+ return 0;
+}
+
+static int
+nsim_dev_devlink_trap_action_set(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ enum devlink_trap_action action)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_item *nsim_trap_item;
+
+ nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
+ if (WARN_ON(!nsim_trap_item))
+ return -ENOENT;
+
+ spin_lock(&nsim_dev->trap_data->trap_lock);
+ nsim_trap_item->action = action;
+ spin_unlock(&nsim_dev->trap_data->trap_lock);
+
+ return 0;
+}
+
static const struct devlink_ops nsim_dev_devlink_ops = {
.reload = nsim_dev_reload,
.flash_update = nsim_dev_flash_update,
+ .trap_init = nsim_dev_devlink_trap_init,
+ .trap_action_set = nsim_dev_devlink_trap_action_set,
};
+#define NSIM_DEV_MAX_MACS_DEFAULT 32
+#define NSIM_DEV_TEST1_DEFAULT true
+
static struct nsim_dev *
nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
{
INIT_LIST_HEAD(&nsim_dev->port_list);
mutex_init(&nsim_dev->port_list_lock);
nsim_dev->fw_update_status = true;
+ nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT;
+ nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT;
- nsim_dev->fib_data = nsim_fib_create();
- if (IS_ERR(nsim_dev->fib_data)) {
- err = PTR_ERR(nsim_dev->fib_data);
- goto err_devlink_free;
- }
-
err = nsim_dev_resources_register(devlink);
if (err)
- goto err_fib_destroy;
+ goto err_devlink_free;
err = devlink_register(devlink, &nsim_bus_dev->dev);
if (err)
goto err_resources_unregister;
- err = nsim_dev_debugfs_init(nsim_dev);
+ err = devlink_params_register(devlink, nsim_devlink_params,
+ ARRAY_SIZE(nsim_devlink_params));
if (err)
goto err_dl_unregister;
+ nsim_devlink_set_params_init_values(nsim_dev, devlink);
+
+ err = nsim_dev_dummy_region_init(nsim_dev, devlink);
+ if (err)
+ goto err_params_unregister;
+
+ err = nsim_dev_traps_init(devlink);
+ if (err)
+ goto err_dummy_region_exit;
+
+ err = nsim_dev_debugfs_init(nsim_dev);
+ if (err)
+ goto err_traps_exit;
err = nsim_bpf_dev_init(nsim_dev);
if (err)
goto err_debugfs_exit;
+ devlink_params_publish(devlink);
return nsim_dev;
err_debugfs_exit:
nsim_dev_debugfs_exit(nsim_dev);
+err_traps_exit:
+ nsim_dev_traps_exit(devlink);
+err_dummy_region_exit:
+ nsim_dev_dummy_region_exit(nsim_dev);
+err_params_unregister:
+ devlink_params_unregister(devlink, nsim_devlink_params,
+ ARRAY_SIZE(nsim_devlink_params));
err_dl_unregister:
devlink_unregister(devlink);
err_resources_unregister:
devlink_resources_unregister(devlink, NULL);
- err_fib_destroy:
- nsim_fib_destroy(nsim_dev->fib_data);
err_devlink_free:
devlink_free(devlink);
return ERR_PTR(err);
nsim_bpf_dev_exit(nsim_dev);
nsim_dev_debugfs_exit(nsim_dev);
+ nsim_dev_traps_exit(devlink);
+ nsim_dev_dummy_region_exit(nsim_dev);
+ devlink_params_unregister(devlink, nsim_devlink_params,
+ ARRAY_SIZE(nsim_devlink_params));
devlink_unregister(devlink);
devlink_resources_unregister(devlink, NULL);
- nsim_fib_destroy(nsim_dev->fib_data);
mutex_destroy(&nsim_dev->port_list_lock);
devlink_free(devlink);
}
struct nsim_dev {
struct nsim_bus_dev *nsim_bus_dev;
struct nsim_fib_data *fib_data;
+ struct nsim_trap_data *trap_data;
struct dentry *ddir;
struct dentry *ports_ddir;
struct bpf_offload_dev *bpf_dev;
struct list_head port_list;
struct mutex port_list_lock; /* protects port list */
bool fw_update_status;
+ u32 max_macs;
+ bool test1;
+ struct devlink_region *dummy_region;
};
int nsim_dev_init(void);
int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
unsigned int port_index);
- struct nsim_fib_data *nsim_fib_create(void);
- void nsim_fib_destroy(struct nsim_fib_data *fib_data);
- u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
- enum nsim_resource_id res_id, bool max);
- int nsim_fib_set_max(struct nsim_fib_data *fib_data,
- enum nsim_resource_id res_id, u64 val,
+ int nsim_fib_init(void);
+ void nsim_fib_exit(void);
+ u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
+ int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
struct netlink_ext_ack *extack);
#if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
{
int ret;
- ret = genphy_config_init(phydev);
- if (ret < 0)
- return ret;
-
/* The RX and TX delay default is:
* after HW reset: RX delay enabled and TX delay disabled
* after SW reset: RX delay enabled, while TX delay retains the
* value before reset.
- *
- * So let's first disable the RX and TX delays in PHY and enable
- * them based on the mode selected (this also takes care of RGMII
- * mode where we expect delays to be disabled)
*/
-
- ret = at803x_disable_rx_delay(phydev);
- if (ret < 0)
- return ret;
- ret = at803x_disable_tx_delay(phydev);
- if (ret < 0)
- return ret;
-
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
- /* If RGMII_ID or RGMII_RXID are specified enable RX delay,
- * otherwise keep it disabled
- */
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
ret = at803x_enable_rx_delay(phydev);
- if (ret < 0)
- return ret;
- }
+ else
+ ret = at803x_disable_rx_delay(phydev);
+ if (ret < 0)
+ return ret;
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
- /* If RGMII_ID or RGMII_TXID are specified enable TX delay,
- * otherwise keep it disabled
- */
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
ret = at803x_enable_tx_delay(phydev);
- }
+ else
+ ret = at803x_disable_tx_delay(phydev);
return ret;
}
*/
static int genphy_config_advert(struct phy_device *phydev)
{
- u32 advertise;
- int bmsr, adv;
- int err, changed = 0;
+ int err, bmsr, changed = 0;
+ u32 adv;
/* Only allow advertising what this PHY supports */
linkmode_and(phydev->advertising, phydev->advertising,
phydev->supported);
- if (!ethtool_convert_link_mode_to_legacy_u32(&advertise,
- phydev->advertising))
- phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n",
- __ETHTOOL_LINK_MODE_MASK_NBITS,
- phydev->advertising);
+
+ adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
/* Setup standard advertisement */
err = phy_modify_changed(phydev, MII_ADVERTISE,
ADVERTISE_ALL | ADVERTISE_100BASE4 |
ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
- ethtool_adv_to_mii_adv_t(advertise));
+ adv);
if (err < 0)
return err;
if (err > 0)
if (!(bmsr & BMSR_ESTATEN))
return changed;
- /* Configure gigabit if it's supported */
- adv = 0;
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- phydev->supported) ||
- linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- phydev->supported))
- adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
+ adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
err = phy_modify_changed(phydev, MII_CTRL1000,
ADVERTISE_1000FULL | ADVERTISE_1000HALF,
EXPORT_SYMBOL(genphy_restart_aneg);
/**
- * genphy_config_aneg - restart auto-negotiation or write BMCR
+ * __genphy_config_aneg - restart auto-negotiation or write BMCR
* @phydev: target phy_device struct
+ * @changed: whether autoneg is requested
*
* Description: If auto-negotiation is enabled, we configure the
* advertising, and then restart auto-negotiation. If it is not
* enabled, then we write the BMCR.
*/
-int genphy_config_aneg(struct phy_device *phydev)
+int __genphy_config_aneg(struct phy_device *phydev, bool changed)
{
- int err, changed;
+ int err;
- changed = genphy_config_eee_advert(phydev);
+ if (genphy_config_eee_advert(phydev))
+ changed = true;
if (AUTONEG_ENABLE != phydev->autoneg)
return genphy_setup_forced(phydev);
err = genphy_config_advert(phydev);
if (err < 0) /* error */
return err;
+ else if (err)
+ changed = true;
- changed |= err;
-
- if (changed == 0) {
+ if (!changed) {
/* Advertisement hasn't changed, but maybe aneg was never on to
* begin with? Or maybe phy was isolated?
*/
return ctl;
if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
- changed = 1; /* do restart aneg */
+ changed = true; /* do restart aneg */
}
/* Only restart aneg if we are advertising something different
* than we were before.
*/
- if (changed > 0)
- return genphy_restart_aneg(phydev);
-
- return 0;
+ return changed ? genphy_restart_aneg(phydev) : 0;
}
-EXPORT_SYMBOL(genphy_config_aneg);
+EXPORT_SYMBOL(__genphy_config_aneg);
/**
* genphy_aneg_done - return auto-negotiation status
*/
int genphy_update_link(struct phy_device *phydev)
{
- int status;
+ int status = 0, bmcr;
+
+ bmcr = phy_read(phydev, MII_BMCR);
+ if (bmcr < 0)
+ return bmcr;
+
+ /* Autoneg is being started, therefore disregard BMSR value and
+ * report link as down.
+ */
+ if (bmcr & BMCR_ANRESTART)
+ goto done;
/* The link state is latched low so that momentary link
* drops can be detected. Do not double-read the status
*/
int genphy_read_status(struct phy_device *phydev)
{
- int adv, lpa, lpagb, err, old_link = phydev->link;
+ int lpa, lpagb, err, old_link = phydev->link;
/* Update the link, but return if there was an error */
err = genphy_update_link(phydev);
phydev->pause = 0;
phydev->asym_pause = 0;
- linkmode_zero(phydev->lp_advertising);
-
if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
if (phydev->is_gigabit_capable) {
lpagb = phy_read(phydev, MII_STAT1000);
if (lpagb < 0)
return lpagb;
- adv = phy_read(phydev, MII_CTRL1000);
- if (adv < 0)
- return adv;
-
if (lpagb & LPA_1000MSFAIL) {
+ int adv = phy_read(phydev, MII_CTRL1000);
+
+ if (adv < 0)
+ return adv;
+
if (adv & CTL1000_ENABLE_MASTER)
phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n");
else
}
EXPORT_SYMBOL(genphy_soft_reset);
-int genphy_config_init(struct phy_device *phydev)
-{
- int val;
- __ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, };
-
- linkmode_set_bit_array(phy_basic_ports_array,
- ARRAY_SIZE(phy_basic_ports_array),
- features);
- linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features);
- linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features);
-
- /* Do we support autonegotiation? */
- val = phy_read(phydev, MII_BMSR);
- if (val < 0)
- return val;
-
- if (val & BMSR_ANEGCAPABLE)
- linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features);
-
- if (val & BMSR_100FULL)
- linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features);
- if (val & BMSR_100HALF)
- linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features);
- if (val & BMSR_10FULL)
- linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features);
- if (val & BMSR_10HALF)
- linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features);
-
- if (val & BMSR_ESTATEN) {
- val = phy_read(phydev, MII_ESTATUS);
- if (val < 0)
- return val;
-
- if (val & ESTATUS_1000_TFULL)
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- features);
- if (val & ESTATUS_1000_THALF)
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- features);
- if (val & ESTATUS_1000_XFULL)
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
- features);
- }
-
- linkmode_and(phydev->supported, phydev->supported, features);
- linkmode_and(phydev->advertising, phydev->advertising, features);
-
- return 0;
-}
-EXPORT_SYMBOL(genphy_config_init);
-
/**
* genphy_read_abilities - read PHY abilities from Clause 22 registers
* @phydev: target phy_device struct
return;
}
- memcpy(&intdata, urb->transfer_buffer, 4);
- le32_to_cpus(&intdata);
+ intdata = get_unaligned_le32(urb->transfer_buffer);
if (intdata & INT_ENP_PHY_INT) {
netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
struct sk_buff *skb, gfp_t flags)
{
u32 tx_cmd_a, tx_cmd_b;
+ void *ptr;
if (skb_cow_head(skb, TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_;
}
- skb_push(skb, 4);
- cpu_to_le32s(&tx_cmd_b);
- memcpy(skb->data, &tx_cmd_b, 4);
-
- skb_push(skb, 4);
- cpu_to_le32s(&tx_cmd_a);
- memcpy(skb->data, &tx_cmd_a, 4);
+ ptr = skb_push(skb, 8);
+ put_unaligned_le32(tx_cmd_a, ptr);
+ put_unaligned_le32(tx_cmd_b, ptr + 4);
return skb;
}
struct sk_buff *skb2;
unsigned char *packet;
- memcpy(&rx_cmd_a, skb->data, sizeof(rx_cmd_a));
- le32_to_cpus(&rx_cmd_a);
+ rx_cmd_a = get_unaligned_le32(skb->data);
skb_pull(skb, sizeof(rx_cmd_a));
- memcpy(&rx_cmd_b, skb->data, sizeof(rx_cmd_b));
- le32_to_cpus(&rx_cmd_b);
+ rx_cmd_b = get_unaligned_le32(skb->data);
skb_pull(skb, sizeof(rx_cmd_b));
- memcpy(&rx_cmd_c, skb->data, sizeof(rx_cmd_c));
- le16_to_cpus(&rx_cmd_c);
+ rx_cmd_c = get_unaligned_le16(skb->data);
skb_pull(skb, sizeof(rx_cmd_c));
packet = skb->data;
ret = register_netdev(netdev);
if (ret != 0) {
netif_err(dev, probe, netdev, "couldn't register the device\n");
- goto out3;
+ goto out4;
}
usb_set_intfdata(intf, dev);
ret = lan78xx_phy_init(dev);
if (ret < 0)
- goto out4;
+ goto out5;
return 0;
- out4:
+ out5:
unregister_netdev(netdev);
+ out4:
+ usb_free_urb(dev->urb_intr);
out3:
lan78xx_unbind(dev, intf);
out2:
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
- return (u16)frag->page_offset;
+ return (u16)skb_frag_off(frag);
}
static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
- frag->page_offset = pending_idx;
+ skb_frag_off_set(frag, pending_idx);
}
static inline pending_ring_idx_t pending_index(unsigned i)
skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
nskb = xenvif_alloc_skb(0);
if (unlikely(nskb == NULL)) {
+ skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
xenvif_tx_err(queue, &txreq, extra_count, idx);
if (net_ratelimit())
if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
/* Failure in xenvif_set_skb_gso is fatal. */
+ skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
kfree_skb(nskb);
break;
int j;
skb->truesize += skb->data_len;
for (j = 0; j < i; j++)
- put_page(frags[j].page.p);
+ put_page(skb_frag_page(&frags[j]));
return -ENOMEM;
}
BUG();
offset += len;
- frags[i].page.p = page;
- frags[i].page_offset = 0;
+ __skb_frag_set_page(&frags[i], page);
+ skb_frag_off_set(&frags[i], 0);
skb_frag_size_set(&frags[i], len);
}
#ifdef CONFIG_DEBUG_FS
xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
- if (IS_ERR_OR_NULL(xen_netback_dbg_root))
- pr_warn("Init of debugfs returned %ld!\n",
- PTR_ERR(xen_netback_dbg_root));
#endif /* CONFIG_DEBUG_FS */
return 0;
if (reply) {
refcount_set(&reply->refcnt, 1);
init_completion(&reply->received);
+ spin_lock_init(&reply->lock);
}
return reply;
}
if (!reply->callback) {
rc = 0;
+ goto no_callback;
+ }
+
+ spin_lock_irqsave(&reply->lock, flags);
+ if (reply->rc) {
+ /* Bail out when the requestor has already left: */
+ rc = reply->rc;
} else {
if (cmd) {
reply->offset = (u16)((char *)cmd - (char *)iob->data);
rc = reply->callback(card, reply, (unsigned long)iob);
}
}
+ spin_unlock_irqrestore(&reply->lock, flags);
+ no_callback:
if (rc <= 0)
qeth_notify_reply(reply, rc);
qeth_put_reply(reply);
rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
qeth_dequeue_reply(card, reply);
+
+ if (reply_cb) {
+ /* Wait until the callback for a late reply has completed: */
+ spin_lock_irq(&reply->lock);
+ if (rc)
+ /* Zap any callback that's still pending: */
+ reply->rc = rc;
+ spin_unlock_irq(&reply->lock);
+ }
+
if (!rc)
rc = reply->rc;
qeth_put_reply(reply);
int cnt, elements = 0;
for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) {
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[cnt];
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt];
elements += qeth_get_elements_for_range(
(addr_t)skb_frag_address(frag),
for (frag = 0; frag < numfrags; frag++) {
count = add_physinfo_entries(page_to_pfn(
skb_frag_page(&skb_shinfo(skb)->frags[frag])),
- skb_shinfo(skb)->frags[frag].page_offset,
- skb_shinfo(skb)->frags[frag].size, count,
- frags_max, frags);
+ skb_frag_off(&skb_shinfo(skb)->frags[frag]),
+ skb_frag_size(&skb_shinfo(skb)->frags[frag]),
+ count, frags_max, frags);
/* add_physinfo_entries only returns
* zero if the frags array is out of room
* That should never happen because we
}
/* poll_for_irq - checks the status of the response queue
- * @v: Void pointer to the visronic devdata struct.
+ * @t: pointer to the 'struct timer_list' from which we can retrieve the
+ * the visornic devdata struct.
*
* Main function of the vnic_incoming thread. Periodically check the response
* queue and drain it if needed.
MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
};
+#define MLX5_FC_BULK_SIZE_FACTOR 128
+
+enum mlx5_fc_bulk_alloc_bitmask {
+ MLX5_FC_BULK_128 = (1 << 0),
+ MLX5_FC_BULK_256 = (1 << 1),
+ MLX5_FC_BULK_512 = (1 << 2),
+ MLX5_FC_BULK_1024 = (1 << 3),
+ MLX5_FC_BULK_2048 = (1 << 4),
+ MLX5_FC_BULK_4096 = (1 << 5),
+ MLX5_FC_BULK_8192 = (1 << 6),
+ MLX5_FC_BULK_16384 = (1 << 7),
+};
+
+#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
+
struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x30];
u8 vhca_id[0x10];
u8 reserved_at_2e0[0x7];
u8 max_qp_mcg[0x19];
- u8 reserved_at_300[0x18];
+ u8 reserved_at_300[0x10];
+ u8 flow_counter_bulk_alloc[0x8];
u8 log_max_mcg[0x8];
u8 reserved_at_320[0x3];
struct mlx5_ifc_tisc_bits {
u8 strict_lag_tx_port_affinity[0x1];
u8 tls_en[0x1];
- u8 reserved_at_1[0x2];
+ u8 reserved_at_2[0x2];
u8 lag_tx_port_affinity[0x04];
u8 reserved_at_8[0x4];
SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
};
+enum {
+ ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0,
+ ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1,
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2,
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3,
+};
+
struct mlx5_ifc_scheduling_context_bits {
u8 element_type[0x8];
u8 reserved_at_8[0x18];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_at_40[0x40];
+ u8 reserved_at_40[0x38];
+ u8 flow_counter_bulk[0x8];
};
struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
};
struct mlx5_ifc_tls_progress_params_bits {
- u8 valid[0x1];
- u8 reserved_at_1[0x7];
- u8 pd[0x18];
+ u8 reserved_at_0[0x8];
+ u8 tisn[0x18];
u8 next_record_tcp_sn[0x20];
#include <linux/compiler.h>
#include <linux/time.h>
#include <linux/bug.h>
+#include <linux/bvec.h>
#include <linux/cache.h>
#include <linux/rbtree.h>
#include <linux/socket.h>
*/
#define GSO_BY_FRAGS 0xFFFF
-typedef struct skb_frag_struct skb_frag_t;
-
-struct skb_frag_struct {
- struct {
- struct page *p;
- } page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
- __u32 page_offset;
- __u32 size;
-#else
- __u16 page_offset;
- __u16 size;
-#endif
-};
+typedef struct bio_vec skb_frag_t;
/**
- * skb_frag_size - Returns the size of a skb fragment
+ * skb_frag_size() - Returns the size of a skb fragment
* @frag: skb fragment
*/
static inline unsigned int skb_frag_size(const skb_frag_t *frag)
{
- return frag->size;
+ return frag->bv_len;
}
/**
- * skb_frag_size_set - Sets the size of a skb fragment
+ * skb_frag_size_set() - Sets the size of a skb fragment
* @frag: skb fragment
* @size: size of fragment
*/
static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
{
- frag->size = size;
+ frag->bv_len = size;
}
/**
- * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
+ * skb_frag_size_add() - Increments the size of a skb fragment by @delta
* @frag: skb fragment
* @delta: value to add
*/
static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
{
- frag->size += delta;
+ frag->bv_len += delta;
}
/**
- * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
+ * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta
* @frag: skb fragment
* @delta: value to subtract
*/
static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
{
- frag->size -= delta;
+ frag->bv_len -= delta;
}
/**
* skb_frag_foreach_page - loop over pages in a fragment
*
* @f: skb frag to operate on
- * @f_off: offset from start of f->page.p
+ * @f_off: offset from start of f->bv_page
* @f_len: length from f_off to loop over
* @p: (temp var) current page
* @p_off: (temp var) offset from start of current page,
struct bpf_flow_dissector;
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
- __be16 proto, int nhoff, int hlen);
+ __be16 proto, int nhoff, int hlen, unsigned int flags);
bool __skb_flow_dissect(const struct net *net,
const struct sk_buff *skb,
to->l4_hash = from->l4_hash;
};
+ static inline void skb_copy_decrypted(struct sk_buff *to,
+ const struct sk_buff *from)
+ {
+ #ifdef CONFIG_TLS_DEVICE
+ to->decrypted = from->decrypted;
+ #endif
+ }
+
#ifdef NET_SKBUFF_DATA_USES_OFFSET
static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
{
* that not all callers have unique ownership of the page but rely
* on page_is_pfmemalloc doing the right thing(tm).
*/
- frag->page.p = page;
- frag->page_offset = off;
+ frag->bv_page = page;
+ frag->bv_offset = off;
skb_frag_size_set(frag, size);
page = compound_head(page);
skb->pfmemalloc = true;
}
+/**
+ * skb_frag_off() - Returns the offset of a skb fragment
+ * @frag: the paged fragment
+ */
+static inline unsigned int skb_frag_off(const skb_frag_t *frag)
+{
+ return frag->bv_offset;
+}
+
+/**
+ * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
+ * @frag: skb fragment
+ * @delta: value to add
+ */
+static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
+{
+ frag->bv_offset += delta;
+}
+
+/**
+ * skb_frag_off_set() - Sets the offset of a skb fragment
+ * @frag: skb fragment
+ * @offset: offset of fragment
+ */
+static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
+{
+ frag->bv_offset = offset;
+}
+
+/**
+ * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
+ * @fragto: skb fragment where offset is set
+ * @fragfrom: skb fragment offset is copied from
+ */
+static inline void skb_frag_off_copy(skb_frag_t *fragto,
+ const skb_frag_t *fragfrom)
+{
+ fragto->bv_offset = fragfrom->bv_offset;
+}
+
/**
* skb_frag_page - retrieve the page referred to by a paged fragment
* @frag: the paged fragment
*/
static inline struct page *skb_frag_page(const skb_frag_t *frag)
{
- return frag->page.p;
+ return frag->bv_page;
}
/**
*/
static inline void *skb_frag_address(const skb_frag_t *frag)
{
- return page_address(skb_frag_page(frag)) + frag->page_offset;
+ return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
}
/**
if (unlikely(!ptr))
return NULL;
- return ptr + frag->page_offset;
+ return ptr + skb_frag_off(frag);
+}
+
+/**
+ * skb_frag_page_copy() - sets the page in a fragment from another fragment
+ * @fragto: skb fragment where page is set
+ * @fragfrom: skb fragment page is copied from
+ */
+static inline void skb_frag_page_copy(skb_frag_t *fragto,
+ const skb_frag_t *fragfrom)
+{
+ fragto->bv_page = fragfrom->bv_page;
}
/**
*/
static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
{
- frag->page.p = page;
+ frag->bv_page = page;
}
/**
enum dma_data_direction dir)
{
return dma_map_page(dev, skb_frag_page(frag),
- frag->page_offset + offset, size, dir);
+ skb_frag_off(frag) + offset, size, dir);
}
static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
if (skb_zcopy(skb))
return false;
if (i) {
- const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
return page == skb_frag_page(frag) &&
- off == frag->page_offset + skb_frag_size(frag);
+ off == skb_frag_off(frag) + skb_frag_size(frag);
}
return false;
}
struct xt_action_param xt;
};
+#if IS_ENABLED(CONFIG_NETFILTER)
static inline struct net *nft_net(const struct nft_pktinfo *pkt)
{
return pkt->xt.state->net;
pkt->skb = skb;
pkt->xt.state = state;
}
+#endif
static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
struct sk_buff *skb)
unsigned char *udata;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:13,
- bound:1,
+ u16 flags:14,
genmask:2;
u8 klen;
u8 dlen;
int family;
struct module *owner;
unsigned int hook_mask;
+#if IS_ENABLED(CONFIG_NETFILTER)
nf_hookfn *hooks[NF_MAX_HOOKS];
int (*ops_register)(struct net *net, const struct nf_hook_ops *ops);
void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
+#endif
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
* @flow_block: flow block (for hardware offload)
*/
struct nft_base_chain {
+#if IS_ENABLED(CONFIG_NETFILTER)
struct nf_hook_ops ops;
+#endif
const struct nft_chain_type *type;
u8 policy;
u8 flags;
use:30;
u64 handle;
/* runtime data below here */
+#if IS_ENABLED(CONFIG_NETFILTER)
struct nf_hook_ops *ops ____cacheline_aligned;
+#endif
struct nf_flowtable data;
};
#define MODULE_ALIAS_NFT_OBJ(type) \
MODULE_ALIAS("nft-obj-" __stringify(type))
+#if IS_ENABLED(CONFIG_NF_TABLES)
+
/*
* The gencursor defines two generations, the currently active and the
* next one. Objects contain a bitmask of 2 bits specifying the generations
ext->genmask ^= nft_genmask_next(net);
}
+#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
+
/*
* We use a free bit in the genmask field to indicate the element
* is busy, meaning it is currently being processed either by
struct nft_trans_set {
struct nft_set *set;
u32 set_id;
+ bool bound;
};
#define nft_trans_set(trans) \
(((struct nft_trans_set *)trans->data)->set)
#define nft_trans_set_id(trans) \
(((struct nft_trans_set *)trans->data)->set_id)
+ #define nft_trans_set_bound(trans) \
+ (((struct nft_trans_set *)trans->data)->bound)
struct nft_trans_chain {
bool update;
struct nft_trans_elem {
struct nft_set *set;
struct nft_set_elem elem;
+ bool bound;
};
#define nft_trans_elem_set(trans) \
(((struct nft_trans_elem *)trans->data)->set)
#define nft_trans_elem(trans) \
(((struct nft_trans_elem *)trans->data)->elem)
+ #define nft_trans_elem_set_bound(trans) \
+ (((struct nft_trans_elem *)trans->data)->bound)
struct nft_trans_obj {
struct nft_object *obj;
u32 len;
u32 base_offset;
u32 offset;
+ struct nft_data data;
struct nft_data mask;
};
struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
void nft_flow_rule_destroy(struct nft_flow_rule *flow);
int nft_flow_rule_offload_commit(struct net *net);
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command);
#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
(__reg)->base_offset = \
(__reg)->key = __key; \
memset(&(__reg)->mask, 0xff, (__reg)->len);
+ int nft_chain_offload_priority(struct nft_base_chain *basechain);
+
#endif
return block->q;
}
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void __tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident);
-
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
{
}
-static inline
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- return 0;
-}
-
-static inline
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- return 0;
-}
-
-static inline
-void __tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
-static inline
-void tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
{
cls_common->chain_index = tp->chain->index;
cls_common->protocol = tp->protocol;
- cls_common->prio = tp->prio;
+ cls_common->prio = tp->prio >> 16;
if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
cls_common->extack = extack;
}
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE,
+ BPF_MAP_TYPE_DEVMAP_HASH,
};
/* Note that tracing related programs such as
* If no cookie has been set yet, generate a new cookie. Once
* generated, the socket cookie remains stable for the life of the
* socket. This helper can be useful for monitoring per socket
- * networking traffic statistics as it provides a unique socket
- * identifier per namespace.
+ * networking traffic statistics as it provides a global socket
+ * identifier that can be assumed unique.
* Return
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
* **-EPERM** if no permission to send the *sig*.
*
* **-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header.
+ *
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie in
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** SYN cookie cannot be issued due to error
+ *
+ * **-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete), \
- FN(send_signal),
+ FN(send_signal), \
+ FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
+#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
+
struct bpf_flow_keys {
__u16 nhoff;
__u16 thoff;
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
};
+ __u32 flags;
+ __be32 flow_label;
};
struct bpf_func_info {
if (!skb)
goto wait_for_memory;
+ #ifdef CONFIG_TLS_DEVICE
+ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+ #endif
skb_entail(sk, skb);
copy = size_goal;
}
struct sockcm_cookie sockc;
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
- bool process_backlog = false;
+ int process_backlog = 0;
bool zc = false;
long timeo;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- if (process_backlog && sk_flush_backlog(sk)) {
- process_backlog = false;
- goto restart;
+ if (unlikely(process_backlog >= 16)) {
+ process_backlog = 0;
+ if (sk_flush_backlog(sk))
+ goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
if (!skb)
goto wait_for_memory;
- process_backlog = true;
+ process_backlog++;
skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
break;
frags = skb_shinfo(skb)->frags;
while (offset) {
- if (frags->size > offset)
+ if (skb_frag_size(frags) > offset)
goto out;
- offset -= frags->size;
+ offset -= skb_frag_size(frags);
frags++;
}
}
- if (frags->size != PAGE_SIZE || frags->page_offset) {
+ if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
int remaining = zc->recv_skip_hint;
+ int size = skb_frag_size(frags);
- while (remaining && (frags->size != PAGE_SIZE ||
- frags->page_offset)) {
- remaining -= frags->size;
+ while (remaining && (size != PAGE_SIZE ||
+ skb_frag_off(frags))) {
+ remaining -= size;
frags++;
+ size = skb_frag_size(frags);
}
zc->recv_skip_hint -= remaining;
break;
return 1;
for (i = 0; i < shi->nr_frags; ++i) {
- const struct skb_frag_struct *f = &shi->frags[i];
- unsigned int offset = f->page_offset;
+ const skb_frag_t *f = &shi->frags[i];
+ unsigned int offset = skb_frag_off(f);
struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
sg_set_page(&sg, page, skb_frag_size(f),
buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
if (!buff)
return -ENOMEM; /* We'll just try again later. */
+ skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
} else {
shinfo->frags[k] = shinfo->frags[i];
if (eat) {
- shinfo->frags[k].page_offset += eat;
+ skb_frag_off_add(&shinfo->frags[k], eat);
skb_frag_size_sub(&shinfo->frags[k], eat);
eat = 0;
}
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
return -ENOMEM;
+ skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk);
+ skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
return;
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
- if (trans->msg_type == NFT_MSG_NEWSET &&
- nft_trans_set(trans) == set) {
- set->bound = true;
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (nft_trans_set(trans) == set)
+ nft_trans_set_bound(trans) = true;
+ break;
+ case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans) == set)
+ nft_trans_elem_set_bound(trans) = true;
break;
}
}
chain->flags |= NFT_BASE_CHAIN | flags;
basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ nft_chain_offload_priority(basechain) < 0)
+ return -EOPNOTSUPP;
+
flow_block_init(&basechain->flow_block);
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
- if (nft_trans_set(trans)->bound) {
+ if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_set(trans)->bound) {
+ if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
.exit = nf_tables_exit_net,
};
+static struct flow_indr_block_ing_entry block_ing_entry = {
+ .cb = nft_indr_block_get_and_ing_cmd,
+ .list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
static int __init nf_tables_module_init(void)
{
int err;
goto err5;
nft_chain_route_init();
+ flow_indr_add_block_ing_cb(&block_ing_entry);
return err;
err5:
rhltable_destroy(&nft_objname_ht);
static void __exit nf_tables_module_exit(void)
{
+ flow_indr_del_block_ing_cb(&block_ing_entry);
nfnetlink_subsys_unregister(&nf_tables_subsys);
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
}
static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
- __be16 proto,
- struct netlink_ext_ack *extack)
+ __be16 proto, int priority,
+ struct netlink_ext_ack *extack)
{
common->protocol = proto;
+ common->prio = priority;
common->extack = extack;
}
return 0;
}
+ int nft_chain_offload_priority(struct nft_base_chain *basechain)
+ {
+ if (basechain->ops.priority <= 0 ||
+ basechain->ops.priority > USHRT_MAX)
+ return -1;
+
+ return 0;
+ }
+
static int nft_flow_offload_rule(struct nft_trans *trans,
enum flow_cls_command command)
{
if (flow)
proto = flow->proto;
- nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+ nft_flow_offload_common_init(&cls_flow.common, proto,
+ basechain->ops.priority, &extack);
cls_flow.command = command;
cls_flow.cookie = (unsigned long) rule;
if (flow)
return 0;
}
+static int nft_block_setup(struct nft_base_chain *basechain,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ err = nft_flow_offload_bind(bo, basechain);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ err = nft_flow_offload_unbind(bo, basechain);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo = {};
+ int err;
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ if (err < 0)
+ return err;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev,
+ struct nft_base_chain *chain,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo = {};
+
+ if (!chain)
+ return;
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+ nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct flow_block_offload bo = {};
+ struct netlink_ext_ack extack = {};
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ flow_indr_block_call(dev, &bo, cmd);
+
+ if (list_empty(&bo.cb_list))
+ return -EOPNOTSUPP;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
static int nft_flow_offload_chain(struct nft_trans *trans,
enum flow_block_command cmd)
{
struct nft_chain *chain = trans->ctx.chain;
- struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
struct nft_base_chain *basechain;
struct net_device *dev;
- int err;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
dev = basechain->ops.dev;
- if (!dev || !dev->netdev_ops->ndo_setup_tc)
+ if (!dev)
return -EOPNOTSUPP;
/* Only default policy to accept is supported for now. */
nft_trans_chain_policy(trans) != NF_ACCEPT)
return -EOPNOTSUPP;
- bo.command = cmd;
- bo.block = &basechain->flow_block;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
-
- err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
- if (err < 0)
- return err;
-
- switch (cmd) {
- case FLOW_BLOCK_BIND:
- err = nft_flow_offload_bind(&bo, basechain);
- break;
- case FLOW_BLOCK_UNBIND:
- err = nft_flow_offload_unbind(&bo, basechain);
- break;
- }
-
- return err;
+ if (dev->netdev_ops->ndo_setup_tc)
+ return nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ return nft_indr_block_offload_cmd(basechain, dev, cmd);
}
int nft_flow_rule_offload_commit(struct net *net)
return err;
}
+
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
+{
+ struct net *net = dev_net(dev);
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (table->family != NFPROTO_NETDEV)
+ continue;
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain;
+
+ basechain = nft_base_chain(chain);
+ if (!strncmp(basechain->dev_name, dev->name,
+ IFNAMSIZ)) {
+ nft_indr_block_ing_cmd(dev, basechain,
+ cb, cb_priv,
+ command);
+ return;
+ }
+ }
+ }
+ }
+}
int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
unsigned int, unsigned int, rxrpc_seq_t, u16);
+ /* Free crypto request on a call */
+ void (*free_call_crypto)(struct rxrpc_call *);
+
/* Locate the data in a received packet that has been verified. */
void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
unsigned int *, unsigned int *);
*/
struct rxrpc_local {
struct rcu_head rcu;
- atomic_t usage;
+ atomic_t active_users; /* Number of users of the local endpoint */
+ atomic_t usage; /* Number of references to the structure */
struct rxrpc_net *rxnet; /* The network ns in which this resides */
struct list_head link;
struct socket *socket; /* my UDP socket */
unsigned long expect_term_by; /* When we expect call termination by */
u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
+ struct skcipher_request *cipher_req; /* Packet cipher request buffer */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
/* receive-phase ACK management */
u8 ackr_reason; /* reason to ACK */
- u16 ackr_skew; /* skew on packet being ACK'd */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_serial_t ackr_first_seq; /* first sequence number received */
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
/*
* call_event.c
*/
- void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
void rxrpc_put_local(struct rxrpc_local *);
+ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
+ void rxrpc_unuse_local(struct rxrpc_local *);
void rxrpc_queue_local(struct rxrpc_local *);
void rxrpc_destroy_all_locals(struct rxrpc_net *);
[TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
};
-static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
- [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
-};
-
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
spin_unlock_bh(qdisc_lock(sch));
free_sched:
- kfree(new_admin);
+ if (new_admin)
+ call_rcu(&new_admin->rcu, taprio_free_sched_cb);
return err;
}
* @transmitq: queue for sent, non-acked messages
* @backlogq: queue for messages waiting to be sent
* @snt_nxt: next sequence number to use for outbound messages
- * @prev_from: sequence number of most previous retransmission request
- * @stale_limit: time when repeated identical retransmits must force link reset
* @ackers: # of peers that needs to ack each packet before it can be released
* @acked: # last packet acked by a certain peer. Used for broadcast.
* @rcv_nxt: next sequence number to expect for inbound messages
u16 limit;
} backlog[5];
u16 snd_nxt;
- u16 prev_from;
u16 window;
- unsigned long stale_limit;
/* Reception */
u16 rcv_nxt;
/* Fragmentation/reassembly */
struct sk_buff *reasm_buf;
+ struct sk_buff *reasm_tnlmsg;
/* Broadcast */
u16 ackers;
*/
static void link_prepare_wakeup(struct tipc_link *l)
{
+ struct sk_buff_head *wakeupq = &l->wakeupq;
+ struct sk_buff_head *inputq = l->inputq;
struct sk_buff *skb, *tmp;
- int imp, i = 0;
+ struct sk_buff_head tmpq;
+ int avail[5] = {0,};
+ int imp = 0;
+
+ __skb_queue_head_init(&tmpq);
+
+ for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
+ avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
- skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
+ skb_queue_walk_safe(wakeupq, skb, tmp) {
imp = TIPC_SKB_CB(skb)->chain_imp;
- if (l->backlog[imp].len < l->backlog[imp].limit) {
- skb_unlink(skb, &l->wakeupq);
- skb_queue_tail(l->inputq, skb);
- } else if (i++ > 10) {
- break;
- }
+ if (avail[imp] <= 0)
+ continue;
+ avail[imp]--;
+ __skb_unlink(skb, wakeupq);
+ __skb_queue_tail(&tmpq, skb);
}
+
+ spin_lock_bh(&inputq->lock);
+ skb_queue_splice_tail(&tmpq, inputq);
+ spin_unlock_bh(&inputq->lock);
+
}
void tipc_link_reset(struct tipc_link *l)
l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
kfree_skb(l->reasm_buf);
+ kfree_skb(l->reasm_tnlmsg);
kfree_skb(l->failover_reasm_skb);
l->reasm_buf = NULL;
+ l->reasm_tnlmsg = NULL;
l->failover_reasm_skb = NULL;
l->rcv_unacked = 0;
l->snd_nxt = 1;
int rc = 0;
if (unlikely(msg_size(hdr) > mtu)) {
- skb_queue_purge(list);
+ pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
+ skb_queue_len(list), msg_user(hdr),
+ msg_type(hdr), msg_size(hdr), mtu);
+ __skb_queue_purge(list);
return -EMSGSIZE;
}
if (likely(skb_queue_len(transmq) < maxwin)) {
_skb = skb_clone(skb, GFP_ATOMIC);
if (!_skb) {
- skb_queue_purge(list);
+ __skb_queue_purge(list);
return -ENOBUFS;
}
__skb_dequeue(list);
* link_retransmit_failure() - Detect repeated retransmit failures
* @l: tipc link sender
* @r: tipc link receiver (= l in case of unicast)
- * @from: seqno of the 1st packet in retransmit request
* @rc: returned code
*
* Return: true if the repeated retransmit failures happens, otherwise
* false
*/
static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
- u16 from, int *rc)
+ int *rc)
{
struct sk_buff *skb = skb_peek(&l->transmq);
struct tipc_msg *hdr;
if (!skb)
return false;
- hdr = buf_msg(skb);
- /* Detect repeated retransmit failures on same packet */
- if (r->prev_from != from) {
- r->prev_from = from;
- r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
- } else if (time_after(jiffies, r->stale_limit)) {
- pr_warn("Retransmission failure on link <%s>\n", l->name);
- link_print(l, "State of link ");
- pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
- msg_user(hdr), msg_type(hdr), msg_size(hdr),
- msg_errcode(hdr));
- pr_info("sqno %u, prev: %x, src: %x\n",
- msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
-
- trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
- trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
- trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+ if (!TIPC_SKB_CB(skb)->retr_cnt)
+ return false;
- if (link_is_bc_sndlink(l))
- *rc = TIPC_LINK_DOWN_EVT;
+ if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
+ msecs_to_jiffies(r->tolerance)))
+ return false;
+
+ hdr = buf_msg(skb);
+ if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
+ return false;
+ pr_warn("Retransmission failure on link <%s>\n", l->name);
+ link_print(l, "State of link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+ pr_info("sqno %u, prev: %x, dest: %x\n",
+ msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
+ pr_info("retr_stamp %d, retr_cnt %d\n",
+ jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
+ TIPC_SKB_CB(skb)->retr_cnt);
+
+ trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+ trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+ trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+
+ if (link_is_bc_sndlink(l)) {
+ r->state = LINK_RESET;
+ *rc = TIPC_LINK_DOWN_EVT;
+ } else {
*rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- return true;
}
- return false;
+ return true;
}
/* tipc_link_bc_retrans() - retransmit zero or more packets
trace_tipc_link_retrans(r, from, to, &l->transmq);
- if (link_retransmit_failure(l, r, from, &rc))
+ if (link_retransmit_failure(l, r, &rc))
return rc;
skb_queue_walk(&l->transmq, skb) {
continue;
if (more(msg_seqno(hdr), to))
break;
- if (link_is_bc_sndlink(l)) {
- if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
- continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- }
+
+ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+ continue;
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
if (!_skb)
return 0;
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
+
+ /* Increase actual retrans counter & mark first time */
+ if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ TIPC_SKB_CB(skb)->retr_stamp = jiffies;
}
return 0;
}
struct sk_buff_head *inputq)
{
struct sk_buff **reasm_skb = &l->failover_reasm_skb;
+ struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg;
struct sk_buff_head *fdefq = &l->failover_deferdq;
struct tipc_msg *hdr = buf_msg(skb);
struct sk_buff *iskb;
int rc = 0;
u16 seqno;
- /* SYNCH_MSG */
- if (msg_type(hdr) == SYNCH_MSG)
- goto drop;
+ if (msg_type(hdr) == SYNCH_MSG) {
+ kfree_skb(skb);
+ return 0;
+ }
- /* FAILOVER_MSG */
- if (!tipc_msg_extract(skb, &iskb, &ipos)) {
- pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n",
- skb_queue_len(fdefq));
- return rc;
+ /* Not a fragment? */
+ if (likely(!msg_nof_fragms(hdr))) {
+ if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) {
+ pr_warn_ratelimited("Unable to extract msg, defq: %d\n",
+ skb_queue_len(fdefq));
+ return 0;
+ }
+ kfree_skb(skb);
+ } else {
+ /* Set fragment type for buf_append */
+ if (msg_fragm_no(hdr) == 1)
+ msg_set_type(hdr, FIRST_FRAGMENT);
+ else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr))
+ msg_set_type(hdr, FRAGMENT);
+ else
+ msg_set_type(hdr, LAST_FRAGMENT);
+
+ if (!tipc_buf_append(reasm_tnlmsg, &skb)) {
+ /* Successful but non-complete reassembly? */
+ if (*reasm_tnlmsg || link_is_bc_rcvlink(l))
+ return 0;
+ pr_warn_ratelimited("Unable to reassemble tunnel msg\n");
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+ iskb = skb;
}
do {
seqno = buf_seqno(iskb);
-
if (unlikely(less(seqno, l->drop_point))) {
kfree_skb(iskb);
continue;
}
-
if (unlikely(seqno != l->drop_point)) {
__tipc_skb_queue_sorted(fdefq, seqno, iskb);
continue;
}
l->drop_point++;
-
if (!tipc_data_input(l, iskb, inputq))
rc |= tipc_link_input(l, iskb, inputq, reasm_skb);
if (unlikely(rc))
break;
} while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
-drop:
- kfree_skb(skb);
return rc;
}
struct tipc_msg *hdr;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
+ bool passed = false;
u16 seqno, n = 0;
int rc = 0;
- if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
- return rc;
-
skb_queue_walk_safe(&l->transmq, skb, tmp) {
seqno = buf_seqno(skb);
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
} else if (less_eq(seqno, acked + gap)) {
- /* retransmit skb */
+ /* First, check if repeated retrans failures occurs? */
+ if (!passed && link_retransmit_failure(l, l, &rc))
+ return rc;
+ passed = true;
+
+ /* retransmit skb if unrestricted*/
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
-
- _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+ _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
+ GFP_ATOMIC);
if (!_skb)
continue;
hdr = buf_msg(_skb);
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
+
+ /* Increase actual retrans counter & mark first time */
+ if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ TIPC_SKB_CB(skb)->retr_stamp = jiffies;
} else {
/* retry with Gap ACK blocks if any */
if (!ga || n >= ga->gack_cnt)
struct sk_buff *skb;
u32 dnode = l->addr;
- skb_queue_head_init(&tnlq);
+ __skb_queue_head_init(&tnlq);
skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
INT_H_SIZE, BASIC_H_SIZE,
dnode, onode, 0, 0, 0);
struct sk_buff *skb, *tnlskb;
struct tipc_msg *hdr, tnlhdr;
struct sk_buff_head *queue = &l->transmq;
- struct sk_buff_head tmpxq, tnlq;
+ struct sk_buff_head tmpxq, tnlq, frags;
u16 pktlen, pktcnt, seqno = l->snd_nxt;
+ bool pktcnt_need_update = false;
+ u16 syncpt;
+ int rc;
if (!tnl)
return;
- skb_queue_head_init(&tnlq);
- skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&tnlq);
+ __skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&frags);
/* At least one packet required for safe algorithm => add dummy */
skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
pr_warn("%sunable to create tunnel packet\n", link_co_err);
return;
}
- skb_queue_tail(&tnlq, skb);
+ __skb_queue_tail(&tnlq, skb);
tipc_link_xmit(l, &tnlq, &tmpxq);
__skb_queue_purge(&tmpxq);
+ /* Link Synching:
+ * From now on, send only one single ("dummy") SYNCH message
+ * to peer. The SYNCH message does not contain any data, just
+ * a header conveying the synch point to the peer.
+ */
+ if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+ tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG,
+ INT_H_SIZE, 0, l->addr,
+ tipc_own_addr(l->net),
+ 0, 0, 0);
+ if (!tnlskb) {
+ pr_warn("%sunable to create dummy SYNCH_MSG\n",
+ link_co_err);
+ return;
+ }
+
+ hdr = buf_msg(tnlskb);
+ syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
+ msg_set_syncpt(hdr, syncpt);
+ msg_set_bearer_id(hdr, l->peer_bearer_id);
+ __skb_queue_tail(&tnlq, tnlskb);
+ tipc_link_xmit(tnl, &tnlq, xmitq);
+ return;
+ }
+
/* Initialize reusable tunnel packet header */
tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
mtyp, INT_H_SIZE, l->addr);
if (queue == &l->backlogq)
msg_set_seqno(hdr, seqno++);
pktlen = msg_size(hdr);
+
+ /* Tunnel link MTU is not large enough? This could be
+ * due to:
+ * 1) Link MTU has just changed or set differently;
+ * 2) Or FAILOVER on the top of a SYNCH message
+ *
+ * The 2nd case should not happen if peer supports
+ * TIPC_TUNNEL_ENHANCED
+ */
+ if (pktlen > tnl->mtu - INT_H_SIZE) {
+ if (mtyp == FAILOVER_MSG &&
+ (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+ rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu,
+ &frags);
+ if (rc) {
+ pr_warn("%sunable to frag msg: rc %d\n",
+ link_co_err, rc);
+ return;
+ }
+ pktcnt += skb_queue_len(&frags) - 1;
+ pktcnt_need_update = true;
+ skb_queue_splice_tail_init(&frags, &tnlq);
+ continue;
+ }
+ /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED
+ * => Just warn it and return!
+ */
+ pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n",
+ link_co_err, msg_user(hdr),
+ msg_type(hdr), msg_size(hdr));
+ return;
+ }
+
msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
if (!tnlskb) {
goto tnl;
}
+ if (pktcnt_need_update)
+ skb_queue_walk(&tnlq, skb) {
+ hdr = buf_msg(skb);
+ msg_set_msgcnt(hdr, pktcnt);
+ }
+
tipc_link_xmit(tnl, &tnlq, xmitq);
if (mtyp == FAILOVER_MSG) {
i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
- i += scnprintf(buf + i, sz - i, " %u", l->prev_from);
+ i += scnprintf(buf + i, sz - i, " %u", 0);
i += scnprintf(buf + i, sz - i, " %u", 0);
i += scnprintf(buf + i, sz - i, " %u", l->acked);
#define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb {
- u32 bytes_read;
- u32 orig_member;
struct sk_buff *tail;
unsigned long nxt_retr;
- bool validated;
+ unsigned long retr_stamp;
+ u32 bytes_read;
+ u32 orig_member;
u16 chain_imp;
u16 ackers;
+ u16 retr_cnt;
+ bool validated;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
msg_set_bits(m, 4, 16, 0xffff, n);
}
+static inline u32 msg_nof_fragms(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
+static inline u32 msg_fragm_no(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 16, 0xffff);
+}
+
static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 4, 16, 0xffff, n);
}
-
static inline u16 msg_next_sent(struct tipc_msg *m)
{
return msg_bits(m, 4, 0, 0xffff);
msg_set_bits(m, 9, 16, 0xffff, n);
}
+static inline u16 msg_syncpt(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_syncpt(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
static inline u32 msg_conn_ack(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
u32 mtu, u32 dnode);
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+ int pktmax, struct sk_buff_head *frags);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
skb_frag_t *frag;
frag = &record->frags[record->num_frags - 1];
- if (frag->page.p == pfrag->page &&
- frag->page_offset + frag->size == pfrag->offset) {
- frag->size += size;
+ if (skb_frag_page(frag) == pfrag->page &&
+ skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) {
+ skb_frag_size_add(frag, size);
} else {
++frag;
- frag->page.p = pfrag->page;
- frag->page_offset = pfrag->offset;
- frag->size = size;
+ __skb_frag_set_page(frag, pfrag->page);
+ skb_frag_off_set(frag, pfrag->offset);
+ skb_frag_size_set(frag, size);
++record->num_frags;
get_page(pfrag->page);
}
frag = &record->frags[i];
sg_unmark_end(&offload_ctx->sg_tx_data[i]);
sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
- frag->size, frag->page_offset);
- sk_mem_charge(sk, frag->size);
+ skb_frag_size(frag), skb_frag_off(frag));
+ sk_mem_charge(sk, skb_frag_size(frag));
get_page(skb_frag_page(frag));
}
sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
frag = &record->frags[0];
__skb_frag_set_page(frag, pfrag->page);
- frag->page_offset = pfrag->offset;
+ skb_frag_off_set(frag, pfrag->offset);
skb_frag_size_set(frag, prepend_size);
get_page(pfrag->page);
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
- int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
struct tls_record_info *record = ctx->open_record;
+ int tls_push_record_flags;
struct page_frag *pfrag;
size_t orig_size = size;
u32 max_open_record_len;
if (sk->sk_err)
return -sk->sk_err;
+ flags |= MSG_SENDPAGE_DECRYPTED;
+ tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
if (tls_is_partially_sent_record(tls_ctx)) {
rc = tls_push_partial_record(sk, tls_ctx, flags);
gfp_t sk_allocation = sk->sk_allocation;
sk->sk_allocation = GFP_ATOMIC;
- tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
+ tls_push_partial_record(sk, ctx,
+ MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SENDPAGE_DECRYPTED);
sk->sk_allocation = sk_allocation;
}
}
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE,
+ BPF_MAP_TYPE_DEVMAP_HASH,
};
/* Note that tracing related programs such as
* If no cookie has been set yet, generate a new cookie. Once
* generated, the socket cookie remains stable for the life of the
* socket. This helper can be useful for monitoring per socket
- * networking traffic statistics as it provides a unique socket
- * identifier per namespace.
+ * networking traffic statistics as it provides a global socket
+ * identifier that can be assumed unique.
* Return
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
* **-EPERM** if no permission to send the *sig*.
*
* **-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header.
+ *
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie in
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** SYN cookie cannot be issued due to error
+ *
+ * **-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete), \
- FN(send_signal),
+ FN(send_signal), \
+ FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
+#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
+
struct bpf_flow_keys {
__u16 nhoff;
__u16 thoff;
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
};
+ __u32 flags;
+ __be32 flow_label;
};
struct bpf_func_info {
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
+#include <sys/utsname.h>
#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
+#include "hashmap.h"
#ifndef EM_BPF
#define EM_BPF 247
static libbpf_print_fn_t __libbpf_pr = __base_pr;
-void libbpf_set_print(libbpf_print_fn_t fn)
+libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
+ libbpf_print_fn_t old_print_fn = __libbpf_pr;
+
__libbpf_pr = fn;
+ return old_print_fn;
}
__printf(2, 3)
bpf_program_clear_priv_t clear_priv;
enum bpf_attach_type expected_attach_type;
- int btf_fd;
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_cnt;
prog->instances.nr = -1;
zfree(&prog->instances.fds);
- zclose(prog->btf_fd);
zfree(&prog->func_info);
zfree(&prog->line_info);
}
prog->instances.fds = NULL;
prog->instances.nr = -1;
prog->type = BPF_PROG_TYPE_UNSPEC;
- prog->btf_fd = -1;
return 0;
errout:
return 0;
}
-static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
- __u32 id)
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
- while (true) {
- switch (BTF_INFO_KIND(t->info)) {
- case BTF_KIND_VOLATILE:
- case BTF_KIND_CONST:
- case BTF_KIND_RESTRICT:
- case BTF_KIND_TYPEDEF:
- t = btf__type_by_id(btf, t->type);
- break;
- default:
- return t;
- }
+ if (res_id)
+ *res_id = id;
+
+ while (btf_is_mod(t) || btf_is_typedef(t)) {
+ if (res_id)
+ *res_id = t->type;
+ t = btf__type_by_id(btf, t->type);
}
+
+ return t;
}
/*
static bool get_map_field_int(const char *map_name, const struct btf *btf,
const struct btf_type *def,
const struct btf_member *m, __u32 *res) {
- const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
const char *name = btf__name_by_offset(btf, m->name_off);
const struct btf_array *arr_info;
const struct btf_type *arr_t;
- if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+ if (!btf_is_ptr(t)) {
pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
- map_name, name, BTF_INFO_KIND(t->info));
+ map_name, name, btf_kind(t));
return false;
}
map_name, name, t->type);
return false;
}
- if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) {
+ if (!btf_is_array(arr_t)) {
pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
- map_name, name, BTF_INFO_KIND(arr_t->info));
+ map_name, name, btf_kind(arr_t));
return false;
}
- arr_info = (const void *)(arr_t + 1);
+ arr_info = btf_array(arr_t);
*res = arr_info->nelems;
return true;
}
struct bpf_map *map;
int vlen, i;
- vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
+ vi = btf_var_secinfos(sec) + var_idx;
var = btf__type_by_id(obj->btf, vi->type);
- var_extra = (const void *)(var + 1);
+ var_extra = btf_var(var);
map_name = btf__name_by_offset(obj->btf, var->name_off);
- vlen = BTF_INFO_VLEN(var->info);
+ vlen = btf_vlen(var);
if (map_name == NULL || map_name[0] == '\0') {
pr_warning("map #%d: empty name.\n", var_idx);
pr_warning("map '%s' BTF data is corrupted.\n", map_name);
return -EINVAL;
}
- if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
+ if (!btf_is_var(var)) {
pr_warning("map '%s': unexpected var kind %u.\n",
- map_name, BTF_INFO_KIND(var->info));
+ map_name, btf_kind(var));
return -EINVAL;
}
if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
return -EOPNOTSUPP;
}
- def = skip_mods_and_typedefs(obj->btf, var->type);
- if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+ if (!btf_is_struct(def)) {
pr_warning("map '%s': unexpected def kind %u.\n",
- map_name, BTF_INFO_KIND(var->info));
+ map_name, btf_kind(var));
return -EINVAL;
}
if (def->size > vi->size) {
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
map_name, map->sec_idx, map->sec_offset);
- vlen = BTF_INFO_VLEN(def->info);
- m = (const void *)(def + 1);
+ vlen = btf_vlen(def);
+ m = btf_members(def);
for (i = 0; i < vlen; i++, m++) {
const char *name = btf__name_by_offset(obj->btf, m->name_off);
map_name, m->type);
return -EINVAL;
}
- if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+ if (!btf_is_ptr(t)) {
pr_warning("map '%s': key spec is not PTR: %u.\n",
- map_name, BTF_INFO_KIND(t->info));
+ map_name, btf_kind(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
map_name, m->type);
return -EINVAL;
}
- if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+ if (!btf_is_ptr(t)) {
pr_warning("map '%s': value spec is not PTR: %u.\n",
- map_name, BTF_INFO_KIND(t->info));
+ map_name, btf_kind(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
nr_types = btf__get_nr_types(obj->btf);
for (i = 1; i <= nr_types; i++) {
t = btf__type_by_id(obj->btf, i);
- if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
+ if (!btf_is_datasec(t))
continue;
name = btf__name_by_offset(obj->btf, t->name_off);
if (strcmp(name, MAPS_ELF_SEC) == 0) {
return -ENOENT;
}
- vlen = BTF_INFO_VLEN(sec->info);
+ vlen = btf_vlen(sec);
for (i = 0; i < vlen; i++) {
err = bpf_object__init_user_btf_map(obj, sec, i,
obj->efile.btf_maps_shndx,
struct btf *btf = obj->btf;
struct btf_type *t;
int i, j, vlen;
- __u16 kind;
if (!obj->btf || (has_func && has_datasec))
return;
for (i = 1; i <= btf__get_nr_types(btf); i++) {
t = (struct btf_type *)btf__type_by_id(btf, i);
- kind = BTF_INFO_KIND(t->info);
- if (!has_datasec && kind == BTF_KIND_VAR) {
+ if (!has_datasec && btf_is_var(t)) {
/* replace VAR with INT */
t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
/*
* original variable took less than 4 bytes
*/
t->size = 1;
- *(int *)(t+1) = BTF_INT_ENC(0, 0, 8);
- } else if (!has_datasec && kind == BTF_KIND_DATASEC) {
+ *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
+ } else if (!has_datasec && btf_is_datasec(t)) {
/* replace DATASEC with STRUCT */
- struct btf_var_secinfo *v = (void *)(t + 1);
- struct btf_member *m = (void *)(t + 1);
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
+ struct btf_member *m = btf_members(t);
struct btf_type *vt;
char *name;
name++;
}
- vlen = BTF_INFO_VLEN(t->info);
+ vlen = btf_vlen(t);
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
for (j = 0; j < vlen; j++, v++, m++) {
/* order of field assignments is important */
vt = (void *)btf__type_by_id(btf, v->type);
m->name_off = vt->name_off;
}
- } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) {
+ } else if (!has_func && btf_is_func_proto(t)) {
/* replace FUNC_PROTO with ENUM */
- vlen = BTF_INFO_VLEN(t->info);
+ vlen = btf_vlen(t);
t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
t->size = sizeof(__u32); /* kernel enforced */
- } else if (!has_func && kind == BTF_KIND_FUNC) {
+ } else if (!has_func && btf_is_func(t)) {
/* replace FUNC with TYPEDEF */
t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
}
(long long) sym.st_value, sym.st_name, name);
shdr_idx = sym.st_shndx;
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+ pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n",
+ insn_idx, shdr_idx);
+
+ if (shdr_idx >= SHN_LORESERVE) {
+ pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n",
+ name, shdr_idx, insn_idx,
+ insns[insn_idx].code);
+ return -LIBBPF_ERRNO__RELOC;
+ }
if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
prog->section_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
- pr_debug("relocation: insn_idx=%u\n", insn_idx);
-
if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
pr_warning("incorrect bpf_call opcode\n");
prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
}
- if (!insn_offset)
- prog->btf_fd = btf__fd(obj->btf);
-
return 0;
}
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field byte offset represented by spec */
+ __u32 offset;
+};
+
+static bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+/*
+ * Turn bpf_offset_reloc into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field offset (in bytes), specified by accessor string. Low-level spec
+ * captures every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ */
+static int bpf_core_spec_parse(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ /* first spec value is always reloc type array index */
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ spec->spec[0].type_id = id;
+ spec->spec[0].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->offset = access_idx * sz;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+ if (btf_member_bitfield_size(t, access_idx))
+ return -EINVAL;
+
+ offset = btf_member_bit_offset(t, access_idx);
+ if (offset % 8)
+ return -EINVAL;
+ spec->offset += offset / 8;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->spec[spec->len].name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t || access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->offset += access_idx * sz;
+ } else {
+ pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
+ type_id, spec_str, i, id, btf_kind(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static bool bpf_core_is_flavor_sep(const char *s)
+{
+ /* check X___Y name pattern, where X and Y are not underscores */
+ return s[0] != '_' && /* X */
+ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
+ s[4] != '_'; /* Y */
+}
+
+/* Given 'some_struct_name___with_flavor' return the length of a name prefix
+ * before last triple underscore. Struct name part after last triple
+ * underscore is ignored by BPF CO-RE relocation during relocation matching.
+ */
+static size_t bpf_core_essential_name_len(const char *name)
+{
+ size_t n = strlen(name);
+ int i;
+
+ for (i = n - 5; i >= 0; i--) {
+ if (bpf_core_is_flavor_sep(name + i))
+ return i + 1;
+ }
+ return n;
+}
+
+/* dynamically sized list of type IDs */
+struct ids_vec {
+ __u32 *data;
+ int len;
+};
+
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
+{
+ free(cand_ids->data);
+ free(cand_ids);
+}
+
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
+ __u32 local_type_id,
+ const struct btf *targ_btf)
+{
+ size_t local_essent_len, targ_essent_len;
+ const char *local_name, *targ_name;
+ const struct btf_type *t;
+ struct ids_vec *cand_ids;
+ __u32 *new_ids;
+ int i, err, n;
+
+ t = btf__type_by_id(local_btf, local_type_id);
+ if (!t)
+ return ERR_PTR(-EINVAL);
+
+ local_name = btf__name_by_offset(local_btf, t->name_off);
+ if (str_is_empty(local_name))
+ return ERR_PTR(-EINVAL);
+ local_essent_len = bpf_core_essential_name_len(local_name);
+
+ cand_ids = calloc(1, sizeof(*cand_ids));
+ if (!cand_ids)
+ return ERR_PTR(-ENOMEM);
+
+ n = btf__get_nr_types(targ_btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(targ_btf, i);
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
+ if (str_is_empty(targ_name))
+ continue;
+
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+
+ if (strncmp(local_name, targ_name, local_essent_len) == 0) {
+ pr_debug("[%d] %s: found candidate [%d] %s\n",
+ local_type_id, local_name, i, targ_name);
+ new_ids = realloc(cand_ids->data, cand_ids->len + 1);
+ if (!new_ids) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ cand_ids->data = new_ids;
+ cand_ids->data[cand_ids->len++] = i;
+ }
+ }
+ return cand_ids;
+err_out:
+ bpf_core_free_cands(cand_ids);
+ return ERR_PTR(err);
+}
+
+/* Check two types for compatibility, skipping const/volatile/restrict and
+ * typedefs, to ensure we are relocating offset to the compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible;
+ * - any two PTRs are always compatible;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and bitness should match, signedness is ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_FWD:
+ case BTF_KIND_PTR:
+ return 1;
+ case BTF_KIND_ENUM:
+ return local_type->size == targ_type->size;
+ case BTF_KIND_INT:
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0 &&
+ local_type->size == targ_type->size &&
+ btf_int_bits(local_type) == btf_int_bits(targ_type);
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n",
+ btf_kind(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 offset;
+
+ /* bitfield relocations not supported */
+ if (btf_member_bitfield_size(targ_type, i))
+ continue;
+ offset = btf_member_bit_offset(targ_type, i);
+ if (offset % 8)
+ continue;
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->offset += offset / 8;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->offset -= offset / 8;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ if (local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->offset += local_acc->idx * sz;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ * Expected insn->imm value is provided for validation, as well as the new
+ * relocated value.
+ *
+ * Currently three kinds of BPF instructions are supported:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. *(rX) = <imm> (indirect memory assignment with immediate operand).
+ *
+ * If actual insn->imm value is wrong, bail out.
+ */
+static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,
+ __u32 orig_off, __u32 new_off)
+{
+ struct bpf_insn *insn;
+ int insn_idx;
+ __u8 class;
+
+ if (insn_off % sizeof(struct bpf_insn))
+ return -EINVAL;
+ insn_idx = insn_off / sizeof(struct bpf_insn);
+
+ insn = &prog->insns[insn_idx];
+ class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (insn->imm != orig_off)
+ return -EINVAL;
+ insn->imm = new_off;
+ pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n",
+ bpf_program__title(prog, false),
+ insn_idx, orig_off, new_off);
+ } else {
+ pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+ bpf_program__title(prog, false),
+ insn_idx, insn->code, insn->src_reg, insn->dst_reg,
+ insn->off, insn->imm);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct btf *btf_load_raw(const char *path)
+{
+ struct btf *btf;
+ size_t read_cnt;
+ struct stat st;
+ void *data;
+ FILE *f;
+
+ if (stat(path, &st))
+ return ERR_PTR(-errno);
+
+ data = malloc(st.st_size);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ f = fopen(path, "rb");
+ if (!f) {
+ btf = ERR_PTR(-errno);
+ goto cleanup;
+ }
+
+ read_cnt = fread(data, 1, st.st_size, f);
+ fclose(f);
+ if (read_cnt < st.st_size) {
+ btf = ERR_PTR(-EBADF);
+ goto cleanup;
+ }
+
+ btf = btf__new(data, read_cnt);
+
+cleanup:
+ free(data);
+ return btf;
+}
+
+/*
+ * Probe few well-known locations for vmlinux kernel image and try to load BTF
+ * data out of it to use for target BTF.
+ */
+static struct btf *bpf_core_find_kernel_btf(void)
+{
+ struct {
+ const char *path_fmt;
+ bool raw_btf;
+ } locations[] = {
+ /* try canonical vmlinux BTF through sysfs first */
+ { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
+ /* fall back to trying to find vmlinux ELF on disk otherwise */
+ { "/boot/vmlinux-%1$s" },
+ { "/lib/modules/%1$s/vmlinux-%1$s" },
+ { "/lib/modules/%1$s/build/vmlinux" },
+ { "/usr/lib/modules/%1$s/kernel/vmlinux" },
+ { "/usr/lib/debug/boot/vmlinux-%1$s" },
+ { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
+ { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+ };
+ char path[PATH_MAX + 1];
+ struct utsname buf;
+ struct btf *btf;
+ int i;
+
+ uname(&buf);
+
+ for (i = 0; i < ARRAY_SIZE(locations); i++) {
+ snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+
+ if (access(path, R_OK))
+ continue;
+
+ if (locations[i].raw_btf)
+ btf = btf_load_raw(path);
+ else
+ btf = btf__parse_elf(path, NULL);
+
+ pr_debug("loading kernel BTF '%s': %ld\n",
+ path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
+ if (IS_ERR(btf))
+ continue;
+
+ return btf;
+ }
+
+ pr_warning("failed to find valid kernel BTF\n");
+ return ERR_PTR(-ESRCH);
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+ const struct btf_type *t;
+ const char *s;
+ __u32 type_id;
+ int i;
+
+ type_id = spec->spec[0].type_id;
+ t = btf__type_by_id(spec->btf, type_id);
+ s = btf__name_by_offset(spec->btf, t->name_off);
+ libbpf_print(level, "[%u] %s + ", type_id, s);
+
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%d%s", spec->raw_spec[i],
+ i == spec->raw_len - 1 ? " => " : ":");
+
+ libbpf_print(level, "%u @ &x", spec->offset);
+
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+}
+
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
+{
+ return (size_t)key;
+}
+
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+ return k1 == k2;
+}
+
+static void *u32_as_hash_key(__u32 x)
+{
+ return (void *)(uintptr_t)x;
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But start flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is not error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
+ * imprefection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_offset_reloc associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+static int bpf_core_reloc_offset(struct bpf_program *prog,
+ const struct bpf_offset_reloc *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ const struct btf *targ_btf,
+ struct hashmap *cand_cache)
+{
+ const char *prog_name = bpf_program__title(prog, false);
+ struct bpf_core_spec local_spec, cand_spec, targ_spec;
+ const void *type_key = u32_as_hash_key(relo->type_id);
+ const struct btf_type *local_type, *cand_type;
+ const char *local_name, *cand_name;
+ struct ids_vec *cand_ids;
+ __u32 local_id, cand_id;
+ const char *spec_str;
+ int i, j, err;
+
+ local_id = relo->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ if (!local_type)
+ return -EINVAL;
+
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
+ if (str_is_empty(local_name))
+ return -EINVAL;
+
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+ if (str_is_empty(spec_str))
+ return -EINVAL;
+
+ err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+ if (err) {
+ pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
+ prog_name, relo_idx, local_id, local_name, spec_str,
+ err);
+ return -EINVAL;
+ }
+
+ pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
+ if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
+ cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
+ if (IS_ERR(cand_ids)) {
+ pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
+ prog_name, relo_idx, local_id, local_name,
+ PTR_ERR(cand_ids));
+ return PTR_ERR(cand_ids);
+ }
+ err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+ if (err) {
+ bpf_core_free_cands(cand_ids);
+ return err;
+ }
+ }
+
+ for (i = 0, j = 0; i < cand_ids->len; i++) {
+ cand_id = cand_ids->data[i];
+ cand_type = btf__type_by_id(targ_btf, cand_id);
+ cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
+
+ err = bpf_core_spec_match(&local_spec, targ_btf,
+ cand_id, &cand_spec);
+ pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
+ prog_name, relo_idx, i, cand_name);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+ if (err < 0) {
+ pr_warning("prog '%s': relo #%d: matching error: %d\n",
+ prog_name, relo_idx, err);
+ return err;
+ }
+ if (err == 0)
+ continue;
+
+ if (j == 0) {
+ targ_spec = cand_spec;
+ } else if (cand_spec.offset != targ_spec.offset) {
+ /* if there are many candidates, they should all
+ * resolve to the same offset
+ */
+ pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
+ prog_name, relo_idx, cand_spec.offset,
+ targ_spec.offset);
+ return -EINVAL;
+ }
+
+ cand_ids->data[j++] = cand_spec.spec[0].type_id;
+ }
+
+ cand_ids->len = j;
+ if (cand_ids->len == 0) {
+ pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
+ prog_name, relo_idx, local_id, local_name, spec_str);
+ return -ESRCH;
+ }
+
+ err = bpf_core_reloc_insn(prog, relo->insn_off,
+ local_spec.offset, targ_spec.offset);
+ if (err) {
+ pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
+ prog_name, relo_idx, relo->insn_off, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
+{
+ const struct btf_ext_info_sec *sec;
+ const struct bpf_offset_reloc *rec;
+ const struct btf_ext_info *seg;
+ struct hashmap_entry *entry;
+ struct hashmap *cand_cache = NULL;
+ struct bpf_program *prog;
+ struct btf *targ_btf;
+ const char *sec_name;
+ int i, err = 0;
+
+ if (targ_btf_path)
+ targ_btf = btf__parse_elf(targ_btf_path, NULL);
+ else
+ targ_btf = bpf_core_find_kernel_btf();
+ if (IS_ERR(targ_btf)) {
+ pr_warning("failed to get target BTF: %ld\n",
+ PTR_ERR(targ_btf));
+ return PTR_ERR(targ_btf);
+ }
+
+ cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
+ if (IS_ERR(cand_cache)) {
+ err = PTR_ERR(cand_cache);
+ goto out;
+ }
+
+ seg = &obj->btf_ext->offset_reloc_info;
+ for_each_btf_ext_sec(seg, sec) {
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (str_is_empty(sec_name)) {
+ err = -EINVAL;
+ goto out;
+ }
+ prog = bpf_object__find_program_by_title(obj, sec_name);
+ if (!prog) {
+ pr_warning("failed to find program '%s' for CO-RE offset relocation\n",
+ sec_name);
+ err = -EINVAL;
+ goto out;
+ }
+
+ pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ sec_name, sec->num_info);
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = bpf_core_reloc_offset(prog, rec, i, obj->btf,
+ targ_btf, cand_cache);
+ if (err) {
+ pr_warning("prog '%s': relo #%d: failed to relocate: %d\n",
+ sec_name, i, err);
+ goto out;
+ }
+ }
+ }
+
+out:
+ btf__free(targ_btf);
+ if (!IS_ERR_OR_NULL(cand_cache)) {
+ hashmap__for_each_entry(cand_cache, entry, i) {
+ bpf_core_free_cands(entry->value);
+ }
+ hashmap__free(cand_cache);
+ }
+ return err;
+}
+
+static int
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+{
+ int err = 0;
+
+ if (obj->btf_ext->offset_reloc_info.len)
+ err = bpf_core_reloc_offsets(obj, targ_btf_path);
+
+ return err;
+}
+
static int
bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
struct reloc_desc *relo)
return 0;
}
-
static int
-bpf_object__relocate(struct bpf_object *obj)
+bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
struct bpf_program *prog;
size_t i;
int err;
+ if (obj->btf_ext) {
+ err = bpf_object__relocate_core(obj, targ_btf_path);
+ if (err) {
+ pr_warning("failed to perform CO-RE relocations: %d\n",
+ err);
+ return err;
+ }
+ }
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
char *cp, errmsg[STRERR_BUFSIZE];
int log_buf_size = BPF_LOG_BUF_SIZE;
char *log_buf;
- int ret;
+ int btf_fd, ret;
if (!insns || !insns_cnt)
return -EINVAL;
load_attr.license = license;
load_attr.kern_version = kern_version;
load_attr.prog_ifindex = prog->prog_ifindex;
- load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
+ /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
+ if (prog->obj->btf_ext)
+ btf_fd = bpf_object__btf_fd(prog->obj);
+ else
+ btf_fd = -1;
+ load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
load_attr.func_info_cnt = prog->func_info_cnt;
obj->loaded = true;
CHECK_ERR(bpf_object__create_maps(obj), err, out);
- CHECK_ERR(bpf_object__relocate(obj), err, out);
+ CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
return 0;
static const char *fcpu = "/sys/devices/system/cpu/possible";
int len = 0, n = 0, il = 0, ir = 0;
unsigned int start = 0, end = 0;
+ int tmp_cpus = 0;
static int cpus;
char buf[128];
int error = 0;
int fd = -1;
- if (cpus > 0)
- return cpus;
+ tmp_cpus = READ_ONCE(cpus);
+ if (tmp_cpus > 0)
+ return tmp_cpus;
fd = open(fcpu, O_RDONLY);
if (fd < 0) {
}
buf[len] = '\0';
- for (ir = 0, cpus = 0; ir <= len; ir++) {
+ for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
if (buf[ir] == ',' || buf[ir] == '\0') {
buf[ir] = '\0';
} else if (n == 1) {
end = start;
}
- cpus += end - start + 1;
+ tmp_cpus += end - start + 1;
il = ir + 1;
}
}
- if (cpus <= 0) {
- pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu);
+ if (tmp_cpus <= 0) {
+ pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
return -EINVAL;
}
- return cpus;
+
+ WRITE_ONCE(cpus, tmp_cpus);
+ return tmp_cpus;
}