From: David S. Miller Date: Mon, 19 Aug 2019 18:54:03 +0000 (-0700) Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net X-Git-Tag: v5.4-rc1~131^2~210 X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=commitdiff_plain;h=446bf64b613c4433dac4b15f4eaf326beaad3c8e;hp=-c;p=linux.git Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net Merge conflict of mlx5 resolved using instructions in merge commit 9566e650bf7fdf58384bb06df634f7531ca3a97e. Signed-off-by: David S. Miller --- 446bf64b613c4433dac4b15f4eaf326beaad3c8e diff --combined MAINTAINERS index 96d3e60697f5,08176d64eed5..a406947b369e --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -183,7 -183,7 +183,7 @@@ M: Realtek linux nic maintainers L: netdev@vger.kernel.org S: Maintained - F: drivers/net/ethernet/realtek/r8169.c + F: drivers/net/ethernet/realtek/r8169* 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER M: Greg Kroah-Hartman @@@ -938,14 -938,6 +938,14 @@@ S: Supporte F: drivers/mux/adgs1408.c F: Documentation/devicetree/bindings/mux/adi,adgs1408.txt +ANALOG DEVICES INC ADIN DRIVER +M: Alexandru Ardelean +L: netdev@vger.kernel.org +W: http://ez.analog.com/community/linux-device-drivers +S: Supported +F: drivers/net/phy/adin.c +F: Documentation/devicetree/bindings/net/adi,adin.yaml + ANALOG DEVICES INC ADIS DRIVER LIBRARY M: Alexandru Ardelean S: Supported @@@ -3643,12 -3635,9 +3643,12 @@@ S: Maintaine F: Documentation/devicetree/bindings/net/can/ F: drivers/net/can/ F: include/linux/can/dev.h +F: include/linux/can/led.h +F: include/linux/can/rx-offload.h F: include/linux/can/platform/ F: include/uapi/linux/can/error.h F: include/uapi/linux/can/netlink.h +F: include/uapi/linux/can/vxcan.h CAN NETWORK LAYER M: Oliver Hartkopp @@@ -3661,8 -3650,6 +3661,8 @@@ S: Maintaine F: Documentation/networking/can.rst F: net/can/ F: include/linux/can/core.h +F: include/linux/can/skb.h +F: include/net/netns/can.h F: include/uapi/linux/can.h F: include/uapi/linux/can/bcm.h F: include/uapi/linux/can/raw.h @@@ -6078,7 -6065,7 +6078,7 @@@ M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained - F: Documentation/ABI/testing/sysfs-bus-mdio + F: Documentation/ABI/testing/sysfs-class-net-phydev F: Documentation/devicetree/bindings/net/ethernet-phy.yaml F: Documentation/devicetree/bindings/net/mdio* F: Documentation/networking/phy.rst @@@ -6357,7 -6344,7 +6357,7 @@@ FPGA MANAGER FRAMEWOR M: Moritz Fischer L: linux-fpga@vger.kernel.org S: Maintained - T: git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git + T: git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git Q: http://patchwork.kernel.org/project/linux-fpga/list/ F: Documentation/fpga/ F: Documentation/driver-api/fpga/ @@@ -6390,7 -6377,7 +6390,7 @@@ FRAMEBUFFER LAYE M: Bartlomiej Zolnierkiewicz L: dri-devel@lists.freedesktop.org L: linux-fbdev@vger.kernel.org - T: git git://github.com/bzolnier/linux.git + T: git git://anongit.freedesktop.org/drm/drm-misc Q: http://patchwork.kernel.org/project/linux-fbdev/list/ S: Maintained F: Documentation/fb/ @@@ -6454,6 -6441,14 +6454,14 @@@ S: Maintaine F: drivers/perf/fsl_imx8_ddr_perf.c F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt + FREESCALE IMX I2C DRIVER + M: Oleksij Rempel + R: Pengutronix Kernel Team + L: linux-i2c@vger.kernel.org + S: Maintained + F: drivers/i2c/busses/i2c-imx.c + F: Documentation/devicetree/bindings/i2c/i2c-imx.txt + FREESCALE IMX LPI2C DRIVER M: Dong Aisheng L: linux-i2c@vger.kernel.org @@@ -7465,7 -7460,7 +7473,7 @@@ F: drivers/net/hyperv F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c - F: drivers/iommu/hyperv_iommu.c + F: drivers/iommu/hyperv-iommu.c F: net/vmw_vsock/hyperv_transport.c F: include/clocksource/hyperv_timer.h F: include/linux/hyperv.h @@@ -8055,6 -8050,7 +8063,7 @@@ S: Maintaine F: drivers/video/fbdev/i810/ INTEL ASoC DRIVERS + M: Cezary Rojewski M: Pierre-Louis Bossart M: Liam Girdwood M: Jie Yang @@@ -8076,6 -8072,13 +8085,13 @@@ T: git git://git.code.sf.net/p/intel-sa S: Supported F: drivers/scsi/isci/ + INTEL CPU family model numbers + M: Tony Luck + M: x86@kernel.org + L: linux-kernel@vger.kernel.org + S: Supported + F: arch/x86/include/asm/intel-family.h + INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) M: Jani Nikula M: Joonas Lahtinen @@@ -8427,7 -8430,6 +8443,6 @@@ L: linux-xfs@vger.kernel.or L: linux-fsdevel@vger.kernel.org T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git S: Supported - F: fs/iomap.c F: fs/iomap/ F: include/linux/iomap.h @@@ -11156,7 -11158,6 +11171,7 @@@ S: Maintaine W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c F: include/uapi/linux/net_dropmon.h +F: include/net/drop_monitor.h NETWORKING DRIVERS M: "David S. Miller" @@@ -11336,6 -11337,7 +11351,6 @@@ F: include/net/nfc F: include/uapi/linux/nfc.h F: drivers/nfc/ F: include/linux/platform_data/nfcmrvl.h -F: include/linux/platform_data/nxp-nci.h F: Documentation/devicetree/bindings/net/nfc/ NFS, SUNRPC, AND LOCKD CLIENTS @@@ -13230,7 -13232,7 +13245,7 @@@ M: Manish Chopra @@@ -16097,7 -16099,7 +16112,7 @@@ S: Maintaine F: drivers/net/ethernet/ti/netcp* TI PCM3060 ASoC CODEC DRIVER - M: Kirill Marinushkin + M: Kirill Marinushkin L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/pcm3060.txt diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c index 94be97b7952c,8dce4069472b..4c790ffa1a73 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@@ -116,9 -116,6 +116,9 @@@ enum board_idx BCM57508, BCM57504, BCM57502, + BCM57508_NPAR, + BCM57504_NPAR, + BCM57502_NPAR, BCM58802, BCM58804, BCM58808, @@@ -164,9 -161,6 +164,9 @@@ static const struct [BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" }, [BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" }, [BCM57502] = { "Broadcom BCM57502 NetXtreme-E 10Gb/25Gb/50Gb Ethernet" }, + [BCM57508_NPAR] = { "Broadcom BCM57508 NetXtreme-E Ethernet Partition" }, + [BCM57504_NPAR] = { "Broadcom BCM57504 NetXtreme-E Ethernet Partition" }, + [BCM57502_NPAR] = { "Broadcom BCM57502 NetXtreme-E Ethernet Partition" }, [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" }, [BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, @@@ -215,12 -209,6 +215,12 @@@ static const struct pci_device_id bnxt_ { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 }, { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 }, { PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 }, + { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR }, { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV @@@ -840,41 -828,16 +840,41 @@@ static inline int bnxt_alloc_rx_page(st return 0; } -static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 cp_cons, - u32 agg_bufs) +static struct rx_agg_cmp *bnxt_get_agg(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + u16 cp_cons, u16 curr) +{ + struct rx_agg_cmp *agg; + + cp_cons = RING_CMP(ADV_RAW_CMP(cp_cons, curr)); + agg = (struct rx_agg_cmp *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + return agg; +} + +static struct rx_agg_cmp *bnxt_get_tpa_agg_p5(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr, + u16 agg_id, u16 curr) +{ + struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[agg_id]; + + return &tpa_info->agg_arr[curr]; +} + +static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, + u16 start, u32 agg_bufs, bool tpa) { struct bnxt_napi *bnapi = cpr->bnapi; struct bnxt *bp = bnapi->bp; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; u16 prod = rxr->rx_agg_prod; u16 sw_prod = rxr->rx_sw_agg_prod; + bool p5_tpa = false; u32 i; + if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + p5_tpa = true; + for (i = 0; i < agg_bufs; i++) { u16 cons; struct rx_agg_cmp *agg; @@@ -882,10 -845,8 +882,10 @@@ struct rx_bd *prod_bd; struct page *page; - agg = (struct rx_agg_cmp *) - &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + if (p5_tpa) + agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i); + else + agg = bnxt_get_agg(bp, cpr, idx, start + i); cons = agg->rx_agg_cmp_opaque; __clear_bit(cons, rxr->rx_agg_bmap); @@@ -913,6 -874,7 +913,6 @@@ prod = NEXT_RX_AGG(prod); sw_prod = NEXT_RX_AGG(sw_prod); - cp_cons = NEXT_CMP(cp_cons); } rxr->rx_agg_prod = prod; rxr->rx_sw_agg_prod = sw_prod; @@@ -926,7 -888,7 +926,7 @@@ static struct sk_buff *bnxt_rx_page_skb { unsigned int payload = offset_and_len >> 16; unsigned int len = offset_and_len & 0xffff; - struct skb_frag_struct *frag; + skb_frag_t *frag; struct page *page = data; u16 prod = rxr->rx_prod; struct sk_buff *skb; @@@ -957,7 -919,7 +957,7 @@@ frag = &skb_shinfo(skb)->frags[0]; skb_frag_size_sub(frag, payload); - frag->page_offset += payload; + skb_frag_off_add(frag, payload); skb->data_len -= payload; skb->tail += payload; @@@ -995,19 -957,15 +995,19 @@@ static struct sk_buff *bnxt_rx_skb(stru static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, - struct sk_buff *skb, u16 cp_cons, - u32 agg_bufs) + struct sk_buff *skb, u16 idx, + u32 agg_bufs, bool tpa) { struct bnxt_napi *bnapi = cpr->bnapi; struct pci_dev *pdev = bp->pdev; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; u16 prod = rxr->rx_agg_prod; + bool p5_tpa = false; u32 i; + if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + p5_tpa = true; + for (i = 0; i < agg_bufs; i++) { u16 cons, frag_len; struct rx_agg_cmp *agg; @@@ -1015,10 -973,8 +1015,10 @@@ struct page *page; dma_addr_t mapping; - agg = (struct rx_agg_cmp *) - &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + if (p5_tpa) + agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i); + else + agg = bnxt_get_agg(bp, cpr, idx, i); cons = agg->rx_agg_cmp_opaque; frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) & RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; @@@ -1052,7 -1008,7 +1052,7 @@@ * allocated already. */ rxr->rx_agg_prod = prod; - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs - i); + bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa); return NULL; } @@@ -1065,6 -1021,7 +1065,6 @@@ skb->truesize += PAGE_SIZE; prod = NEXT_RX_AGG(prod); - cp_cons = NEXT_CMP(cp_cons); } rxr->rx_agg_prod = prod; return skb; @@@ -1124,10 -1081,9 +1124,10 @@@ static int bnxt_discard_rx(struct bnxt } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { struct rx_tpa_end_cmp *tpa_end = cmp; - agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) & - RX_TPA_END_CMP_AGG_BUFS) >> - RX_TPA_END_CMP_AGG_BUFS_SHIFT; + if (bp->flags & BNXT_FLAG_CHIP_P5) + return 0; + + agg_bufs = TPA_END_AGG_BUFS(tpa_end); } if (agg_bufs) { @@@ -1164,60 -1120,26 +1164,60 @@@ static void bnxt_sched_reset(struct bnx rxr->rx_next_cons = 0xffff; } +static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id) +{ + struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; + u16 idx = agg_id & MAX_TPA_P5_MASK; + + if (test_bit(idx, map->agg_idx_bmap)) + idx = find_first_zero_bit(map->agg_idx_bmap, + BNXT_AGG_IDX_BMAP_SIZE); + __set_bit(idx, map->agg_idx_bmap); + map->agg_id_tbl[agg_id] = idx; + return idx; +} + +static void bnxt_free_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx) +{ + struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; + + __clear_bit(idx, map->agg_idx_bmap); +} + +static u16 bnxt_lookup_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id) +{ + struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; + + return map->agg_id_tbl[agg_id]; +} + static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, struct rx_tpa_start_cmp *tpa_start, struct rx_tpa_start_cmp_ext *tpa_start1) { - u8 agg_id = TPA_START_AGG_ID(tpa_start); - u16 cons, prod; - struct bnxt_tpa_info *tpa_info; struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf; + struct bnxt_tpa_info *tpa_info; + u16 cons, prod, agg_id; struct rx_bd *prod_bd; dma_addr_t mapping; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + agg_id = TPA_START_AGG_ID_P5(tpa_start); + agg_id = bnxt_alloc_agg_idx(rxr, agg_id); + } else { + agg_id = TPA_START_AGG_ID(tpa_start); + } cons = tpa_start->rx_tpa_start_cmp_opaque; prod = rxr->rx_prod; cons_rx_buf = &rxr->rx_buf_ring[cons]; prod_rx_buf = &rxr->rx_buf_ring[prod]; tpa_info = &rxr->rx_tpa[agg_id]; - if (unlikely(cons != rxr->rx_next_cons)) { - netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n", - cons, rxr->rx_next_cons); + if (unlikely(cons != rxr->rx_next_cons || + TPA_START_ERROR(tpa_start))) { + netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n", + cons, rxr->rx_next_cons, + TPA_START_ERROR_CODE(tpa_start1)); bnxt_sched_reset(bp, rxr); return; } @@@ -1262,7 -1184,6 +1262,7 @@@ tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2); tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata); tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info); + tpa_info->agg_count = 0; rxr->rx_prod = NEXT_RX(prod); cons = NEXT_RX(cons); @@@ -1274,37 -1195,13 +1274,37 @@@ cons_rx_buf->data = NULL; } -static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 cp_cons, - u32 agg_bufs) +static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 idx, u32 agg_bufs) { if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + bnxt_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true); } +#ifdef CONFIG_INET +static void bnxt_gro_tunnel(struct sk_buff *skb, __be16 ip_proto) +{ + struct udphdr *uh = NULL; + + if (ip_proto == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } else { + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + + if (iph->nexthdr == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } + if (uh) { + if (uh->check) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } +} +#endif + static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info, int payload_off, int tcp_ts, struct sk_buff *skb) @@@ -1362,39 -1259,28 +1362,39 @@@ } if (inner_mac_off) { /* tunnel */ - struct udphdr *uh = NULL; __be16 proto = *((__be16 *)(skb->data + outer_ip_off - ETH_HLEN - 2)); - if (proto == htons(ETH_P_IP)) { - struct iphdr *iph = (struct iphdr *)skb->data; + bnxt_gro_tunnel(skb, proto); + } +#endif + return skb; +} - if (iph->protocol == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } else { - struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; +static struct sk_buff *bnxt_gro_func_5750x(struct bnxt_tpa_info *tpa_info, + int payload_off, int tcp_ts, + struct sk_buff *skb) +{ +#ifdef CONFIG_INET + u16 outer_ip_off, inner_ip_off, inner_mac_off; + u32 hdr_info = tpa_info->hdr_info; + int iphdr_len, nw_off; - if (iph->nexthdr == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } - if (uh) { - if (uh->check) - skb_shinfo(skb)->gso_type |= - SKB_GSO_UDP_TUNNEL_CSUM; - else - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } + inner_ip_off = BNXT_TPA_INNER_L3_OFF(hdr_info); + inner_mac_off = BNXT_TPA_INNER_L2_OFF(hdr_info); + outer_ip_off = BNXT_TPA_OUTER_L3_OFF(hdr_info); + + nw_off = inner_ip_off - ETH_HLEN; + skb_set_network_header(skb, nw_off); + iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ? + sizeof(struct ipv6hdr) : sizeof(struct iphdr); + skb_set_transport_header(skb, nw_off + iphdr_len); + + if (inner_mac_off) { /* tunnel */ + __be16 proto = *((__be16 *)(skb->data + outer_ip_off - + ETH_HLEN - 2)); + + bnxt_gro_tunnel(skb, proto); } #endif return skb; @@@ -1441,8 -1327,28 +1441,8 @@@ static struct sk_buff *bnxt_gro_func_57 return NULL; } - if (nw_off) { /* tunnel */ - struct udphdr *uh = NULL; - - if (skb->protocol == htons(ETH_P_IP)) { - struct iphdr *iph = (struct iphdr *)skb->data; - - if (iph->protocol == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } else { - struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; - - if (iph->nexthdr == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } - if (uh) { - if (uh->check) - skb_shinfo(skb)->gso_type |= - SKB_GSO_UDP_TUNNEL_CSUM; - else - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } - } + if (nw_off) /* tunnel */ + bnxt_gro_tunnel(skb, skb->protocol); #endif return skb; } @@@ -1465,10 -1371,9 +1465,10 @@@ static inline struct sk_buff *bnxt_gro_ skb_shinfo(skb)->gso_size = le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len); skb_shinfo(skb)->gso_type = tpa_info->gso_type; - payload_off = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) & - RX_TPA_END_CMP_PAYLOAD_OFFSET) >> - RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT; + if (bp->flags & BNXT_FLAG_CHIP_P5) + payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1); + else + payload_off = TPA_END_PAYLOAD_OFF(tpa_end); skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb); if (likely(skb)) tcp_gro_complete(skb); @@@ -1496,14 -1401,14 +1496,14 @@@ static inline struct sk_buff *bnxt_tpa_ { struct bnxt_napi *bnapi = cpr->bnapi; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - u8 agg_id = TPA_END_AGG_ID(tpa_end); u8 *data_ptr, agg_bufs; - u16 cp_cons = RING_CMP(*raw_cons); unsigned int len; struct bnxt_tpa_info *tpa_info; dma_addr_t mapping; struct sk_buff *skb; + u16 idx = 0, agg_id; void *data; + bool gro; if (unlikely(bnapi->in_reset)) { int rc = bnxt_discard_rx(bp, cpr, raw_cons, tpa_end); @@@ -1513,43 -1418,26 +1513,43 @@@ return NULL; } - tpa_info = &rxr->rx_tpa[agg_id]; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + agg_id = TPA_END_AGG_ID_P5(tpa_end); + agg_id = bnxt_lookup_agg_idx(rxr, agg_id); + agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1); + tpa_info = &rxr->rx_tpa[agg_id]; + if (unlikely(agg_bufs != tpa_info->agg_count)) { + netdev_warn(bp->dev, "TPA end agg_buf %d != expected agg_bufs %d\n", + agg_bufs, tpa_info->agg_count); + agg_bufs = tpa_info->agg_count; + } + tpa_info->agg_count = 0; + *event |= BNXT_AGG_EVENT; + bnxt_free_agg_idx(rxr, agg_id); + idx = agg_id; + gro = !!(bp->flags & BNXT_FLAG_GRO); + } else { + agg_id = TPA_END_AGG_ID(tpa_end); + agg_bufs = TPA_END_AGG_BUFS(tpa_end); + tpa_info = &rxr->rx_tpa[agg_id]; + idx = RING_CMP(*raw_cons); + if (agg_bufs) { + if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons)) + return ERR_PTR(-EBUSY); + + *event |= BNXT_AGG_EVENT; + idx = NEXT_CMP(idx); + } + gro = !!TPA_END_GRO(tpa_end); + } data = tpa_info->data; data_ptr = tpa_info->data_ptr; prefetch(data_ptr); len = tpa_info->len; mapping = tpa_info->mapping; - agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) & - RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT; - - if (agg_bufs) { - if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons)) - return ERR_PTR(-EBUSY); - - *event |= BNXT_AGG_EVENT; - cp_cons = NEXT_CMP(cp_cons); - } - if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) { - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); if (agg_bufs > MAX_SKB_FRAGS) netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n", agg_bufs, (int)MAX_SKB_FRAGS); @@@ -1559,7 -1447,7 +1559,7 @@@ if (len <= bp->rx_copy_thresh) { skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); return NULL; } } else { @@@ -1568,7 -1456,7 +1568,7 @@@ new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); return NULL; } @@@ -1583,7 -1471,7 +1583,7 @@@ if (!skb) { kfree(data); - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); return NULL; } skb_reserve(skb, bp->rx_offset); @@@ -1591,7 -1479,7 +1591,7 @@@ } if (agg_bufs) { - skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs); + skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). */ return NULL; @@@ -1620,24 -1508,12 +1620,24 @@@ (tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3; } - if (TPA_END_GRO(tpa_end)) + if (gro) skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb); return skb; } +static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + struct rx_agg_cmp *rx_agg) +{ + u16 agg_id = TPA_AGG_AGG_ID(rx_agg); + struct bnxt_tpa_info *tpa_info; + + agg_id = bnxt_lookup_agg_idx(rxr, agg_id); + tpa_info = &rxr->rx_tpa[agg_id]; + BUG_ON(tpa_info->agg_count >= MAX_SKB_FRAGS); + tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg; +} + static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, struct sk_buff *skb) { @@@ -1679,13 -1555,6 +1679,13 @@@ static int bnxt_rx_pkt(struct bnxt *bp rxcmp = (struct rx_cmp *) &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + cmp_type = RX_CMP_TYPE(rxcmp); + + if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) { + bnxt_tpa_agg(bp, rxr, (struct rx_agg_cmp *)rxcmp); + goto next_rx_no_prod_no_len; + } + tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons); cp_cons = RING_CMP(tmp_raw_cons); rxcmp1 = (struct rx_cmp_ext *) @@@ -1694,6 -1563,8 +1694,6 @@@ if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; - cmp_type = RX_CMP_TYPE(rxcmp); - prod = rxr->rx_prod; if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) { @@@ -1752,8 -1623,7 +1752,8 @@@ bnxt_reuse_rx_data(rxr, cons, data); if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs, + false); rc = -EIO; if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { @@@ -1776,8 -1646,7 +1776,8 @@@ bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, + agg_bufs, false); rc = -ENOMEM; goto next_rx; } @@@ -1797,7 -1666,7 +1797,7 @@@ } if (agg_bufs) { - skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs); + skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false); if (!skb) { rc = -ENOMEM; goto next_rx; @@@ -2152,9 -2021,9 +2152,9 @@@ static void __bnxt_poll_work_done(struc if (bnapi->events & BNXT_RX_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bnapi->events & BNXT_AGG_EVENT) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); } bnapi->events = 0; } @@@ -2456,11 -2325,10 +2456,11 @@@ static void bnxt_free_rx_skbs(struct bn max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + struct bnxt_tpa_idx_map *map; int j; if (rxr->rx_tpa) { - for (j = 0; j < MAX_TPA; j++) { + for (j = 0; j < bp->max_tpa; j++) { struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[j]; u8 *data = tpa_info->data; @@@ -2527,9 -2395,6 +2527,9 @@@ __free_page(rxr->rx_page); rxr->rx_page = NULL; } + map = rxr->rx_tpa_idx_map; + if (map) + memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap)); } } @@@ -2618,61 -2483,6 +2618,61 @@@ static int bnxt_alloc_ring(struct bnxt return 0; } +static void bnxt_free_tpa_info(struct bnxt *bp) +{ + int i; + + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + + kfree(rxr->rx_tpa_idx_map); + rxr->rx_tpa_idx_map = NULL; + if (rxr->rx_tpa) { + kfree(rxr->rx_tpa[0].agg_arr); + rxr->rx_tpa[0].agg_arr = NULL; + } + kfree(rxr->rx_tpa); + rxr->rx_tpa = NULL; + } +} + +static int bnxt_alloc_tpa_info(struct bnxt *bp) +{ + int i, j, total_aggs = 0; + + bp->max_tpa = MAX_TPA; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (!bp->max_tpa_v2) + return 0; + bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + total_aggs = bp->max_tpa * MAX_SKB_FRAGS; + } + + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + struct rx_agg_cmp *agg; + + rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info), + GFP_KERNEL); + if (!rxr->rx_tpa) + return -ENOMEM; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + continue; + agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL); + rxr->rx_tpa[0].agg_arr = agg; + if (!agg) + return -ENOMEM; + for (j = 1; j < bp->max_tpa; j++) + rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS; + rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), + GFP_KERNEL); + if (!rxr->rx_tpa_idx_map) + return -ENOMEM; + } + return 0; +} + static void bnxt_free_rx_rings(struct bnxt *bp) { int i; @@@ -2680,7 -2490,6 +2680,7 @@@ if (!bp->rx_ring) return; + bnxt_free_tpa_info(bp); for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; @@@ -2694,6 -2503,9 +2694,6 @@@ page_pool_destroy(rxr->page_pool); rxr->page_pool = NULL; - kfree(rxr->rx_tpa); - rxr->rx_tpa = NULL; - kfree(rxr->rx_agg_bmap); rxr->rx_agg_bmap = NULL; @@@ -2727,7 -2539,7 +2727,7 @@@ static int bnxt_alloc_rx_page_pool(stru static int bnxt_alloc_rx_rings(struct bnxt *bp) { - int i, rc, agg_rings = 0, tpa_rings = 0; + int i, rc = 0, agg_rings = 0; if (!bp->rx_ring) return -ENOMEM; @@@ -2735,6 -2547,9 +2735,6 @@@ if (bp->flags & BNXT_FLAG_AGG_RINGS) agg_rings = 1; - if (bp->flags & BNXT_FLAG_TPA) - tpa_rings = 1; - for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; @@@ -2776,11 -2591,17 +2776,11 @@@ rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL); if (!rxr->rx_agg_bmap) return -ENOMEM; - - if (tpa_rings) { - rxr->rx_tpa = kcalloc(MAX_TPA, - sizeof(struct bnxt_tpa_info), - GFP_KERNEL); - if (!rxr->rx_tpa) - return -ENOMEM; - } } } - return 0; + if (bp->flags & BNXT_FLAG_TPA) + rc = bnxt_alloc_tpa_info(bp); + return rc; } static void bnxt_free_tx_rings(struct bnxt *bp) @@@ -3132,7 -2953,7 +3132,7 @@@ static int bnxt_init_one_rx_ring(struc u8 *data; dma_addr_t mapping; - for (i = 0; i < MAX_TPA; i++) { + for (i = 0; i < bp->max_tpa; i++) { data = __bnxt_alloc_rx_data(bp, &mapping, GFP_KERNEL); if (!data) @@@ -3647,7 -3468,7 +3647,7 @@@ static void bnxt_free_ring_stats(struc if (!bp->bnapi) return; - size = sizeof(struct ctx_hw_stats); + size = bp->hw_ring_stats_size; for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; @@@ -3666,7 -3487,7 +3666,7 @@@ static int bnxt_alloc_stats(struct bnx u32 size, i; struct pci_dev *pdev = bp->pdev; - size = sizeof(struct ctx_hw_stats); + size = bp->hw_ring_stats_size; for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; @@@ -4593,7 -4414,6 +4593,7 @@@ static int bnxt_hwrm_clear_vnic_filter( static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) { struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; + u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX; struct hwrm_vnic_tpa_cfg_input req = {0}; if (vnic->fw_vnic_id == INVALID_HW_RING_ID) @@@ -4633,14 -4453,9 +4633,14 @@@ nsegs = (MAX_SKB_FRAGS - n) / n; } - segs = ilog2(nsegs); + if (bp->flags & BNXT_FLAG_CHIP_P5) { + segs = MAX_TPA_SEGS_P5; + max_aggs = bp->max_tpa; + } else { + segs = ilog2(nsegs); + } req.max_agg_segs = cpu_to_le16(segs); - req.max_aggs = cpu_to_le16(VNIC_TPA_CFG_REQ_MAX_AGGS_MAX); + req.max_aggs = cpu_to_le16(max_aggs); req.min_agg_len = cpu_to_le32(512); } @@@ -5000,12 -4815,6 +5000,12 @@@ static int bnxt_hwrm_vnic_qcaps(struct if (flags & VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP) bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP; + bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported); + if (bp->max_tpa_v2) + bp->hw_ring_stats_size = + sizeof(struct ctx_hw_stats_ext); + else + bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats); } mutex_unlock(&bp->hwrm_cmd_lock); return rc; @@@ -5255,6 -5064,7 +5255,7 @@@ static void bnxt_set_db(struct bnxt *bp static int bnxt_hwrm_ring_alloc(struct bnxt *bp) { + bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS); int i, rc = 0; u32 type; @@@ -5330,7 -5140,9 +5331,9 @@@ if (rc) goto err_out; bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id); - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + /* If we have agg rings, post agg buffers first. */ + if (!agg_rings) + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; if (bp->flags & BNXT_FLAG_CHIP_P5) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; @@@ -5349,7 -5161,7 +5352,7 @@@ } } - if (bp->flags & BNXT_FLAG_AGG_RINGS) { + if (agg_rings) { type = HWRM_RING_ALLOC_AGG; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; @@@ -5365,6 -5177,7 +5368,7 @@@ bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx, ring->fw_ring_id); bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id; } } @@@ -6203,7 -6016,6 +6207,7 @@@ static int bnxt_hwrm_stat_ctx_alloc(str bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1); + req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size); req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000); mutex_lock(&bp->hwrm_cmd_lock); @@@ -7208,19 -7020,29 +7212,29 @@@ static void bnxt_hwrm_clear_vnic_rss(st bnxt_hwrm_vnic_set_rss(bp, i, false); } - static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, - bool irq_re_init) + static void bnxt_clear_vnic(struct bnxt *bp) { - if (bp->vnic_info) { - bnxt_hwrm_clear_vnic_filter(bp); + if (!bp->vnic_info) + return; + + bnxt_hwrm_clear_vnic_filter(bp); + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { /* clear all RSS setting before free vnic ctx */ bnxt_hwrm_clear_vnic_rss(bp); bnxt_hwrm_vnic_ctx_free(bp); - /* before free the vnic, undo the vnic tpa settings */ - if (bp->flags & BNXT_FLAG_TPA) - bnxt_set_tpa(bp, false); - bnxt_hwrm_vnic_free(bp); } + /* before free the vnic, undo the vnic tpa settings */ + if (bp->flags & BNXT_FLAG_TPA) + bnxt_set_tpa(bp, false); + bnxt_hwrm_vnic_free(bp); + if (bp->flags & BNXT_FLAG_CHIP_P5) + bnxt_hwrm_vnic_ctx_free(bp); + } + + static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, + bool irq_re_init) + { + bnxt_clear_vnic(bp); bnxt_hwrm_ring_free(bp, close_path); bnxt_hwrm_ring_grp_free(bp); if (irq_re_init) { @@@ -9484,8 -9306,7 +9498,8 @@@ static int bnxt_set_features(struct net if (changes & BNXT_FLAG_TPA) { update_tpa = true; if ((bp->flags & BNXT_FLAG_TPA) == 0 || - (flags & BNXT_FLAG_TPA) == 0) + (flags & BNXT_FLAG_TPA) == 0 || + (bp->flags & BNXT_FLAG_CHIP_P5)) re_init = true; } @@@ -9495,8 -9316,9 +9509,8 @@@ if (flags != bp->flags) { u32 old_flags = bp->flags; - bp->flags = flags; - if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { + bp->flags = flags; if (update_tpa) bnxt_set_ring_params(bp); return rc; @@@ -9504,14 -9326,12 +9518,14 @@@ if (re_init) { bnxt_close_nic(bp, false, false); + bp->flags = flags; if (update_tpa) bnxt_set_ring_params(bp); return bnxt_open_nic(bp, false, false); } if (update_tpa) { + bp->flags = flags; rc = bnxt_set_tpa(bp, (flags & BNXT_FLAG_TPA) ? true : false); @@@ -9908,68 -9728,6 +9922,68 @@@ static void bnxt_init_dflt_coal(struct bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; } +static int bnxt_fw_init_one_p1(struct bnxt *bp) +{ + int rc; + + bp->fw_cap = 0; + rc = bnxt_hwrm_ver_get(bp); + if (rc) + return rc; + + if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) { + rc = bnxt_alloc_kong_hwrm_resources(bp); + if (rc) + bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL; + } + + if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) || + bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) { + rc = bnxt_alloc_hwrm_short_cmd_req(bp); + if (rc) + return rc; + } + rc = bnxt_hwrm_func_reset(bp); + if (rc) + return -ENODEV; + + bnxt_hwrm_fw_set_time(bp); + return 0; +} + +static int bnxt_fw_init_one_p2(struct bnxt *bp) +{ + int rc; + + /* Get the MAX capabilities for this function */ + rc = bnxt_hwrm_func_qcaps(bp); + if (rc) { + netdev_err(bp->dev, "hwrm query capability failure rc: %x\n", + rc); + return -ENODEV; + } + + rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp); + if (rc) + netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n", + rc); + + rc = bnxt_hwrm_func_drv_rgtr(bp); + if (rc) + return -ENODEV; + + rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0); + if (rc) + return -ENODEV; + + bnxt_hwrm_func_qcfg(bp); + bnxt_hwrm_vnic_qcaps(bp); + bnxt_hwrm_port_led_qcaps(bp); + bnxt_ethtool_init(bp); + bnxt_dcb_init(bp); + return 0; +} + static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) { int rc; @@@ -10925,18 -10683,32 +10939,18 @@@ static int bnxt_init_one(struct pci_de goto init_err_pci_clean; mutex_init(&bp->hwrm_cmd_lock); - rc = bnxt_hwrm_ver_get(bp); + + rc = bnxt_fw_init_one_p1(bp); if (rc) goto init_err_pci_clean; - if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) { - rc = bnxt_alloc_kong_hwrm_resources(bp); - if (rc) - bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL; - } - - if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) || - bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) { - rc = bnxt_alloc_hwrm_short_cmd_req(bp); - if (rc) - goto init_err_pci_clean; - } - if (BNXT_CHIP_P5(bp)) bp->flags |= BNXT_FLAG_CHIP_P5; - rc = bnxt_hwrm_func_reset(bp); + rc = bnxt_fw_init_one_p2(bp); if (rc) goto init_err_pci_clean; - bnxt_hwrm_fw_set_time(bp); - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | @@@ -10974,14 -10746,41 +10988,14 @@@ bp->gro_func = bnxt_gro_func_5730x; if (BNXT_CHIP_P4(bp)) bp->gro_func = bnxt_gro_func_5731x; + else if (BNXT_CHIP_P5(bp)) + bp->gro_func = bnxt_gro_func_5750x; } if (!BNXT_CHIP_P4_PLUS(bp)) bp->flags |= BNXT_FLAG_DOUBLE_DB; - rc = bnxt_hwrm_func_drv_rgtr(bp); - if (rc) - goto init_err_pci_clean; - - rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0); - if (rc) - goto init_err_pci_clean; - bp->ulp_probe = bnxt_ulp_probe; - rc = bnxt_hwrm_queue_qportcfg(bp); - if (rc) { - netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n", - rc); - rc = -1; - goto init_err_pci_clean; - } - /* Get the MAX capabilities for this function */ - rc = bnxt_hwrm_func_qcaps(bp); - if (rc) { - netdev_err(bp->dev, "hwrm query capability failure rc: %x\n", - rc); - rc = -1; - goto init_err_pci_clean; - } - - rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp); - if (rc) - netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n", - rc); - rc = bnxt_init_mac_addr(bp); if (rc) { dev_err(&pdev->dev, "Unable to initialize mac address.\n"); @@@ -10995,6 -10794,11 +11009,6 @@@ if (rc) goto init_err_pci_clean; } - bnxt_hwrm_func_qcfg(bp); - bnxt_hwrm_vnic_qcaps(bp); - bnxt_hwrm_port_led_qcaps(bp); - bnxt_ethtool_init(bp); - bnxt_dcb_init(bp); /* MTU range: 60 - FW defined max */ dev->min_mtu = ETH_ZLEN; @@@ -11130,7 -10934,8 +11144,7 @@@ shutdown_exit #ifdef CONFIG_PM_SLEEP static int bnxt_suspend(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct net_device *dev = pci_get_drvdata(pdev); + struct net_device *dev = dev_get_drvdata(device); struct bnxt *bp = netdev_priv(dev); int rc = 0; @@@ -11146,7 -10951,8 +11160,7 @@@ static int bnxt_resume(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct net_device *dev = pci_get_drvdata(pdev); + struct net_device *dev = dev_get_drvdata(device); struct bnxt *bp = netdev_priv(dev); int rc = 0; diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 3a3d8a9be5ed,8445a0cce849..b624174c8594 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@@ -137,44 -137,7 +137,44 @@@ reset_coalesce return rc; } -#define BNXT_NUM_STATS 22 +static const char * const bnxt_ring_stats_str[] = { + "rx_ucast_packets", + "rx_mcast_packets", + "rx_bcast_packets", + "rx_discards", + "rx_drops", + "rx_ucast_bytes", + "rx_mcast_bytes", + "rx_bcast_bytes", + "tx_ucast_packets", + "tx_mcast_packets", + "tx_bcast_packets", + "tx_discards", + "tx_drops", + "tx_ucast_bytes", + "tx_mcast_bytes", + "tx_bcast_bytes", +}; + +static const char * const bnxt_ring_tpa_stats_str[] = { + "tpa_packets", + "tpa_bytes", + "tpa_events", + "tpa_aborts", +}; + +static const char * const bnxt_ring_tpa2_stats_str[] = { + "rx_tpa_eligible_pkt", + "rx_tpa_eligible_bytes", + "rx_tpa_pkt", + "rx_tpa_bytes", + "rx_tpa_errors", +}; + +static const char * const bnxt_ring_sw_stats_str[] = { + "rx_l4_csum_errors", + "missed_irqs", +}; #define BNXT_RX_STATS_ENTRY(counter) \ { BNXT_RX_STATS_OFFSET(counter), __stringify(counter) } @@@ -244,20 -207,6 +244,20 @@@ BNXT_TX_STATS_EXT_COS_ENTRY(6), \ BNXT_TX_STATS_EXT_COS_ENTRY(7) \ +#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(n) \ + BNXT_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n), \ + BNXT_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n) + +#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(0), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(1), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(2), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(3), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(4), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(5), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(6), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(7) + #define BNXT_RX_STATS_PRI_ENTRY(counter, n) \ { BNXT_RX_STATS_EXT_OFFSET(counter##_cos0), \ __stringify(counter##_pri##n) } @@@ -403,7 -352,6 +403,7 @@@ static const struct BNXT_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold), BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err), BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits), + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES, }; static const struct { @@@ -469,29 -417,9 +469,29 @@@ static const struct ARRAY_SIZE(bnxt_tx_pkts_pri_arr)) #define BNXT_NUM_PCIE_STATS ARRAY_SIZE(bnxt_pcie_stats_arr) +static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp) +{ + if (BNXT_SUPPORTS_TPA(bp)) { + if (bp->max_tpa_v2) + return ARRAY_SIZE(bnxt_ring_tpa2_stats_str); + return ARRAY_SIZE(bnxt_ring_tpa_stats_str); + } + return 0; +} + +static int bnxt_get_num_ring_stats(struct bnxt *bp) +{ + int num_stats; + + num_stats = ARRAY_SIZE(bnxt_ring_stats_str) + + ARRAY_SIZE(bnxt_ring_sw_stats_str) + + bnxt_get_num_tpa_ring_stats(bp); + return num_stats * bp->cp_nr_rings; +} + static int bnxt_get_num_stats(struct bnxt *bp) { - int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings; + int num_stats = bnxt_get_num_ring_stats(bp); num_stats += BNXT_NUM_SW_FUNC_STATS; @@@ -532,11 -460,10 +532,11 @@@ static void bnxt_get_ethtool_stats(stru { u32 i, j = 0; struct bnxt *bp = netdev_priv(dev); - u32 stat_fields = sizeof(struct ctx_hw_stats) / 8; + u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) + + bnxt_get_num_tpa_ring_stats(bp); if (!bp->bnapi) { - j += BNXT_NUM_STATS * bp->cp_nr_rings + BNXT_NUM_SW_FUNC_STATS; + j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS; goto skip_ring_stats; } @@@ -624,39 -551,56 +624,39 @@@ skip_ring_stats static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { struct bnxt *bp = netdev_priv(dev); - u32 i; + static const char * const *str; + u32 i, j, num_str; switch (stringset) { - /* The number of strings must match BNXT_NUM_STATS defined above. */ case ETH_SS_STATS: for (i = 0; i < bp->cp_nr_rings; i++) { - sprintf(buf, "[%d]: rx_ucast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_mcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_bcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_discards", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_drops", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_ucast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_mcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_bcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_ucast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_mcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_bcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_discards", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_drops", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_ucast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_mcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_bcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_events", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_aborts", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_l4_csum_errors", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: missed_irqs", i); - buf += ETH_GSTRING_LEN; + num_str = ARRAY_SIZE(bnxt_ring_stats_str); + for (j = 0; j < num_str; j++) { + sprintf(buf, "[%d]: %s", i, + bnxt_ring_stats_str[j]); + buf += ETH_GSTRING_LEN; + } + if (!BNXT_SUPPORTS_TPA(bp)) + goto skip_tpa_stats; + + if (bp->max_tpa_v2) { + num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str); + str = bnxt_ring_tpa2_stats_str; + } else { + num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str); + str = bnxt_ring_tpa_stats_str; + } + for (j = 0; j < num_str; j++) { + sprintf(buf, "[%d]: %s", i, str[j]); + buf += ETH_GSTRING_LEN; + } +skip_tpa_stats: + num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str); + for (j = 0; j < num_str; j++) { + sprintf(buf, "[%d]: %s", i, + bnxt_ring_sw_stats_str[j]); + buf += ETH_GSTRING_LEN; + } } for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) { strcpy(buf, bnxt_sw_func_stats[i].string); @@@ -2072,21 -2016,19 +2072,19 @@@ static int bnxt_flash_package_from_file mutex_lock(&bp->hwrm_cmd_lock); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; - - if (resp->error_code) { + if (hwrm_err) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; - if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { + if (resp->error_code && error_code == + NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; } + if (hwrm_err) + goto flash_pkg_exit; } if (resp->result) { diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index dd99c55d9a88,d692251ee252..ae6a47dd7dc9 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@@ -3236,8 -3236,10 +3236,10 @@@ static ssize_t blocked_fl_write(struct return -ENOMEM; err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz); - if (err) + if (err) { + kvfree(t); return err; + } bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz); kvfree(t); @@@ -3529,6 -3531,7 +3531,6 @@@ int t4_setup_debugfs(struct adapter *ad { int i; u32 size = 0; - struct dentry *de; static struct t4_debugfs_entry t4_debugfs_files[] = { { "cim_la", &cim_la_fops, 0400, 0 }, @@@ -3639,8 -3642,8 +3641,8 @@@ } } - de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap, - &flash_debugfs_fops, adap->params.sf_size); + debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap, + &flash_debugfs_fops, adap->params.sf_size); debugfs_create_bool("use_backdoor", 0600, adap->debugfs_root, &adap->use_bd); debugfs_create_bool("trace_rss", 0600, diff --combined drivers/net/ethernet/ibm/ibmvnic.c index 81a05ea38237,cebd20f3128d..07efa2b40003 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@@ -1485,7 -1485,7 +1485,7 @@@ static netdev_tx_t ibmvnic_xmit(struct memcpy(dst + cur, page_address(skb_frag_page(frag)) + - frag->page_offset, skb_frag_size(frag)); + skb_frag_off(frag), skb_frag_size(frag)); cur += skb_frag_size(frag); } } else { @@@ -1568,6 -1568,8 +1568,8 @@@ lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], (u64)tx_buff->indir_dma, (u64)num_entries); + dma_unmap_single(dev, tx_buff->indir_dma, + sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); } else { tx_buff->num_entries = num_entries; lpar_rc = send_subcrq(adapter, handle_array[queue_num], @@@ -2788,7 -2790,6 +2790,6 @@@ static int ibmvnic_complete_tx(struct i union sub_crq *next; int index; int i, j; - u8 *first; restart_loop: while (pending_scrq(adapter, scrq)) { @@@ -2818,14 -2819,6 +2819,6 @@@ txbuff->data_dma[j] = 0; } - /* if sub_crq was sent indirectly */ - first = &txbuff->indir_arr[0].generic.first; - if (*first == IBMVNIC_CRQ_CMD) { - dma_unmap_single(dev, txbuff->indir_dma, - sizeof(txbuff->indir_arr), - DMA_TO_DEVICE); - *first = 0; - } if (txbuff->last_frag) { dev_kfree_skb_any(txbuff->skb); diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index dc7b128c780e,7882148abb43..17b7ae9f46ec --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@@ -1785,7 -1785,7 +1785,7 @@@ static bool ixgbe_is_non_eop(struct ixg static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; unsigned char *va; unsigned int pull_len; @@@ -1807,7 -1807,7 +1807,7 @@@ /* update all of the pointers */ skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; + skb_frag_off_add(frag, pull_len); skb->data_len -= pull_len; skb->tail += pull_len; } @@@ -1840,11 -1840,11 +1840,11 @@@ static void ixgbe_dma_sync_frag(struct skb_headlen(skb), DMA_FROM_DEVICE); } else { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), DMA_FROM_DEVICE); } @@@ -7897,11 -7897,8 +7897,8 @@@ static void ixgbe_service_task(struct w return; } if (ixgbe_check_fw_error(adapter)) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - rtnl_lock(); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) unregister_netdev(adapter->netdev); - rtnl_unlock(); - } ixgbe_service_event_complete(adapter); return; } @@@ -8186,7 -8183,7 +8183,7 @@@ static int ixgbe_tx_map(struct ixgbe_ri struct sk_buff *skb = first->skb; struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag; + skb_frag_t *frag; dma_addr_t dma; unsigned int data_len, size; u32 tx_flags = first->tx_flags; @@@ -8605,8 -8602,7 +8602,8 @@@ netdev_tx_t ixgbe_xmit_frame_ring(struc * otherwise try next time */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + count += TXD_USE_COUNT(skb_frag_size( + &skb_shinfo(skb)->frags[f])); if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) { tx_ring->tx_stats.tx_busy++; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index 0807992090b8,65bec19a438f..8cf548c7ad9c --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -184,8 -184,13 +184,13 @@@ static inline int mlx5e_get_max_num_cha struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + union { + struct { + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; + }; + u8 tls_progress_params_ctx[0]; + }; }; struct mlx5e_rx_wqe_ll { @@@ -351,7 -356,6 +356,7 @@@ enum MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, }; struct mlx5e_sq_wqe_info { @@@ -476,6 -480,8 +481,6 @@@ struct mlx5e_xdp_mpwqe struct mlx5e_tx_wqe *wqe; u8 ds_count; u8 pkt_count; - u8 max_ds_count; - u8 complete; u8 inline_on; }; @@@ -1099,6 -1105,8 +1104,8 @@@ u32 mlx5e_ethtool_get_rxfh_key_size(str u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); + int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@@ -1127,6 -1135,7 +1134,6 @@@ void mlx5e_build_rq_params(struct mlx5_ struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 6e54fefea410,c7f86453c638..817c6ea7e349 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@@ -1,6 -1,7 +1,6 @@@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2019 Mellanox Technologies. */ -#include #include "reporter.h" #include "lib/eq.h" @@@ -75,26 -76,21 +75,21 @@@ static int mlx5e_tx_reporter_err_cqe_re u8 state; int err; - if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) - return 0; - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); if (err) { netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", sq->sqn, err); - return err; + goto out; } - if (state != MLX5_SQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return -EINVAL; - } + if (state != MLX5_SQC_STATE_ERR) + goto out; mlx5e_tx_disable_queue(sq->txq); err = mlx5e_wait_for_sq_flush(sq); if (err) - return err; + goto out; /* At this point, no new packets will arrive from the stack as TXQ is * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all @@@ -103,20 -99,24 +98,24 @@@ err = mlx5e_sq_to_ready(sq, state); if (err) - return err; + goto out; mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); mlx5e_activate_txqsq(sq); return 0; + out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + return err; } static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, char *err_str, struct mlx5e_tx_err_ctx *err_ctx) { - if (IS_ERR_OR_NULL(tx_reporter)) { + if (!tx_reporter) { netdev_err(err_ctx->sq->channel->netdev, err_str); return err_ctx->recover(err_ctx->sq); } @@@ -288,27 -288,23 +287,27 @@@ static const struct devlink_health_repo int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) { + struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; struct devlink *devlink = priv_to_devlink(mdev); - priv->tx_reporter = + reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) + if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", - PTR_ERR(priv->tx_reporter)); - return IS_ERR_OR_NULL(priv->tx_reporter); + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->tx_reporter = reporter; + return 0; } void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->tx_reporter)) + if (!priv->tx_reporter) return; devlink_health_reporter_destroy(priv->tx_reporter); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index f701e4f3c076,7f78c004d12f..2c4d1f415968 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@@ -60,28 -60,24 +60,28 @@@ int mlx5e_open_xsk(struct mlx5e_priv *p struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, struct mlx5e_channel *c) { - struct mlx5e_channel_param cparam = {}; + struct mlx5e_channel_param *cparam; struct dim_cq_moder icocq_moder = {}; int err; if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) return -EINVAL; - mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM; - err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + mlx5e_build_xsk_cparam(priv, params, xsk, cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq); if (unlikely(err)) - return err; + goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq; - err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq); if (unlikely(err)) goto err_close_rq; @@@ -91,23 -87,21 +91,23 @@@ * is disabled and then reenabled, but the SQ continues receiving CQEs * from the old UMEM. */ - err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq; - err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq); if (unlikely(err)) goto err_close_sq; /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be * triggered and NAPI to be called on the correct CPU. */ - err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq); if (unlikely(err)) goto err_close_icocq; + kvfree(cparam); + spin_lock_init(&c->xskicosq_lock); set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); @@@ -129,9 -123,6 +129,9 @@@ err_close_rq err_close_rx_cq: mlx5e_close_cq(&c->xskrq.cq); +err_free_cparam: + kvfree(cparam); + return err; } @@@ -152,7 -143,10 +152,10 @@@ void mlx5e_activate_xsk(struct mlx5e_ch { set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ + + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); } void mlx5e_deactivate_xsk(struct mlx5e_channel *c) diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 02530b50609c,20e628c907e5..7347d673f448 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -1081,6 -1081,14 +1081,14 @@@ int mlx5e_ethtool_set_link_ksettings(st link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext); + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + err = -EINVAL; + goto out; + } + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", @@@ -1338,6 -1346,9 +1346,9 @@@ int mlx5e_ethtool_set_pauseparam(struc struct mlx5_core_dev *mdev = priv->mdev; int err; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + if (pauseparam->autoneg) return -EINVAL; @@@ -1679,6 -1690,40 +1690,40 @@@ static int mlx5e_get_module_eeprom(stru return 0; } + int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) + { + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; + } + + static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); + } + static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@@ -1913,27 -1958,21 +1958,27 @@@ static u32 mlx5e_get_priv_flags(struct return priv->channels.params.pflags; } -#ifndef CONFIG_MLX5_EN_RXNFC -/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS - * otherwise this function will be defined from en_fs_ethtool.c - */ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); - if (info->cmd != ETHTOOL_GRXRINGS) - return -EOPNOTSUPP; - /* ring_count is needed by ethtool -x */ - info->data = priv->channels.params.num_channels; - return 0; + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. + */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); +} + +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + return mlx5e_ethtool_set_rxnfc(dev, cmd); } -#endif const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, @@@ -1954,7 -1993,9 +1999,7 @@@ .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, -#ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, -#endif .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, @@@ -1965,6 -2006,7 +2010,7 @@@ .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9a2fcef6e7f0,9d5f6e56188f..0c8e847a9eee --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -1130,8 -1130,6 +1130,8 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) { @@@ -1323,7 -1321,6 +1323,6 @@@ err_free_txqsq void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); - clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); @@@ -2324,7 -2321,7 +2323,7 @@@ int mlx5e_open_channels(struct mlx5e_pr goto err_close_channels; } - if (!IS_ERR_OR_NULL(priv->tx_reporter)) + if (priv->tx_reporter) devlink_health_reporter_state_update(priv->tx_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); @@@ -3425,7 -3422,7 +3424,7 @@@ out #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *cls_flower, - int flags) + unsigned long flags) { switch (cls_flower->command) { case FLOW_CLS_REPLACE: @@@ -3445,12 -3442,12 +3444,12 @@@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); struct mlx5e_priv *priv = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_NIC_OFFLOAD); + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); default: return -EOPNOTSUPP; } @@@ -3643,7 -3640,7 +3642,7 @@@ static int set_feature_tc_num_filters(s { struct mlx5e_priv *priv = netdev_priv(netdev); - if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@@ -3784,10 -3781,9 +3783,10 @@@ static netdev_features_t mlx5e_fix_feat netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); } if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - features &= ~NETIF_F_LRO; - if (params->lro_en) + if (features & NETIF_F_LRO) { netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } } if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { @@@ -3954,8 -3950,7 +3953,8 @@@ int mlx5e_hwstamp_set(struct mlx5e_pri case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); @@@ -4773,7 -4768,7 +4772,7 @@@ void mlx5e_build_nic_params(struct mlx5 mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); /* TX inline */ - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5be3da621499,00b2d4a86159..c57f7533a6d0 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -38,8 -38,6 +38,8 @@@ #include #include #include +#include +#include #include #include #include @@@ -67,20 -65,19 +67,20 @@@ struct mlx5_nic_flow_attr struct mlx5_fc *counter; }; -#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) enum { - MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, - MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, - MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, }; #define MLX5E_TC_MAX_SPLITS 1 @@@ -103,7 -100,6 +103,7 @@@ * container_of(helper item, containing struct type, helper field[index]) */ struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ struct list_head list; int index; }; @@@ -112,7 -108,7 +112,7 @@@ struct mlx5e_tc_flow struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u16 flags; + unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported @@@ -120,14 -116,10 +120,14 @@@ */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + refcount_t refcnt; + struct rcu_head rcu_head; union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@@ -165,20 -157,12 +165,20 @@@ struct mlx5e_hairpin_entry /* a node of a hash table which keeps all the hairpin entries */ struct hlist_node hairpin_hlist; + /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same hairpin */ struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list; u16 peer_vhca_id; u8 prio; struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; }; struct mod_hdr_key { @@@ -190,93 -174,16 +190,93 @@@ struct mlx5e_mod_hdr_entry /* a node of a hash table which keeps all the mod_hdr entries */ struct hlist_node mod_hdr_hlist; + /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same mod_hdr entry */ struct list_head flows; struct mod_hdr_key key; u32 mod_hdr_id; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; }; #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) +{ + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; +} + +static void mlx5e_flow_put(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } +} + +static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. */ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, ESWITCH); +} + +static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, OFFLOADED); +} + static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) { return jhash(key->actions, @@@ -292,62 -199,15 +292,62 @@@ static inline int cmp_mod_hdr_info(stru return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); } +static struct mod_hdr_tbl * +get_mod_hdr_table(struct mlx5e_priv *priv, int namespace) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : + &priv->fs.tc.mod_hdr; +} + +static struct mlx5e_mod_hdr_entry * +mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key) +{ + struct mlx5e_mod_hdr_entry *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; +} + +static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv, + struct mlx5e_mod_hdr_entry *mh, + int namespace) +{ + struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace); + + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + WARN_ON(!list_empty(&mh->flows)); + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); + + kfree(mh); +} + +static int get_flow_name_space(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? + MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; +} static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int num_actions, actions_size, namespace, err; struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_tbl *tbl; struct mod_hdr_key key; - bool found = false; u32 hash_key; num_actions = parse_attr->num_mod_hdr_actions; @@@ -358,82 -218,80 +358,82 @@@ hash_key = hash_mod_hdr_info(&key); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - namespace = MLX5_FLOW_NAMESPACE_FDB; - hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } else { - namespace = MLX5_FLOW_NAMESPACE_KERNEL; - hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } + namespace = get_flow_name_space(flow); + tbl = get_mod_hdr_table(priv, namespace); + + mutex_lock(&tbl->lock); + mh = mlx5e_mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready); - if (found) + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } goto attach_flow; + } mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); - if (!mh) + if (!mh) { + mutex_unlock(&tbl->lock); return -ENOMEM; + } mh->key.actions = (void *)mh + sizeof(*mh); memcpy(mh->key.actions, key.actions, actions_size); mh->key.num_actions = num_actions; + spin_lock_init(&mh->flows_lock); INIT_LIST_HEAD(&mh->flows); + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock); err = mlx5_modify_header_alloc(priv->mdev, namespace, mh->key.num_actions, mh->key.actions, &mh->mod_hdr_id); - if (err) - goto out_err; - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); - else - hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + if (err) { + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready); attach_flow: + flow->mh = mh; + spin_lock(&mh->flows_lock); list_add(&flow->mod_hdr, &mh->flows); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + spin_unlock(&mh->flows_lock); + if (mlx5e_is_eswitch_flow(flow)) flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; else flow->nic_attr->mod_hdr_id = mh->mod_hdr_id; return 0; -out_err: - kfree(mh); +alloc_header_err: + complete_all(&mh->res_ready); +attach_header_err: + mlx5e_mod_hdr_put(priv, mh, namespace); return err; } static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->mod_hdr.next; + /* flow wasn't fully initialized */ + if (!flow->mh) + return; + spin_lock(&flow->mh->flows_lock); list_del(&flow->mod_hdr); + spin_unlock(&flow->mh->flows_lock); - if (list_empty(next)) { - struct mlx5e_mod_hdr_entry *mh; - - mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows); - - mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); - hash_del(&mh->mod_hdr_hlist); - kfree(mh); - } + mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow)); + flow->mh = NULL; } static @@@ -697,35 -555,13 +697,35 @@@ static struct mlx5e_hairpin_entry *mlx5 hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, hairpin_hlist, hash_key) { - if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); return hpe; + } } return NULL; } +static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) +{ + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); +} + #define UNKNOWN_MATCH_PRIO 8 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, @@@ -791,37 -627,17 +791,37 @@@ static int mlx5e_hairpin_flow_add(struc extack); if (err) return err; + + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); - if (hpe) + if (hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + } hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); - if (!hpe) + if (!hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); return -ENOMEM; + } + spin_lock_init(&hpe->flows_lock); INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); hpe->peer_vhca_id = peer_id; hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@@ -843,11 -659,9 +843,11 @@@ params.num_channels = link_speed64; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); if (IS_ERR(hp)) { err = PTR_ERR(hp); - goto create_hairpin_err; + goto out_err; } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", @@@ -855,39 -669,46 +855,39 @@@ dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); - hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, - hash_hairpin_info(peer_id, match_prio)); - attach_flow: if (hpe->hp->num_channels > 1) { - flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow_flag_set(flow, HAIRPIN_RSS); flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { flow->nic_attr->hairpin_tirn = hpe->hp->tirn; } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); list_add(&flow->hairpin, &hpe->flows); + spin_unlock(&hpe->flows_lock); return 0; -create_hairpin_err: - kfree(hpe); +out_err: + mlx5e_hairpin_put(priv, hpe); return err; } static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->hairpin.next; + /* flow wasn't fully initialized */ + if (!flow->hpe) + return; + spin_lock(&flow->hpe->flows_lock); list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock); - /* no more hairpin flows for us, release the hairpin pair */ - if (list_empty(next)) { - struct mlx5e_hairpin_entry *hpe; - - hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); - - netdev_dbg(priv->netdev, "del hairpin: peer %s\n", - dev_name(hpe->hp->pair->peer_mdev->device)); - - mlx5e_hairpin_destroy(hpe->hp); - hash_del(&hpe->hairpin_hlist); - kfree(hpe); - } + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; } static int @@@ -906,17 -727,18 +906,17 @@@ mlx5e_tc_add_nic_flow(struct mlx5e_pri .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - bool table_created = false; int err, dest_ix = 0; flow_context->flags |= FLOW_CONTEXT_HAS_TAG; flow_context->flow_tag = attr->flow_tag; - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) { - goto err_add_hairpin_flow; - } - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + if (err) + return err; + + if (flow_flag_test(flow, HAIRPIN_RSS)) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dest_ix].ft = attr->hairpin_ft; } else { @@@ -932,9 -754,10 +932,9 @@@ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_fc_create; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; @@@ -946,10 -769,9 +946,10 @@@ flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); if (err) - goto err_create_mod_hdr_id; + return err; } + mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { int tc_grp_size, tc_tbl_size; u32 max_flow_counter; @@@ -969,13 -791,15 +969,13 @@@ MLX5E_TC_TABLE_NUM_GROUPS, MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { + mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - err = PTR_ERR(priv->fs.tc.t); - goto err_create_ft; + return PTR_ERR(priv->fs.tc.t); } - - table_created = true; } if (attr->match_level != MLX5_MATCH_NONE) @@@ -983,12 -807,29 +983,12 @@@ flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); + mutex_unlock(&priv->fs.tc.t_lock); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]); return 0; - -err_add_rule: - if (table_created) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } -err_create_ft: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); -err_create_mod_hdr_id: - mlx5_fc_destroy(dev, counter); -err_fc_create: - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) - mlx5e_hairpin_flow_del(priv, flow); -err_add_hairpin_flow: - return err; } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@@ -998,21 -839,18 +998,21 @@@ struct mlx5_fc *counter = NULL; counter = attr->counter; - mlx5_del_flow_rules(flow->rule[0]); + if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { + mutex_lock(&priv->fs.tc.t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + mutex_unlock(&priv->fs.tc.t_lock); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); } @@@ -1047,6 -885,7 +1047,6 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_ } } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; return rule; } @@@ -1055,7 -894,7 +1055,7 @@@ mlx5e_tc_unoffload_fdb_rules(struct mlx struct mlx5e_tc_flow *flow, struct mlx5_esw_flow_attr *attr) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + flow_flag_clear(flow, OFFLOADED); if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@@ -1078,7 -917,7 +1078,7 @@@ mlx5e_tc_offload_to_slow_path(struct ml rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) - flow->flags |= MLX5E_TC_FLOW_SLOW; + flow_flag_set(flow, SLOW); return rule; } @@@ -1093,26 -932,7 +1093,26 @@@ mlx5e_tc_unoffload_from_slow_path(struc slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_SLOW; + flow_flag_clear(flow, SLOW); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) +{ + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_del(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); } static void add_unready_flow(struct mlx5e_tc_flow *flow) @@@ -1125,24 -945,14 +1125,24 @@@ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &rpriv->uplink_priv; - flow->flags |= MLX5E_TC_FLOW_NOT_READY; - list_add_tail(&flow->unready, &uplink_priv->unready_flows); + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); } static void remove_unready_flow(struct mlx5e_tc_flow *flow) { - list_del(&flow->unready); - flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); } static int @@@ -1170,12 -980,14 +1170,12 @@@ mlx5e_tc_add_fdb_flow(struct mlx5e_pri if (attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; } if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; } for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { @@@ -1190,7 -1002,7 +1190,7 @@@ err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); if (err) - goto err_attach_encap; + return err; out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@@ -1200,19 -1012,21 +1200,19 @@@ err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) - goto err_add_vlan; + return err; if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); if (err) - goto err_mod_hdr; + return err; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_create_counter; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); attr->counter = counter; } @@@ -1230,12 -1044,27 +1230,12 @@@ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); } - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]); + else + flow_flag_set(flow, OFFLOADED); return 0; - -err_add_rule: - mlx5_fc_destroy(attr->counter_dev, counter); -err_create_counter: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); -err_mod_hdr: - mlx5_eswitch_del_vlan_action(esw, attr); -err_add_vlan: - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) - mlx5e_detach_encap(priv, flow, out_index); -err_attach_encap: -err_max_prio_chain: - return err; } static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) @@@ -1259,14 -1088,14 +1259,14 @@@ static void mlx5e_tc_del_fdb_flow(struc struct mlx5_esw_flow_attr slow_attr; int out_index; - if (flow->flags & MLX5E_TC_FLOW_NOT_READY) { + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); return; } - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - if (flow->flags & MLX5E_TC_FLOW_SLOW) + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); @@@ -1294,9 -1123,9 +1294,9 @@@ void mlx5e_tc_encap_flows_add(struct ml { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct encap_flow_item *efi, *tmp; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; @@@ -1313,14 -1142,11 +1313,14 @@@ e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry_safe(efi, tmp, &e->flows, list) { bool all_flow_encaps_valid = true; int i; flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + esw_attr = flow->esw_attr; spec = &esw_attr->parse_attr->spec; @@@ -1340,23 -1166,19 +1340,23 @@@ } /* Do not offload flows with unresolved neighbors */ if (!all_flow_encaps_valid) - continue; + goto loop_cont; /* update from slow path rule to encap rule */ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); - continue; + goto loop_cont; } mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + +loop_cont: + mlx5e_flow_put(priv, flow); } } @@@ -1365,17 -1187,14 +1365,17 @@@ void mlx5e_tc_encap_flows_del(struct ml { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr; + struct encap_flow_item *efi, *tmp; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ @@@ -1387,16 -1206,12 +1387,16 @@@ err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", err); - continue; + goto loop_cont; } mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + +loop_cont: + mlx5e_flow_put(priv, flow); } /* we know that the encap is valid */ @@@ -1406,7 -1221,7 +1406,7 @@@ static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) return flow->esw_attr->counter; else return flow->nic_attr->counter; @@@ -1433,32 -1248,21 +1433,32 @@@ void mlx5e_tc_update_neigh_used_value(s return; list_for_each_entry(e, &nhe->encap_list, encap_list) { - struct encap_flow_item *efi; - if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + struct encap_flow_item *efi, *tmp; + + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) || + !mlx5e_encap_take(e)) continue; - list_for_each_entry(efi, &e->flows, list) { + + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); lastuse = mlx5_fc_query_lastuse(counter); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + mlx5e_flow_put(netdev_priv(e->out_dev), flow); neigh_used = true; break; } } + + mlx5e_flow_put(netdev_priv(e->out_dev), flow); } + + mlx5e_encap_put(netdev_priv(e->out_dev), e); if (neigh_used) break; } @@@ -1478,66 -1282,40 +1478,66 @@@ } } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { - struct list_head *next = flow->encaps[out_index].list.next; + WARN_ON(!list_empty(&e->flows)); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); - list_del(&flow->encaps[out_index].list); - if (list_empty(next)) { - struct mlx5e_encap_entry *e; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); - e = list_entry(next, struct mlx5e_encap_entry, flows); - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + kfree(e->encap_header); + kfree(e); +} - if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - hash_del_rcu(&e->encap_hlist); - kfree(e->encap_header); - kfree(e); + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; } + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); } static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->flags & MLX5E_TC_FLOW_DUP)) + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) return; mutex_lock(&esw->offloads.peer_mutex); list_del(&flow->peer); mutex_unlock(&esw->offloads.peer_mutex); - flow->flags &= ~MLX5E_TC_FLOW_DUP; + flow_flag_clear(flow, DUP); mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); kvfree(flow->peer_flow); @@@ -1561,7 -1339,7 +1561,7 @@@ static void mlx5e_tc_del_fdb_peer_flow( static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (mlx5e_is_eswitch_flow(flow)) { mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); } else { @@@ -1702,7 -1480,7 +1702,7 @@@ static int __parse_cls_flower(struct ml struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, - u8 *match_level, u8 *tunnel_match_level) + u8 *inner_match_level, u8 *outer_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@@ -1717,8 -1495,9 +1717,9 @@@ struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; + u8 *match_level; - *match_level = MLX5_MATCH_NONE; + match_level = outer_match_level; if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_META) | @@@ -1746,12 -1525,14 +1747,14 @@@ } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, + outer_match_level)) return -EOPNOTSUPP; - /* In decap flow, header pointers should point to the inner + /* At this point, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr */ + match_level = inner_match_level; headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, spec); headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, @@@ -2053,37 -1834,41 +2056,43 @@@ static int parse_cls_flower(struct mlx5 struct flow_cls_offload *f, struct net_device *filter_dev) { + u8 inner_match_level, outer_match_level, non_tunnel_match_level; struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; - u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; int err; - err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, + &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? + outer_match_level : inner_match_level; - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < match_level)) { + esw->offloads.inline_mode < non_tunnel_match_level)) { NL_SET_ERR_MSG_MOD(extack, "Flow is not offloaded due to min inline setting"); netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", - match_level, esw->offloads.inline_mode); + non_tunnel_match_level, esw->offloads.inline_mode); return -EOPNOTSUPP; } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (is_eswitch_flow) { - flow->esw_attr->match_level = match_level; - flow->esw_attr->tunnel_match_level = tunnel_match_level; + flow->esw_attr->inner_match_level = inner_match_level; + flow->esw_attr->outer_match_level = outer_match_level; } else { - flow->nic_attr->match_level = match_level; + flow->nic_attr->match_level = non_tunnel_match_level; } return err; @@@ -2600,15 -2385,14 +2609,15 @@@ static bool actions_match_supported(str { u32 actions; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) actions = flow->esw_attr->action; else actions = flow->nic_attr->action; - if (flow->flags & MLX5E_TC_FLOW_EGRESS && + if (flow_flag_test(flow, EGRESS) && !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@@ -2758,7 -2542,7 +2767,7 @@@ static int parse_tc_nic_actions(struct if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + flow_flag_set(flow, HAIRPIN); action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; } else { @@@ -2845,31 -2629,6 +2854,31 @@@ static bool is_merged_eswitch_dev(struc +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, @@@ -2882,10 -2641,11 +2891,10 @@@ struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; const struct ip_tunnel_info *tun_info; - struct encap_key key, e_key; + struct encap_key key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; - bool found = false; int err = 0; parse_attr = attr->parse_attr; @@@ -2900,59 -2660,42 +2909,59 @@@ hash_key = hash_encap_info(&key); - hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, &key)) { - found = true; - break; - } - } + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key); /* must verify if encap is valid or not */ - if (found) + if (e) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + } e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready); e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) + if (err) { + kfree(e); + e = NULL; goto out_err; + } INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + mutex_unlock(&esw->offloads.encap_tbl_lock); if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - if (err) + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; goto out_err; - - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + } attach_flow: + flow->encaps[out_index].e = e; list_add(&flow->encaps[out_index].list, &e->flows); flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; @@@ -2963,14 -2706,11 +2972,14 @@@ } else { *encap_valid = false; } + mutex_unlock(&esw->offloads.encap_tbl_lock); return err; out_err: - kfree(e); + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); return err; } @@@ -3150,16 -2890,12 +3159,16 @@@ static int parse_tc_fdb_actions(struct if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + struct net_device *uplink_upper; + rcu_read_lock(); + uplink_upper = + netdev_master_upper_dev_get_rcu(uplink_dev); if (uplink_upper && netif_is_lag_master(uplink_upper) && uplink_upper == out_dev) out_dev = uplink_dev; + rcu_read_unlock(); if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@@ -3330,19 -3066,19 +3339,19 @@@ return 0; } -static void get_flags(int flags, u16 *flow_flags) +static void get_flags(int flags, unsigned long *flow_flags) { - u16 __flow_flags = 0; + unsigned long __flow_flags = 0; - if (flags & MLX5E_TC_INGRESS) - __flow_flags |= MLX5E_TC_FLOW_INGRESS; - if (flags & MLX5E_TC_EGRESS) - __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); - if (flags & MLX5E_TC_ESW_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_ESWITCH; - if (flags & MLX5E_TC_NIC_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_NIC; + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); *flow_flags = __flow_flags; } @@@ -3354,13 -3090,12 +3363,13 @@@ static const struct rhashtable_params t .automatic_shrinking = true, }; -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv; - if (flags & MLX5E_TC_ESW_OFFLOAD) { + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); return &uplink_rpriv->uplink_priv.tc_ht; } else /* NIC offload */ @@@ -3371,7 -3106,7 +3380,7 @@@ static bool is_peer_flow_needed(struct { struct mlx5_esw_flow_attr *attr = flow->esw_attr; bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && - flow->flags & MLX5E_TC_FLOW_INGRESS; + flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, @@@ -3390,13 -3125,13 +3399,13 @@@ static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct flow_cls_offload *f, u16 flow_flags, + struct flow_cls_offload *f, unsigned long flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; - int err; + int out_index, err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@@ -3408,11 -3143,6 +3417,11 @@@ flow->cookie = f->cookie; flow->flags = flow_flags; flow->priv = priv; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->mod_hdr); + INIT_LIST_HEAD(&flow->hairpin); + refcount_set(&flow->refcnt, 1); *__flow = flow; *__parse_attr = parse_attr; @@@ -3437,7 -3167,7 +3446,7 @@@ mlx5e_flow_esw_attr_init(struct mlx5_es esw_attr->parse_attr = parse_attr; esw_attr->chain = f->common.chain_index; - esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + esw_attr->prio = f->common.prio; esw_attr->in_rep = in_rep; esw_attr->in_mdev = in_mdev; @@@ -3452,7 -3182,7 +3461,7 @@@ static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) @@@ -3463,7 -3193,7 +3472,7 @@@ struct mlx5e_tc_flow *flow; int attr_size, err; - flow_flags |= MLX5E_TC_FLOW_ESWITCH; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); attr_size = sizeof(struct mlx5_esw_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@@ -3495,14 -3225,15 +3504,14 @@@ return flow; err_free: - kfree(flow); - kvfree(parse_attr); + mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); } static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - u16 flow_flags) + unsigned long flow_flags) { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; @@@ -3540,7 -3271,7 +3549,7 @@@ } flow->peer_flow = peer_flow; - flow->flags |= MLX5E_TC_FLOW_DUP; + flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); list_add_tail(&flow->peer, &esw->offloads.peer_flows); mutex_unlock(&esw->offloads.peer_mutex); @@@ -3553,7 -3284,7 +3562,7 @@@ out static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@@ -3587,7 -3318,7 +3596,7 @@@ out static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@@ -3601,7 -3332,7 +3610,7 @@@ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) return -EOPNOTSUPP; - flow_flags |= MLX5E_TC_FLOW_NIC; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@@ -3622,14 -3353,14 +3631,14 @@@ if (err) goto err_free; - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flag_set(flow, OFFLOADED); kvfree(parse_attr); *__flow = flow; return 0; err_free: - kfree(flow); + mlx5e_flow_put(priv, flow); kvfree(parse_attr); out: return err; @@@ -3638,12 -3369,12 +3647,12 @@@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - int flags, + unsigned long flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u16 flow_flags; + unsigned long flow_flags; int err; get_flags(flags, &flow_flags); @@@ -3662,16 -3393,14 +3671,16 @@@ } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + rcu_read_unlock(); if (flow) { NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); @@@ -3686,62 -3415,51 +3695,62 @@@ if (err) goto out; - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free; return 0; err_free: - mlx5e_tc_del_flow(priv, flow); - kfree(flow); + mlx5e_flow_put(priv, flow); out: return err; } -#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) -#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) - static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) { - if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) - return true; + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); - return false; + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; } int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; + int err; + rcu_read_lock(); flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. + */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock(); - mlx5e_tc_del_flow(priv, flow); - - kfree(flow); + mlx5e_flow_put(priv, flow); return 0; + +errout: + rcu_read_unlock(); + return err; } int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@@ -3751,24 -3469,15 +3760,24 @@@ u64 lastuse = 0; u64 packets = 0; u64 bytes = 0; + int err = 0; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); if (!counter) - return 0; + goto errout; mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); } @@@ -3780,8 -3489,8 +3789,8 @@@ if (!peer_esw) goto out; - if ((flow->flags & MLX5E_TC_FLOW_DUP) && - (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + if (flow_flag_test(flow, DUP) && + flow_flag_test(flow->peer_flow, OFFLOADED)) { u64 bytes2; u64 packets2; u64 lastuse2; @@@ -3800,117 -3509,15 +3809,117 @@@ no_peer_counter mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, lastuse); +errout: + mlx5e_flow_put(priv, flow); + return err; +} + +static int apply_police_params(struct mlx5e_priv *priv, u32 rate, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u16 vport_num; + u32 rate_mbps; + int err; + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + vport_num = rpriv->rep->vport; + + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; +} + +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + } return 0; } +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + int prio = TC_H_MAJ(ma->common.prio) >> 16; + + if (prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); +} + +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); +} + +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); +} + static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *peer_mdev = peer_priv->mdev; - struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); u16 peer_vhca_id; int bkt; @@@ -3919,18 -3526,9 +3928,18 @@@ peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); - hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { - if (hpe->peer_vhca_id == peer_vhca_id) + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) hpe->hp->pair->peer_gone = true; + + mlx5e_hairpin_put(priv, hpe); } } @@@ -3966,10 -3564,7 +3975,10 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri struct mlx5e_tc_table *tc = &priv->fs.tc; int err; - hash_init(tc->mod_hdr_tbl); + mutex_init(&tc->t_lock); + mutex_init(&tc->mod_hdr.lock); + hash_init(tc->mod_hdr.hlist); + mutex_init(&tc->hairpin_tbl_lock); hash_init(tc->hairpin_tbl); err = rhashtable_init(&tc->ht, &tc_ht_params); @@@ -4001,16 -3596,12 +4010,16 @@@ void mlx5e_tc_nic_cleanup(struct mlx5e_ if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb); + mutex_destroy(&tc->mod_hdr.lock); + mutex_destroy(&tc->hairpin_tbl_lock); + rhashtable_destroy(&tc->ht); if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); tc->t = NULL; } + mutex_destroy(&tc->t_lock); } int mlx5e_tc_esw_init(struct rhashtable *tc_ht) @@@ -4023,7 -3614,7 +4032,7 @@@ void mlx5e_tc_esw_cleanup(struct rhasht rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); } -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@@ -4045,10 -3636,10 +4054,10 @@@ void mlx5e_tc_reoffload_flows_work(stru reoffload_flows_work); struct mlx5e_tc_flow *flow, *tmp; - rtnl_lock(); + mutex_lock(&rpriv->unready_flows_lock); list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) - remove_unready_flow(flow); + unready_flow_del(flow); } - rtnl_unlock(); + mutex_unlock(&rpriv->unready_flows_lock); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 86db0e9776da,04685dbb280c..aba9e7a6ad3c --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@@ -35,7 -35,6 +35,7 @@@ #include #include +#include #include #include #include @@@ -102,13 -101,6 +102,13 @@@ struct mlx5_vport_info bool trusted; }; +/* Vport context events */ +enum mlx5_eswitch_vport_event { + MLX5_VPORT_UC_ADDR_CHANGE = BIT(0), + MLX5_VPORT_MC_ADDR_CHANGE = BIT(1), + MLX5_VPORT_PROMISC_CHANGE = BIT(3), +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@@ -130,7 -122,7 +130,7 @@@ } qos; bool enabled; - u16 enabled_events; + enum mlx5_eswitch_vport_event enabled_events; }; enum offloads_fdb_flags { @@@ -181,14 -173,13 +181,14 @@@ struct mlx5_esw_offload struct mlx5_eswitch_rep *vport_reps; struct list_head peer_flows; struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); - DECLARE_HASHTABLE(mod_hdr_tbl, 8); + struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; - u64 num_flows; + atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; }; @@@ -216,11 -207,8 +216,11 @@@ enum struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; + /* legacy data structures */ struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct esw_mc_addr mc_promisc; + /* end of legacy */ struct workqueue_struct *work_queue; struct mlx5_vport *vports; u32 flags; @@@ -230,6 -218,7 +230,6 @@@ * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr mc_promisc; struct { bool enabled; @@@ -244,8 -233,8 +244,8 @@@ struct mlx5_esw_functions esw_funcs; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw); +void esw_offloads_disable(struct mlx5_eswitch *esw); +int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@@ -262,8 -251,6 +262,8 @@@ void esw_vport_disable_ingress_acl(stru struct mlx5_vport *vport); void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@@ -390,8 -377,8 +390,8 @@@ struct mlx5_esw_flow_attr struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; - u8 match_level; - u8 tunnel_match_level; + u8 inner_match_level; + u8 outer_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; @@@ -526,11 -513,6 +526,11 @@@ void mlx5e_tc_clean_fdb_peer_flows(stru (vport) = &(esw)->vports[i], \ (i) < (esw)->total_vports; (i)++) +#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \ + for ((i) = (esw)->total_vports - 1; \ + (vport) = &(esw)->vports[i], \ + (i) >= MLX5_VPORT_PF; (i)--) + #define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ for ((i) = MLX5_VPORT_FIRST_VF; \ (vport) = &(esw)->vports[(i)], \ @@@ -592,11 -574,6 +592,11 @@@ bool mlx5_eswitch_is_vf_vport(const str void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); +void +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 42cc5001255b,0323fd078271..7d3582ee66b7 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -207,14 -207,10 +207,10 @@@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { - if (attr->tunnel_match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - if (attr->match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - } else if (attr->match_level != MLX5_MATCH_NONE) { + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - } + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; @@@ -233,7 -229,7 +229,7 @@@ if (IS_ERR(rule)) goto err_add_rule; else - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows); return rule; @@@ -290,7 -286,7 +286,7 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e mlx5_eswitch_set_rule_source_port(esw, spec, attr); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - if (attr->match_level != MLX5_MATCH_NONE) + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@@ -298,7 -294,7 +294,7 @@@ if (IS_ERR(rule)) goto add_err; - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows); return rule; add_err: @@@ -326,7 -322,7 +322,7 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); } - esw->offloads.num_flows--; + atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { esw_put_prio_table(esw, attr->chain, attr->prio, 1); @@@ -442,11 -438,9 +438,11 @@@ int mlx5_eswitch_add_vlan_action(struc fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !attr->dest_chain); + mutex_lock(&esw->state_lock); + err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) - return err; + goto unlock; attr->vlan_handled = false; @@@ -459,11 -453,11 +455,11 @@@ attr->vlan_handled = true; } - return 0; + goto unlock; } if (!push && !pop) - return 0; + goto unlock; if (!(offloads->vlan_push_pop_refcount)) { /* it's the 1st vlan rule, apply global vlan pop policy */ @@@ -488,8 -482,6 +484,8 @@@ skip_set_push out: if (!err) attr->vlan_handled = true; +unlock: + mutex_unlock(&esw->state_lock); return err; } @@@ -512,8 -504,6 +508,8 @@@ int mlx5_eswitch_del_vlan_action(struc pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + mutex_lock(&esw->state_lock); + vport = esw_vlan_action_get_vport(attr, push, pop); if (!push && !pop && fwd) { @@@ -521,7 -511,7 +517,7 @@@ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; - return 0; + goto out; } if (push) { @@@ -539,13 -529,12 +535,13 @@@ skip_unset_push: offloads->vlan_push_pop_refcount--; if (offloads->vlan_push_pop_refcount) - return 0; + goto out; /* no more vlan rules, stop global vlan pop policy */ err = esw_set_global_vlan_pop(esw, 0); out: + mutex_unlock(&esw->state_lock); return err; } @@@ -594,15 -583,38 +590,15 @@@ void mlx5_eswitch_del_send_to_vport_rul mlx5_del_flow_rules(rule); } -static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; u8 fdb_to_vport_reg_c_id; int err; - err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, - out, sizeof(out)); - if (err) - return err; - - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); - - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); - - MLX5_SET(modify_esw_vport_context_in, in, - field_select.fdb_to_vport_reg_c_id, 1); - - return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, - in, sizeof(in)); -} - -static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; - u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; - int err; + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, out, sizeof(out)); @@@ -612,10 -624,7 +608,10 @@@ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, esw_vport_context.fdb_to_vport_reg_c_id); - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + if (enable) + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + else + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); @@@ -1393,9 -1402,10 +1389,9 @@@ void esw_offloads_cleanup_reps(struct m int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vports = esw->total_vports; - struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN], rep_type; int vport_index; + u8 rep_type; esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@@ -1403,9 -1413,12 +1399,9 @@@ if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport_index, rep) { rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); rep->vport_index = vport_index; - ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) atomic_set(&rep->rep_data[rep_type].state, @@@ -2107,7 -2120,7 +2103,7 @@@ int mlx5_esw_funcs_changed_handler(stru return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw) +int esw_offloads_enable(struct mlx5_eswitch *esw) { int err; @@@ -2121,11 -2134,11 +2117,11 @@@ if (err) return err; - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - err = mlx5_eswitch_enable_passing_vport_metadata(esw); - if (err) - goto err_vport_metadata; - } + err = esw_set_passing_vport_metadata(esw, true); + if (err) + goto err_vport_metadata; + + mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); err = esw_offloads_load_all_reps(esw); if (err) @@@ -2139,8 -2152,8 +2135,8 @@@ return 0; err_reps: - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; @@@ -2165,13 -2178,13 +2161,13 @@@ static int esw_offloads_stop(struct mlx return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw) +void esw_offloads_disable(struct mlx5_eswitch *esw) { mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw); - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } @@@ -2332,7 -2345,7 +2328,7 @@@ int mlx5_devlink_eswitch_inline_mode_se break; } - if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); return -EOPNOTSUPP; @@@ -2442,7 -2455,7 +2438,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (esw->offloads.encap == encap) return 0; - if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); return -EOPNOTSUPP; diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 1aaab8446270,84a87d059333..150b3a144b83 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@@ -239,8 -239,7 +239,8 @@@ mlxsw_sp_acl_block_lookup(struct mlxsw_ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress) + bool ingress, + struct netlink_ext_ack *extack) { struct mlxsw_sp_acl_block_binding *binding; int err; @@@ -248,11 -247,6 +248,11 @@@ if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress))) return -EEXIST; + if (!ingress && block->egress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + binding = kzalloc(sizeof(*binding), GFP_KERNEL); if (!binding) return -ENOMEM; @@@ -477,7 -471,7 +477,7 @@@ int mlxsw_sp_acl_rulei_commit(struct ml void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, unsigned int priority) { - rulei->priority = priority >> 16; + rulei->priority = priority; } void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, @@@ -678,7 -672,6 +678,7 @@@ int mlxsw_sp_acl_rule_add(struct mlxsw_ { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; int err; err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei); @@@ -696,14 -689,14 +696,14 @@@ * one, to be directly bound to device. The rest of the * rulesets are bound by "Goto action set". */ - err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, - ruleset->ht_key.block); + err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block); if (err) goto err_ruleset_block_bind; } list_add_tail(&rule->list, &mlxsw_sp->acl->rules); - ruleset->ht_key.block->rule_count++; + block->rule_count++; + block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; err_ruleset_block_bind: @@@ -719,9 -712,7 +719,9 @@@ void mlxsw_sp_acl_rule_del(struct mlxsw { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; + block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; ruleset->ht_key.block->rule_count--; list_del(&rule->list); if (!ruleset->ht_key.chain_index && diff --combined drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 61fe92719982,337b0cbfd153..c979f38a2e0c --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@@ -1286,7 -1286,7 +1286,7 @@@ myri10ge_vlan_rx(struct net_device *dev { u8 *va; struct vlan_ethhdr *veh; - struct skb_frag_struct *frag; + skb_frag_t *frag; __wsum vsum; va = addr; @@@ -1306,8 -1306,8 +1306,8 @@@ skb->len -= VLAN_HLEN; skb->data_len -= VLAN_HLEN; frag = skb_shinfo(skb)->frags; - frag->page_offset += VLAN_HLEN; - skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN); + skb_frag_off_add(frag, VLAN_HLEN); + skb_frag_size_sub(frag, VLAN_HLEN); } } @@@ -1318,7 -1318,7 +1318,7 @@@ myri10ge_rx_done(struct myri10ge_slice_ { struct myri10ge_priv *mgp = ss->mgp; struct sk_buff *skb; - struct skb_frag_struct *rx_frags; + skb_frag_t *rx_frags; struct myri10ge_rx_buf *rx; int i, idx, remainder, bytes; struct pci_dev *pdev = mgp->pdev; @@@ -1351,7 -1351,7 +1351,7 @@@ return 0; } rx_frags = skb_shinfo(skb)->frags; - /* Fill skb_frag_struct(s) with data from our receive */ + /* Fill skb_frag_t(s) with data from our receive */ for (i = 0, remainder = len; remainder > 0; i++) { myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); skb_fill_page_desc(skb, i, rx->info[idx].page, @@@ -1364,8 -1364,8 +1364,8 @@@ } /* remove padding */ - rx_frags[0].page_offset += MXGEFW_PAD; - rx_frags[0].size -= MXGEFW_PAD; + skb_frag_off_add(&rx_frags[0], MXGEFW_PAD); + skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD); len -= MXGEFW_PAD; skb->len = len; @@@ -2628,7 -2628,7 +2628,7 @@@ static netdev_tx_t myri10ge_xmit(struc struct myri10ge_slice_state *ss; struct mcp_kreq_ether_send *req; struct myri10ge_tx_buf *tx; - struct skb_frag_struct *frag; + skb_frag_t *frag; struct netdev_queue *netdev_queue; dma_addr_t bus; u32 low; @@@ -3037,6 -3037,7 +3037,6 @@@ static int myri10ge_set_mac_address(str static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) { struct myri10ge_priv *mgp = netdev_priv(dev); - int error = 0; netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); if (mgp->running) { @@@ -3048,7 -3049,7 +3048,7 @@@ } else dev->mtu = new_mtu; - return error; + return 0; } /* @@@ -3918,7 -3919,7 +3918,7 @@@ static int myri10ge_probe(struct pci_de * setup (if available). */ status = myri10ge_request_irq(mgp); if (status != 0) - goto abort_with_firmware; + goto abort_with_slices; myri10ge_free_irq(mgp); /* Save configuration space to be restored if the diff --combined drivers/net/hyperv/netvsc_drv.c index 86884c863013,e8fce6d715ef..0a6cd2f1111f --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@@ -435,7 -435,7 +435,7 @@@ static u32 init_page_array(void *hdr, u skb_frag_t *frag = skb_shinfo(skb)->frags + i; slots_used += fill_pg_buf(skb_frag_page(frag), - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), &pb[slots_used]); } return slots_used; @@@ -449,7 -449,7 +449,7 @@@ static int count_skb_frag_slots(struct for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; unsigned long size = skb_frag_size(frag); - unsigned long offset = frag->page_offset; + unsigned long offset = skb_frag_off(frag); /* Skip unused frames from start of page */ offset &= ~PAGE_MASK; @@@ -1239,12 -1239,15 +1239,15 @@@ static void netvsc_get_stats64(struct n struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct netvsc_device *nvdev; struct netvsc_vf_pcpu_stats vf_tot; int i; + rcu_read_lock(); + + nvdev = rcu_dereference(ndev_ctx->nvdev); if (!nvdev) - return; + goto out; netdev_stats_to_stats64(t, &net->stats); @@@ -1283,6 -1286,8 +1286,8 @@@ t->rx_packets += packets; t->multicast += multicast; } + out: + rcu_read_unlock(); } static int netvsc_set_mac_addr(struct net_device *ndev, void *p) diff --combined drivers/net/netdevsim/dev.c index c217049552f7,bcc40a236624..c5b026150bf5 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@@ -17,60 -17,16 +17,60 @@@ #include #include +#include +#include +#include +#include #include #include #include #include +#include #include +#include +#include +#include +#include #include "netdevsim.h" static struct dentry *nsim_dev_ddir; +#define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) + +static ssize_t nsim_dev_take_snapshot_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + void *dummy_data; + int err; + u32 id; + + dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL); + if (!dummy_data) + return -ENOMEM; + + get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); + + id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev)); + err = devlink_region_snapshot_create(nsim_dev->dummy_region, + dummy_data, id, kfree); + if (err) { + pr_err("Failed to create region snapshot\n"); + kfree(dummy_data); + return err; + } + + return count; +} + +static const struct file_operations nsim_dev_take_snapshot_fops = { + .open = simple_open, + .write = nsim_dev_take_snapshot_write, + .llseek = generic_file_llseek, +}; + static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { char dev_ddir_name[16]; @@@ -84,12 -40,6 +84,12 @@@ return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL; debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, &nsim_dev->fw_update_status); + debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, + &nsim_dev->max_macs); + debugfs_create_bool("test1", 0600, nsim_dev->ddir, + &nsim_dev->test1); + debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, + &nsim_dev_take_snapshot_fops); return 0; } @@@ -123,46 -73,47 +123,47 @@@ static void nsim_dev_port_debugfs_exit( debugfs_remove_recursive(nsim_dev_port->ddir); } + static struct net *nsim_devlink_net(struct devlink *devlink) + { + return &init_net; + } + static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); } static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); } static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); } static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); } static int nsim_dev_resources_register(struct devlink *devlink) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); struct devlink_resource_size_params params = { .size_max = (u64)-1, .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; + struct net *net = nsim_devlink_net(devlink); int err; u64 n; @@@ -176,8 -127,7 +177,7 @@@ goto out; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, ¶ms); @@@ -186,8 -136,7 +186,7 @@@ return err; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, ¶ms); @@@ -206,8 -155,7 +205,7 @@@ goto out; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, ¶ms); @@@ -216,8 -164,7 +214,7 @@@ return err; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, ¶ms); @@@ -229,308 -176,31 +226,308 @@@ devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB, nsim_dev_ipv4_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB_RULES, nsim_dev_ipv4_fib_rules_res_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB, nsim_dev_ipv6_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_dev_ipv6_fib_rules_res_occ_get, - nsim_dev); + net); out: return err; } +enum nsim_devlink_param_id { + NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + NSIM_DEVLINK_PARAM_ID_TEST1, +}; + +static const struct devlink_param nsim_devlink_params[] = { + DEVLINK_PARAM_GENERIC(MAX_MACS, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1, + "test1", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), +}; + +static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev, + struct devlink *devlink) +{ + union devlink_param_value value; + + value.vu32 = nsim_dev->max_macs; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + value.vbool = nsim_dev->test1; + devlink_param_driverinit_value_set(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + value); +} + +static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + union devlink_param_value saved_value; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &saved_value); + if (!err) + nsim_dev->max_macs = saved_value.vu32; + err = devlink_param_driverinit_value_get(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + &saved_value); + if (!err) + nsim_dev->test1 = saved_value.vbool; +} + +#define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16 + +static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev, + struct devlink *devlink) +{ + nsim_dev->dummy_region = + devlink_region_create(devlink, "dummy", + NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX, + NSIM_DEV_DUMMY_REGION_SIZE); + return PTR_ERR_OR_ZERO(nsim_dev->dummy_region); +} + +static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) +{ + devlink_region_destroy(nsim_dev->dummy_region); +} + +struct nsim_trap_item { + void *trap_ctx; + enum devlink_trap_action action; +}; + +struct nsim_trap_data { + struct delayed_work trap_report_dw; + struct nsim_trap_item *trap_items_arr; + struct nsim_dev *nsim_dev; + spinlock_t trap_lock; /* Protects trap_items_arr */ +}; + +/* All driver-specific traps must be documented in + * Documentation/networking/devlink-trap-netdevsim.rst + */ +enum { + NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + NSIM_TRAP_ID_FID_MISS, +}; + +#define NSIM_TRAP_NAME_FID_MISS "fid_miss" + +#define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT + +#define NSIM_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + NSIM_TRAP_METADATA) +#define NSIM_TRAP_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + NSIM_TRAP_METADATA) +#define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id, \ + NSIM_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + NSIM_TRAP_METADATA) + +static const struct devlink_trap nsim_traps_arr[] = { + NSIM_TRAP_DROP(SMAC_MC, L2_DROPS), + NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), + NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS), + NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), + NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS), + NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS), +}; + +#define NSIM_TRAP_L4_DATA_LEN 100 + +static struct sk_buff *nsim_dev_trap_skb_build(void) +{ + int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN; + struct sk_buff *skb; + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NULL; + tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len; + + eth = skb_put(skb, sizeof(struct ethhdr)); + eth_random_addr(eth->h_dest); + eth_random_addr(eth->h_source); + eth->h_proto = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); + + iph = skb_put(skb, sizeof(struct iphdr)); + iph->protocol = IPPROTO_UDP; + iph->saddr = in_aton("192.0.2.1"); + iph->daddr = in_aton("198.51.100.1"); + iph->version = 0x4; + iph->frag_off = 0; + iph->ihl = 0x5; + iph->tot_len = htons(tot_len); + iph->ttl = 100; + ip_send_check(iph); + + udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len); + get_random_bytes(&udph->source, sizeof(u16)); + get_random_bytes(&udph->dest, sizeof(u16)); + udph->len = htons(sizeof(struct udphdr) + data_len); + + return skb; +} + +static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port) +{ + struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev; + struct devlink *devlink = priv_to_devlink(nsim_dev); + struct nsim_trap_data *nsim_trap_data; + int i; + + nsim_trap_data = nsim_dev->trap_data; + + spin_lock(&nsim_trap_data->trap_lock); + for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) { + struct nsim_trap_item *nsim_trap_item; + struct sk_buff *skb; + + nsim_trap_item = &nsim_trap_data->trap_items_arr[i]; + if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP) + continue; + + skb = nsim_dev_trap_skb_build(); + if (!skb) + continue; + skb->dev = nsim_dev_port->ns->netdev; + + /* Trapped packets are usually passed to devlink in softIRQ, + * but in this case they are generated in a workqueue. Disable + * softIRQs to prevent lockdep from complaining about + * "incosistent lock state". + */ + local_bh_disable(); + devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx, + &nsim_dev_port->devlink_port); + local_bh_enable(); + consume_skb(skb); + } + spin_unlock(&nsim_trap_data->trap_lock); +} + +#define NSIM_TRAP_REPORT_INTERVAL_MS 100 + +static void nsim_dev_trap_report_work(struct work_struct *work) +{ + struct nsim_trap_data *nsim_trap_data; + struct nsim_dev_port *nsim_dev_port; + struct nsim_dev *nsim_dev; + + nsim_trap_data = container_of(work, struct nsim_trap_data, + trap_report_dw.work); + nsim_dev = nsim_trap_data->nsim_dev; + + /* For each running port and enabled packet trap, generate a UDP + * packet with a random 5-tuple and report it. + */ + mutex_lock(&nsim_dev->port_list_lock); + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { + if (!netif_running(nsim_dev_port->ns->netdev)) + continue; + + nsim_dev_trap_report(nsim_dev_port); + } + mutex_unlock(&nsim_dev->port_list_lock); + + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); +} + +static int nsim_dev_traps_init(struct devlink *devlink) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_data *nsim_trap_data; + int err; + + nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL); + if (!nsim_trap_data) + return -ENOMEM; + + nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr), + sizeof(struct nsim_trap_item), + GFP_KERNEL); + if (!nsim_trap_data->trap_items_arr) { + err = -ENOMEM; + goto err_trap_data_free; + } + + /* The lock is used to protect the action state of the registered + * traps. The value is written by user and read in delayed work when + * iterating over all the traps. + */ + spin_lock_init(&nsim_trap_data->trap_lock); + nsim_trap_data->nsim_dev = nsim_dev; + nsim_dev->trap_data = nsim_trap_data; + + err = devlink_traps_register(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr), NULL); + if (err) + goto err_trap_items_free; + + INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw, + nsim_dev_trap_report_work); + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); + + return 0; + +err_trap_items_free: + kfree(nsim_trap_data->trap_items_arr); +err_trap_data_free: + kfree(nsim_trap_data); + return err; +} + +static void nsim_dev_traps_exit(struct devlink *devlink) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw); + devlink_traps_unregister(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr)); + kfree(nsim_dev->trap_data->trap_items_arr); + kfree(nsim_dev->trap_data); +} + static int nsim_dev_reload(struct devlink *devlink, struct netlink_ext_ack *extack) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES }; + struct net *net = nsim_devlink_net(devlink); int i; for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { @@@ -539,13 -209,11 +536,12 @@@ err = devlink_resource_size_get(devlink, res_ids[i], &val); if (!err) { - err = nsim_fib_set_max(nsim_dev->fib_data, - res_ids[i], val, extack); + err = nsim_fib_set_max(net, res_ids[i], val, extack); if (err) return err; } } + nsim_devlink_param_load_driverinit_values(devlink); return 0; } @@@ -590,66 -258,11 +586,66 @@@ static int nsim_dev_flash_update(struc return 0; } +static struct nsim_trap_item * +nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id) +{ + struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data; + int i; + + for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) { + if (nsim_traps_arr[i].id == trap_id) + return &nsim_trap_data->trap_items_arr[i]; + } + + return NULL; +} + +static int nsim_dev_devlink_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_item *nsim_trap_item; + + nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id); + if (WARN_ON(!nsim_trap_item)) + return -ENOENT; + + nsim_trap_item->trap_ctx = trap_ctx; + nsim_trap_item->action = trap->init_action; + + return 0; +} + +static int +nsim_dev_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action) +{ + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_item *nsim_trap_item; + + nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id); + if (WARN_ON(!nsim_trap_item)) + return -ENOENT; + + spin_lock(&nsim_dev->trap_data->trap_lock); + nsim_trap_item->action = action; + spin_unlock(&nsim_dev->trap_data->trap_lock); + + return 0; +} + static const struct devlink_ops nsim_dev_devlink_ops = { .reload = nsim_dev_reload, .flash_update = nsim_dev_flash_update, + .trap_init = nsim_dev_devlink_trap_init, + .trap_action_set = nsim_dev_devlink_trap_action_set, }; +#define NSIM_DEV_MAX_MACS_DEFAULT 32 +#define NSIM_DEV_TEST1_DEFAULT true + static struct nsim_dev * nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) { @@@ -667,63 -280,31 +663,55 @@@ INIT_LIST_HEAD(&nsim_dev->port_list); mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; + nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; + nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT; - nsim_dev->fib_data = nsim_fib_create(); - if (IS_ERR(nsim_dev->fib_data)) { - err = PTR_ERR(nsim_dev->fib_data); - goto err_devlink_free; - } - err = nsim_dev_resources_register(devlink); if (err) - goto err_fib_destroy; + goto err_devlink_free; err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) goto err_resources_unregister; - err = nsim_dev_debugfs_init(nsim_dev); + err = devlink_params_register(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); if (err) goto err_dl_unregister; + nsim_devlink_set_params_init_values(nsim_dev, devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_params_unregister; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + err = nsim_dev_debugfs_init(nsim_dev); + if (err) + goto err_traps_exit; err = nsim_bpf_dev_init(nsim_dev); if (err) goto err_debugfs_exit; + devlink_params_publish(devlink); return nsim_dev; err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); +err_traps_exit: + nsim_dev_traps_exit(devlink); +err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); +err_params_unregister: + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); err_resources_unregister: devlink_resources_unregister(devlink, NULL); - err_fib_destroy: - nsim_fib_destroy(nsim_dev->fib_data); err_devlink_free: devlink_free(devlink); return ERR_PTR(err); @@@ -735,13 -316,8 +723,12 @@@ static void nsim_dev_destroy(struct nsi nsim_bpf_dev_exit(nsim_dev); nsim_dev_debugfs_exit(nsim_dev); + nsim_dev_traps_exit(devlink); + nsim_dev_dummy_region_exit(nsim_dev); + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); - nsim_fib_destroy(nsim_dev->fib_data); mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); } diff --combined drivers/net/netdevsim/netdevsim.h index 262a6978bbca,9404637d34b7..66bf13765ad0 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@@ -145,7 -145,6 +145,7 @@@ struct nsim_dev_port struct nsim_dev { struct nsim_bus_dev *nsim_bus_dev; struct nsim_fib_data *fib_data; + struct nsim_trap_data *trap_data; struct dentry *ddir; struct dentry *ports_ddir; struct bpf_offload_dev *bpf_dev; @@@ -159,9 -158,6 +159,9 @@@ struct list_head port_list; struct mutex port_list_lock; /* protects port list */ bool fw_update_status; + u32 max_macs; + bool test1; + struct devlink_region *dummy_region; }; int nsim_dev_init(void); @@@ -173,12 -169,10 +173,10 @@@ int nsim_dev_port_add(struct nsim_bus_d int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index); - struct nsim_fib_data *nsim_fib_create(void); - void nsim_fib_destroy(struct nsim_fib_data *fib_data); - u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max); - int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, + int nsim_fib_init(void); + void nsim_fib_exit(void); + u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); + int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) diff --combined drivers/net/phy/at803x.c index d98aa56710a9,6ad8b1c63c34..2aa7b2e60046 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@@ -249,40 -249,28 +249,24 @@@ static int at803x_config_init(struct ph { int ret; - ret = genphy_config_init(phydev); - if (ret < 0) - return ret; - /* The RX and TX delay default is: * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the * value before reset. - * - * So let's first disable the RX and TX delays in PHY and enable - * them based on the mode selected (this also takes care of RGMII - * mode where we expect delays to be disabled) */ - - ret = at803x_disable_rx_delay(phydev); - if (ret < 0) - return ret; - ret = at803x_disable_tx_delay(phydev); - if (ret < 0) - return ret; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - /* If RGMII_ID or RGMII_RXID are specified enable RX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) ret = at803x_enable_rx_delay(phydev); - if (ret < 0) - return ret; - } + else + ret = at803x_disable_rx_delay(phydev); + if (ret < 0) + return ret; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - /* If RGMII_ID or RGMII_TXID are specified enable TX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) ret = at803x_enable_tx_delay(phydev); - } + else + ret = at803x_disable_tx_delay(phydev); return ret; } diff --combined drivers/net/phy/phy_device.c index d5db7604d7c4,27ebc2c6c2d0..d347ddcac45b --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@@ -1564,20 -1564,24 +1564,20 @@@ EXPORT_SYMBOL(phy_reset_after_clk_enabl */ static int genphy_config_advert(struct phy_device *phydev) { - u32 advertise; - int bmsr, adv; - int err, changed = 0; + int err, bmsr, changed = 0; + u32 adv; /* Only allow advertising what this PHY supports */ linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - if (!ethtool_convert_link_mode_to_legacy_u32(&advertise, - phydev->advertising)) - phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n", - __ETHTOOL_LINK_MODE_MASK_NBITS, - phydev->advertising); + + adv = linkmode_adv_to_mii_adv_t(phydev->advertising); /* Setup standard advertisement */ err = phy_modify_changed(phydev, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, - ethtool_adv_to_mii_adv_t(advertise)); + adv); if (err < 0) return err; if (err > 0) @@@ -1594,7 -1598,13 +1594,7 @@@ if (!(bmsr & BMSR_ESTATEN)) return changed; - /* Configure gigabit if it's supported */ - adv = 0; - if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - phydev->supported) || - linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - phydev->supported)) - adv = ethtool_adv_to_mii_ctrl1000_t(advertise); + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); err = phy_modify_changed(phydev, MII_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, @@@ -1671,20 -1681,18 +1671,20 @@@ int genphy_restart_aneg(struct phy_devi EXPORT_SYMBOL(genphy_restart_aneg); /** - * genphy_config_aneg - restart auto-negotiation or write BMCR + * __genphy_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct + * @changed: whether autoneg is requested * * Description: If auto-negotiation is enabled, we configure the * advertising, and then restart auto-negotiation. If it is not * enabled, then we write the BMCR. */ -int genphy_config_aneg(struct phy_device *phydev) +int __genphy_config_aneg(struct phy_device *phydev, bool changed) { - int err, changed; + int err; - changed = genphy_config_eee_advert(phydev); + if (genphy_config_eee_advert(phydev)) + changed = true; if (AUTONEG_ENABLE != phydev->autoneg) return genphy_setup_forced(phydev); @@@ -1692,10 -1700,10 +1692,10 @@@ err = genphy_config_advert(phydev); if (err < 0) /* error */ return err; + else if (err) + changed = true; - changed |= err; - - if (changed == 0) { + if (!changed) { /* Advertisement hasn't changed, but maybe aneg was never on to * begin with? Or maybe phy was isolated? */ @@@ -1705,15 -1713,18 +1705,15 @@@ return ctl; if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = 1; /* do restart aneg */ + changed = true; /* do restart aneg */ } /* Only restart aneg if we are advertising something different * than we were before. */ - if (changed > 0) - return genphy_restart_aneg(phydev); - - return 0; + return changed ? genphy_restart_aneg(phydev) : 0; } -EXPORT_SYMBOL(genphy_config_aneg); +EXPORT_SYMBOL(__genphy_config_aneg); /** * genphy_aneg_done - return auto-negotiation status @@@ -1741,7 -1752,17 +1741,17 @@@ EXPORT_SYMBOL(genphy_aneg_done) */ int genphy_update_link(struct phy_device *phydev) { - int status; + int status = 0, bmcr; + + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + /* Autoneg is being started, therefore disregard BMSR value and + * report link as down. + */ + if (bmcr & BMCR_ANRESTART) + goto done; /* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status @@@ -1784,7 -1805,7 +1794,7 @@@ EXPORT_SYMBOL(genphy_update_link) */ int genphy_read_status(struct phy_device *phydev) { - int adv, lpa, lpagb, err, old_link = phydev->link; + int lpa, lpagb, err, old_link = phydev->link; /* Update the link, but return if there was an error */ err = genphy_update_link(phydev); @@@ -1800,18 -1821,19 +1810,18 @@@ phydev->pause = 0; phydev->asym_pause = 0; - linkmode_zero(phydev->lp_advertising); - if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { if (phydev->is_gigabit_capable) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) return lpagb; - adv = phy_read(phydev, MII_CTRL1000); - if (adv < 0) - return adv; - if (lpagb & LPA_1000MSFAIL) { + int adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + if (adv & CTL1000_ENABLE_MASTER) phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n"); else @@@ -1885,6 -1907,57 +1895,6 @@@ int genphy_soft_reset(struct phy_devic } EXPORT_SYMBOL(genphy_soft_reset); -int genphy_config_init(struct phy_device *phydev) -{ - int val; - __ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, }; - - linkmode_set_bit_array(phy_basic_ports_array, - ARRAY_SIZE(phy_basic_ports_array), - features); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features); - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features); - - /* Do we support autonegotiation? */ - val = phy_read(phydev, MII_BMSR); - if (val < 0) - return val; - - if (val & BMSR_ANEGCAPABLE) - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features); - - if (val & BMSR_100FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features); - if (val & BMSR_100HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features); - if (val & BMSR_10FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features); - if (val & BMSR_10HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features); - - if (val & BMSR_ESTATEN) { - val = phy_read(phydev, MII_ESTATUS); - if (val < 0) - return val; - - if (val & ESTATUS_1000_TFULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - features); - if (val & ESTATUS_1000_THALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - features); - if (val & ESTATUS_1000_XFULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, - features); - } - - linkmode_and(phydev->supported, phydev->supported, features); - linkmode_and(phydev->advertising, phydev->advertising, features); - - return 0; -} -EXPORT_SYMBOL(genphy_config_init); - /** * genphy_read_abilities - read PHY abilities from Clause 22 registers * @phydev: target phy_device struct diff --combined drivers/net/usb/lan78xx.c index 769bb262fbec,f033fee225a1..58f5a219fb65 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@@ -1258,7 -1258,8 +1258,7 @@@ static void lan78xx_status(struct lan78 return; } - memcpy(&intdata, urb->transfer_buffer, 4); - le32_to_cpus(&intdata); + intdata = get_unaligned_le32(urb->transfer_buffer); if (intdata & INT_ENP_PHY_INT) { netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); @@@ -2729,7 -2730,6 +2729,7 @@@ static struct sk_buff *lan78xx_tx_prep( struct sk_buff *skb, gfp_t flags) { u32 tx_cmd_a, tx_cmd_b; + void *ptr; if (skb_cow_head(skb, TX_OVERHEAD)) { dev_kfree_skb_any(skb); @@@ -2758,9 -2758,13 +2758,9 @@@ tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_; } - skb_push(skb, 4); - cpu_to_le32s(&tx_cmd_b); - memcpy(skb->data, &tx_cmd_b, 4); - - skb_push(skb, 4); - cpu_to_le32s(&tx_cmd_a); - memcpy(skb->data, &tx_cmd_a, 4); + ptr = skb_push(skb, 8); + put_unaligned_le32(tx_cmd_a, ptr); + put_unaligned_le32(tx_cmd_b, ptr + 4); return skb; } @@@ -3101,13 -3105,16 +3101,13 @@@ static int lan78xx_rx(struct lan78xx_ne struct sk_buff *skb2; unsigned char *packet; - memcpy(&rx_cmd_a, skb->data, sizeof(rx_cmd_a)); - le32_to_cpus(&rx_cmd_a); + rx_cmd_a = get_unaligned_le32(skb->data); skb_pull(skb, sizeof(rx_cmd_a)); - memcpy(&rx_cmd_b, skb->data, sizeof(rx_cmd_b)); - le32_to_cpus(&rx_cmd_b); + rx_cmd_b = get_unaligned_le32(skb->data); skb_pull(skb, sizeof(rx_cmd_b)); - memcpy(&rx_cmd_c, skb->data, sizeof(rx_cmd_c)); - le16_to_cpus(&rx_cmd_c); + rx_cmd_c = get_unaligned_le16(skb->data); skb_pull(skb, sizeof(rx_cmd_c)); packet = skb->data; @@@ -3785,7 -3792,7 +3785,7 @@@ static int lan78xx_probe(struct usb_int ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out3; + goto out4; } usb_set_intfdata(intf, dev); @@@ -3800,12 -3807,14 +3800,14 @@@ ret = lan78xx_phy_init(dev); if (ret < 0) - goto out4; + goto out5; return 0; - out4: + out5: unregister_netdev(netdev); + out4: + usb_free_urb(dev->urb_intr); out3: lan78xx_unbind(dev, intf); out2: diff --combined drivers/net/xen-netback/netback.c index 4679fcf1a1c4,c9262ffeefe4..0020b2e8c279 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@@ -136,12 -136,12 +136,12 @@@ static inline struct xenvif_queue *ubuf static u16 frag_get_pending_idx(skb_frag_t *frag) { - return (u16)frag->page_offset; + return (u16)skb_frag_off(frag); } static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) { - frag->page_offset = pending_idx; + skb_frag_off_set(frag, pending_idx); } static inline pending_ring_idx_t pending_index(unsigned i) @@@ -925,6 -925,7 +925,7 @@@ static void xenvif_tx_build_gops(struc skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@@ -940,6 -941,7 +941,7 @@@ if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; @@@ -1055,7 -1057,7 +1057,7 @@@ static int xenvif_handle_frag_list(stru int j; skb->truesize += skb->data_len; for (j = 0; j < i; j++) - put_page(frags[j].page.p); + put_page(skb_frag_page(&frags[j])); return -ENOMEM; } @@@ -1067,8 -1069,8 +1069,8 @@@ BUG(); offset += len; - frags[i].page.p = page; - frags[i].page_offset = 0; + __skb_frag_set_page(&frags[i], page); + skb_frag_off_set(&frags[i], 0); skb_frag_size_set(&frags[i], len); } @@@ -1653,6 -1655,9 +1655,6 @@@ static int __init netback_init(void #ifdef CONFIG_DEBUG_FS xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); - if (IS_ERR_OR_NULL(xen_netback_dbg_root)) - pr_warn("Init of debugfs returned %ld!\n", - PTR_ERR(xen_netback_dbg_root)); #endif /* CONFIG_DEBUG_FS */ return 0; diff --combined drivers/s390/net/qeth_core_main.c index 5aa0f1268bca,9c3310c4d61d..0803070246aa --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -544,6 -544,7 +544,7 @@@ static struct qeth_reply *qeth_alloc_re if (reply) { refcount_set(&reply->refcnt, 1); init_completion(&reply->received); + spin_lock_init(&reply->lock); } return reply; } @@@ -799,6 -800,13 +800,13 @@@ static void qeth_issue_next_read_cb(str if (!reply->callback) { rc = 0; + goto no_callback; + } + + spin_lock_irqsave(&reply->lock, flags); + if (reply->rc) { + /* Bail out when the requestor has already left: */ + rc = reply->rc; } else { if (cmd) { reply->offset = (u16)((char *)cmd - (char *)iob->data); @@@ -807,7 -815,9 +815,9 @@@ rc = reply->callback(card, reply, (unsigned long)iob); } } + spin_unlock_irqrestore(&reply->lock, flags); + no_callback: if (rc <= 0) qeth_notify_reply(reply, rc); qeth_put_reply(reply); @@@ -1749,6 -1759,16 +1759,16 @@@ static int qeth_send_control_data(struc rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME; qeth_dequeue_reply(card, reply); + + if (reply_cb) { + /* Wait until the callback for a late reply has completed: */ + spin_lock_irq(&reply->lock); + if (rc) + /* Zap any callback that's still pending: */ + reply->rc = rc; + spin_unlock_irq(&reply->lock); + } + if (!rc) rc = reply->rc; qeth_put_reply(reply); @@@ -3515,7 -3535,7 +3535,7 @@@ static int qeth_get_elements_for_frags( int cnt, elements = 0; for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[cnt]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt]; elements += qeth_get_elements_for_range( (addr_t)skb_frag_address(frag), diff --combined drivers/staging/unisys/visornic/visornic_main.c index 6fa7726185de,40dd573e73c3..1d1440d43002 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@@ -284,9 -284,9 +284,9 @@@ static int visor_copy_fragsinfo_from_sk for (frag = 0; frag < numfrags; frag++) { count = add_physinfo_entries(page_to_pfn( skb_frag_page(&skb_shinfo(skb)->frags[frag])), - skb_shinfo(skb)->frags[frag].page_offset, - skb_shinfo(skb)->frags[frag].size, count, - frags_max, frags); + skb_frag_off(&skb_shinfo(skb)->frags[frag]), + skb_frag_size(&skb_shinfo(skb)->frags[frag]), + count, frags_max, frags); /* add_physinfo_entries only returns * zero if the frags array is out of room * That should never happen because we @@@ -1750,7 -1750,8 +1750,8 @@@ static int visornic_poll(struct napi_st } /* poll_for_irq - checks the status of the response queue - * @v: Void pointer to the visronic devdata struct. + * @t: pointer to the 'struct timer_list' from which we can retrieve the + * the visornic devdata struct. * * Main function of the vnic_incoming thread. Periodically check the response * queue and drain it if needed. diff --combined include/linux/mlx5/mlx5_ifc.h index da5e7eaed438,b8b570c30b5e..a66ed0abe40e --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@@ -1040,21 -1040,6 +1040,21 @@@ enum MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, }; +#define MLX5_FC_BULK_SIZE_FACTOR 128 + +enum mlx5_fc_bulk_alloc_bitmask { + MLX5_FC_BULK_128 = (1 << 0), + MLX5_FC_BULK_256 = (1 << 1), + MLX5_FC_BULK_512 = (1 << 2), + MLX5_FC_BULK_1024 = (1 << 3), + MLX5_FC_BULK_2048 = (1 << 4), + MLX5_FC_BULK_4096 = (1 << 5), + MLX5_FC_BULK_8192 = (1 << 6), + MLX5_FC_BULK_16384 = (1 << 7), +}; + +#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@@ -1259,8 -1244,7 +1259,8 @@@ u8 reserved_at_2e0[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_at_300[0x18]; + u8 reserved_at_300[0x10]; + u8 flow_counter_bulk_alloc[0x8]; u8 log_max_mcg[0x8]; u8 reserved_at_320[0x3]; @@@ -2782,7 -2766,7 +2782,7 @@@ struct mlx5_ifc_traffic_counter_bits struct mlx5_ifc_tisc_bits { u8 strict_lag_tx_port_affinity[0x1]; u8 tls_en[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_2[0x2]; u8 lag_tx_port_affinity[0x04]; u8 reserved_at_8[0x4]; @@@ -2957,13 -2941,6 +2957,13 @@@ enum SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, }; +enum { + ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0, + ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, + ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, +}; + struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; @@@ -7840,8 -7817,7 +7840,8 @@@ struct mlx5_ifc_alloc_flow_counter_in_b u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x38]; + u8 flow_counter_bulk[0x8]; }; struct mlx5_ifc_add_vxlan_udp_dport_out_bits { @@@ -10078,9 -10054,8 +10078,8 @@@ struct mlx5_ifc_tls_static_params_bits }; struct mlx5_ifc_tls_progress_params_bits { - u8 valid[0x1]; - u8 reserved_at_1[0x7]; - u8 pd[0x18]; + u8 reserved_at_0[0x8]; + u8 tisn[0x18]; u8 next_record_tcp_sn[0x20]; diff --combined include/linux/skbuff.h index 7eb28b72d9ba,ba5583522d24..77c6dc88e95d --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -14,7 -14,6 +14,7 @@@ #include #include #include +#include #include #include #include @@@ -309,45 -308,58 +309,45 @@@ extern int sysctl_max_skb_frags */ #define GSO_BY_FRAGS 0xFFFF -typedef struct skb_frag_struct skb_frag_t; - -struct skb_frag_struct { - struct { - struct page *p; - } page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; - __u32 size; -#else - __u16 page_offset; - __u16 size; -#endif -}; +typedef struct bio_vec skb_frag_t; /** - * skb_frag_size - Returns the size of a skb fragment + * skb_frag_size() - Returns the size of a skb fragment * @frag: skb fragment */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->size; + return frag->bv_len; } /** - * skb_frag_size_set - Sets the size of a skb fragment + * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment * @size: size of fragment */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->size = size; + frag->bv_len = size; } /** - * skb_frag_size_add - Incrementes the size of a skb fragment by %delta + * skb_frag_size_add() - Increments the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->size += delta; + frag->bv_len += delta; } /** - * skb_frag_size_sub - Decrements the size of a skb fragment by %delta + * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to subtract */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->size -= delta; + frag->bv_len -= delta; } /** @@@ -367,7 -379,7 +367,7 @@@ static inline bool skb_frag_must_loop(s * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->page.p + * @f_off: offset from start of f->bv_page * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@@ -1271,7 -1283,7 +1271,7 @@@ static inline int skb_flow_dissector_bp struct bpf_flow_dissector; bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen); + __be16 proto, int nhoff, int hlen, unsigned int flags); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, @@@ -1362,6 -1374,14 +1362,14 @@@ static inline void skb_copy_hash(struc to->l4_hash = from->l4_hash; }; + static inline void skb_copy_decrypted(struct sk_buff *to, + const struct sk_buff *from) + { + #ifdef CONFIG_TLS_DEVICE + to->decrypted = from->decrypted; + #endif + } + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@@ -2077,8 -2097,8 +2085,8 @@@ static inline void __skb_fill_page_desc * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ - frag->page.p = page; - frag->page_offset = off; + frag->bv_page = page; + frag->bv_offset = off; skb_frag_size_set(frag, size); page = compound_head(page); @@@ -2857,46 -2877,6 +2865,46 @@@ static inline void skb_propagate_pfmema skb->pfmemalloc = true; } +/** + * skb_frag_off() - Returns the offset of a skb fragment + * @frag: the paged fragment + */ +static inline unsigned int skb_frag_off(const skb_frag_t *frag) +{ + return frag->bv_offset; +} + +/** + * skb_frag_off_add() - Increments the offset of a skb fragment by @delta + * @frag: skb fragment + * @delta: value to add + */ +static inline void skb_frag_off_add(skb_frag_t *frag, int delta) +{ + frag->bv_offset += delta; +} + +/** + * skb_frag_off_set() - Sets the offset of a skb fragment + * @frag: skb fragment + * @offset: offset of fragment + */ +static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) +{ + frag->bv_offset = offset; +} + +/** + * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment + * @fragto: skb fragment where offset is set + * @fragfrom: skb fragment offset is copied from + */ +static inline void skb_frag_off_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) +{ + fragto->bv_offset = fragfrom->bv_offset; +} + /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment @@@ -2905,7 -2885,7 +2913,7 @@@ */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->page.p; + return frag->bv_page; } /** @@@ -2963,7 -2943,7 +2971,7 @@@ static inline void skb_frag_unref(struc */ static inline void *skb_frag_address(const skb_frag_t *frag) { - return page_address(skb_frag_page(frag)) + frag->page_offset; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } /** @@@ -2979,18 -2959,7 +2987,18 @@@ static inline void *skb_frag_address_sa if (unlikely(!ptr)) return NULL; - return ptr + frag->page_offset; + return ptr + skb_frag_off(frag); +} + +/** + * skb_frag_page_copy() - sets the page in a fragment from another fragment + * @fragto: skb fragment where page is set + * @fragfrom: skb fragment page is copied from + */ +static inline void skb_frag_page_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) +{ + fragto->bv_page = fragfrom->bv_page; } /** @@@ -3002,7 -2971,7 +3010,7 @@@ */ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) { - frag->page.p = page; + frag->bv_page = page; } /** @@@ -3038,7 -3007,7 +3046,7 @@@ static inline dma_addr_t skb_frag_dma_m enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), - frag->page_offset + offset, size, dir); + skb_frag_off(frag) + offset, size, dir); } static inline struct sk_buff *pskb_copy(struct sk_buff *skb, @@@ -3205,10 -3174,10 +3213,10 @@@ static inline bool skb_can_coalesce(str if (skb_zcopy(skb)) return false; if (i) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; return page == skb_frag_page(frag) && - off == frag->page_offset + skb_frag_size(frag); + off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } diff --combined include/net/netfilter/nf_tables.h index dc301e3d6739,475d6f28ca67..e73d16f8b870 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -25,7 -25,6 +25,7 @@@ struct nft_pktinfo struct xt_action_param xt; }; +#if IS_ENABLED(CONFIG_NETFILTER) static inline struct net *nft_net(const struct nft_pktinfo *pkt) { return pkt->xt.state->net; @@@ -58,7 -57,6 +58,7 @@@ static inline void nft_set_pktinfo(stru pkt->skb = skb; pkt->xt.state = state; } +#endif static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, struct sk_buff *skb) @@@ -423,8 -421,7 +423,7 @@@ struct nft_set unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:13, - bound:1, + u16 flags:14, genmask:2; u8 klen; u8 dlen; @@@ -929,11 -926,9 +928,11 @@@ struct nft_chain_type int family; struct module *owner; unsigned int hook_mask; +#if IS_ENABLED(CONFIG_NETFILTER) nf_hookfn *hooks[NF_MAX_HOOKS]; int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); +#endif }; int nft_chain_validate_dependency(const struct nft_chain *chain, @@@ -959,9 -954,7 +958,9 @@@ struct nft_stats * @flow_block: flow block (for hardware offload) */ struct nft_base_chain { +#if IS_ENABLED(CONFIG_NETFILTER) struct nf_hook_ops ops; +#endif const struct nft_chain_type *type; u8 policy; u8 flags; @@@ -1158,9 -1151,7 +1157,9 @@@ struct nft_flowtable use:30; u64 handle; /* runtime data below here */ +#if IS_ENABLED(CONFIG_NETFILTER) struct nf_hook_ops *ops ____cacheline_aligned; +#endif struct nf_flowtable data; }; @@@ -1215,8 -1206,6 +1214,8 @@@ void nft_trace_notify(struct nft_tracei #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type)) +#if IS_ENABLED(CONFIG_NF_TABLES) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@@ -1290,8 -1279,6 +1289,8 @@@ static inline void nft_set_elem_change_ ext->genmask ^= nft_genmask_next(net); } +#endif /* IS_ENABLED(CONFIG_NF_TABLES) */ + /* * We use a free bit in the genmask field to indicate the element * is busy, meaning it is currently being processed either by @@@ -1360,12 -1347,15 +1359,15 @@@ struct nft_trans_rule struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) + #define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; @@@ -1396,12 -1386,15 +1398,15 @@@ struct nft_trans_table struct nft_trans_elem { struct nft_set *set; struct nft_set_elem elem; + bool bound; }; #define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) + #define nft_trans_elem_set_bound(trans) \ + (((struct nft_trans_elem *)trans->data)->bound) struct nft_trans_obj { struct nft_object *obj; diff --combined include/net/netfilter/nf_tables_offload.h index 8a5969d9b80b,c8b9dec376f5..db104665a9e4 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@@ -9,7 -9,6 +9,7 @@@ struct nft_offload_reg u32 len; u32 base_offset; u32 offset; + struct nft_data data; struct nft_data mask; }; @@@ -64,10 -63,6 +64,10 @@@ struct nft_rule struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule); void nft_flow_rule_destroy(struct nft_flow_rule *flow); int nft_flow_rule_offload_commit(struct net *net); +void nft_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command); #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ (__reg)->base_offset = \ @@@ -78,4 -73,6 +78,6 @@@ (__reg)->key = __key; \ memset(&(__reg)->mask, 0xff, (__reg)->len); + int nft_chain_offload_priority(struct nft_base_chain *basechain); + #endif diff --combined include/net/pkt_cls.h index 0790a4ed909c,98be18ef1ed3..64999ffcb486 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@@ -70,6 -70,15 +70,6 @@@ static inline struct Qdisc *tcf_block_q return block->q; } -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@@ -128,6 -137,32 +128,6 @@@ void tc_setup_cb_block_unregister(struc { } -static inline -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - return 0; -} - -static inline -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - return 0; -} - -static inline -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ -} - -static inline -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ -} - static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { @@@ -611,7 -646,7 +611,7 @@@ tc_cls_common_offload_init(struct flow_ { cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; + cls_common->prio = tp->prio >> 16; if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE) cls_common->extack = extack; } diff --combined include/uapi/linux/bpf.h index 4393bd4b2419,a5aa7d3ac6a1..0e66371bea13 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@@ -134,7 -134,6 +134,7 @@@ enum bpf_map_type BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as @@@ -1467,8 -1466,8 +1467,8 @@@ union bpf_attr * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@@ -2714,33 -2713,6 +2714,33 @@@ * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -2852,8 -2824,7 +2852,8 @@@ FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@@ -3536,10 -3507,6 +3536,10 @@@ enum bpf_task_fd_type BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@@ -3561,8 -3528,6 +3561,8 @@@ __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; }; struct bpf_func_info { diff --combined net/ipv4/tcp.c index f8fa1686f7f3,77b485d60b9d..051ef10374f6 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -984,6 -984,9 +984,9 @@@ new_segment if (!skb) goto wait_for_memory; + #ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); + #endif skb_entail(sk, skb); copy = size_goal; } @@@ -1162,7 -1165,7 +1165,7 @@@ int tcp_sendmsg_locked(struct sock *sk struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; - bool process_backlog = false; + int process_backlog = 0; bool zc = false; long timeo; @@@ -1254,10 -1257,9 +1257,10 @@@ new_segment if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - if (process_backlog && sk_flush_backlog(sk)) { - process_backlog = false; - goto restart; + if (unlikely(process_backlog >= 16)) { + process_backlog = 0; + if (sk_flush_backlog(sk)) + goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, @@@ -1265,7 -1267,7 +1268,7 @@@ if (!skb) goto wait_for_memory; - process_backlog = true; + process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); @@@ -1777,21 -1779,19 +1780,21 @@@ static int tcp_zerocopy_receive(struct break; frags = skb_shinfo(skb)->frags; while (offset) { - if (frags->size > offset) + if (skb_frag_size(frags) > offset) goto out; - offset -= frags->size; + offset -= skb_frag_size(frags); frags++; } } - if (frags->size != PAGE_SIZE || frags->page_offset) { + if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; + int size = skb_frag_size(frags); - while (remaining && (frags->size != PAGE_SIZE || - frags->page_offset)) { - remaining -= frags->size; + while (remaining && (size != PAGE_SIZE || + skb_frag_off(frags))) { + remaining -= size; frags++; + size = skb_frag_size(frags); } zc->recv_skip_hint -= remaining; break; @@@ -3784,8 -3784,8 +3787,8 @@@ int tcp_md5_hash_skb_data(struct tcp_md return 1; for (i = 0; i < shi->nr_frags; ++i) { - const struct skb_frag_struct *f = &shi->frags[i]; - unsigned int offset = f->page_offset; + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); sg_set_page(&sg, page, skb_frag_size(f), diff --combined net/ipv4/tcp_output.c index e6d02e05bb1c,979520e46e33..5c46bc4c7e8d --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@@ -1320,6 -1320,7 +1320,7 @@@ int tcp_fragment(struct sock *sk, enum buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ + skb_copy_decrypted(buff, skb); sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@@ -1402,7 -1403,7 +1403,7 @@@ static int __pskb_trim_head(struct sk_b } else { shinfo->frags[k] = shinfo->frags[i]; if (eat) { - shinfo->frags[k].page_offset += eat; + skb_frag_off_add(&shinfo->frags[k], eat); skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } @@@ -1874,6 -1875,7 +1875,7 @@@ static int tso_fragment(struct sock *sk buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; + skb_copy_decrypted(buff, skb); sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@@ -2143,6 -2145,7 +2145,7 @@@ static int tcp_mtu_probe(struct sock *s sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); + skb_copy_decrypted(nskb, skb); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; diff --combined net/netfilter/nf_tables_api.c index fe3b7b0c6c66,d47469f824a1..6d00bef023c4 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -138,9 -138,14 +138,14 @@@ static void nft_set_trans_bind(const st return; list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET && - nft_trans_set(trans) == set) { - set->bound = true; + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_trans_set(trans) == set) + nft_trans_set_bound(trans) = true; + break; + case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans) == set) + nft_trans_elem_set_bound(trans) = true; break; } } @@@ -1662,6 -1667,10 +1667,10 @@@ static int nf_tables_addchain(struct nf chain->flags |= NFT_BASE_CHAIN | flags; basechain->policy = NF_ACCEPT; + if (chain->flags & NFT_CHAIN_HW_OFFLOAD && + nft_chain_offload_priority(basechain) < 0) + return -EOPNOTSUPP; + flow_block_init(&basechain->flow_block); } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); @@@ -6906,7 -6915,7 +6915,7 @@@ static int __nf_tables_abort(struct ne break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (nft_trans_set(trans)->bound) { + if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } @@@ -6918,7 -6927,7 +6927,7 @@@ nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_set(trans)->bound) { + if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } @@@ -7593,11 -7602,6 +7602,11 @@@ static struct pernet_operations nf_tabl .exit = nf_tables_exit_net, }; +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = nft_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + static int __init nf_tables_module_init(void) { int err; @@@ -7629,7 -7633,6 +7638,7 @@@ goto err5; nft_chain_route_init(); + flow_indr_add_block_ing_cb(&block_ing_entry); return err; err5: rhltable_destroy(&nft_objname_ht); @@@ -7646,7 -7649,6 +7655,7 @@@ err1 static void __exit nf_tables_module_exit(void) { + flow_indr_del_block_ing_cb(&block_ing_entry); nfnetlink_subsys_unregister(&nf_tables_subsys); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); diff --combined net/netfilter/nf_tables_offload.c index d3c4c9c88bc8,c0d18c1d77ac..3c2725ade61b --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@@ -103,10 -103,11 +103,11 @@@ void nft_offload_update_dependency(stru } static void nft_flow_offload_common_init(struct flow_cls_common_offload *common, - __be16 proto, - struct netlink_ext_ack *extack) + __be16 proto, int priority, + struct netlink_ext_ack *extack) { common->protocol = proto; + common->prio = priority; common->extack = extack; } @@@ -124,6 -125,15 +125,15 @@@ static int nft_setup_cb_call(struct nft return 0; } + int nft_chain_offload_priority(struct nft_base_chain *basechain) + { + if (basechain->ops.priority <= 0 || + basechain->ops.priority > USHRT_MAX) + return -1; + + return 0; + } + static int nft_flow_offload_rule(struct nft_trans *trans, enum flow_cls_command command) { @@@ -142,7 -152,8 +152,8 @@@ if (flow) proto = flow->proto; - nft_flow_offload_common_init(&cls_flow.common, proto, &extack); + nft_flow_offload_common_init(&cls_flow.common, proto, + basechain->ops.priority, &extack); cls_flow.command = command; cls_flow.cookie = (unsigned long) rule; if (flow) @@@ -171,110 -182,24 +182,110 @@@ static int nft_flow_offload_unbind(stru return 0; } +static int nft_block_setup(struct nft_base_chain *basechain, + struct flow_block_offload *bo, + enum flow_block_command cmd) +{ + int err; + + switch (cmd) { + case FLOW_BLOCK_BIND: + err = nft_flow_offload_bind(bo, basechain); + break; + case FLOW_BLOCK_UNBIND: + err = nft_flow_offload_unbind(bo, basechain); + break; + default: + WARN_ON_ONCE(1); + err = -EOPNOTSUPP; + } + + return err; +} + +static int nft_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) +{ + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo = {}; + int err; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + if (err < 0) + return err; + + return nft_block_setup(chain, &bo, cmd); +} + +static void nft_indr_block_ing_cmd(struct net_device *dev, + struct nft_base_chain *chain, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo = {}; + + if (!chain) + return; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); + + nft_block_setup(chain, &bo, cmd); +} + +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) +{ + struct flow_block_offload bo = {}; + struct netlink_ext_ack extack = {}; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + flow_indr_block_call(dev, &bo, cmd); + + if (list_empty(&bo.cb_list)) + return -EOPNOTSUPP; + + return nft_block_setup(chain, &bo, cmd); +} + #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK static int nft_flow_offload_chain(struct nft_trans *trans, enum flow_block_command cmd) { struct nft_chain *chain = trans->ctx.chain; - struct netlink_ext_ack extack = {}; - struct flow_block_offload bo = {}; struct nft_base_chain *basechain; struct net_device *dev; - int err; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); dev = basechain->ops.dev; - if (!dev || !dev->netdev_ops->ndo_setup_tc) + if (!dev) return -EOPNOTSUPP; /* Only default policy to accept is supported for now. */ @@@ -283,10 -208,26 +294,10 @@@ nft_trans_chain_policy(trans) != NF_ACCEPT) return -EOPNOTSUPP; - bo.command = cmd; - bo.block = &basechain->flow_block; - bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; - bo.extack = &extack; - INIT_LIST_HEAD(&bo.cb_list); - - err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo); - if (err < 0) - return err; - - switch (cmd) { - case FLOW_BLOCK_BIND: - err = nft_flow_offload_bind(&bo, basechain); - break; - case FLOW_BLOCK_UNBIND: - err = nft_flow_offload_unbind(&bo, basechain); - break; - } - - return err; + if (dev->netdev_ops->ndo_setup_tc) + return nft_block_offload_cmd(basechain, dev, cmd); + else + return nft_indr_block_offload_cmd(basechain, dev, cmd); } int nft_flow_rule_offload_commit(struct net *net) @@@ -336,33 -277,3 +347,33 @@@ return err; } + +void nft_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) +{ + struct net *net = dev_net(dev); + const struct nft_table *table; + const struct nft_chain *chain; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (table->family != NFPROTO_NETDEV) + continue; + + list_for_each_entry_rcu(chain, &table->chains, list) { + if (nft_is_base_chain(chain)) { + struct nft_base_chain *basechain; + + basechain = nft_base_chain(chain); + if (!strncmp(basechain->dev_name, dev->name, + IFNAMSIZ)) { + nft_indr_block_ing_cmd(dev, basechain, + cb, cb_priv, + command); + return; + } + } + } + } +} diff --combined net/rxrpc/ar-internal.h index 63b26baa108a,145335611af6..fa5b030acaa8 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@@ -226,9 -226,6 +226,9 @@@ struct rxrpc_security int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, unsigned int, unsigned int, rxrpc_seq_t, u16); + /* Free crypto request on a call */ + void (*free_call_crypto)(struct rxrpc_call *); + /* Locate the data in a received packet that has been verified. */ void (*locate_data)(struct rxrpc_call *, struct sk_buff *, unsigned int *, unsigned int *); @@@ -257,7 -254,8 +257,8 @@@ */ struct rxrpc_local { struct rcu_head rcu; - atomic_t usage; + atomic_t active_users; /* Number of users of the local endpoint */ + atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ @@@ -560,7 -558,6 +561,7 @@@ struct rxrpc_call unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + struct skcipher_request *cipher_req; /* Packet cipher request buffer */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@@ -653,7 -650,6 +654,6 @@@ /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ - u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ @@@ -747,7 -743,7 +747,7 @@@ int rxrpc_reject_call(struct rxrpc_soc /* * call_event.c */ - void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, + void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); @@@ -1006,6 -1002,8 +1006,8 @@@ struct rxrpc_local *rxrpc_lookup_local( struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); void rxrpc_put_local(struct rxrpc_local *); + struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); + void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); diff --combined net/sched/sch_taprio.c index 046fd2c102b4,e25d414ae12f..540bde009ea5 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@@ -677,6 -677,10 +677,6 @@@ static const struct nla_policy entry_po [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, -}; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@@ -1191,7 -1195,8 +1191,8 @@@ unlock spin_unlock_bh(qdisc_lock(sch)); free_sched: - kfree(new_admin); + if (new_admin) + call_rcu(&new_admin->rcu, taprio_free_sched_cb); return err; } diff --combined net/tipc/link.c index 289e848084ac,c2c5c53cad22..6cc75ffd9e2c --- a/net/tipc/link.c +++ b/net/tipc/link.c @@@ -106,8 -106,6 +106,6 @@@ struct tipc_stats * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent * @snt_nxt: next sequence number to use for outbound messages - * @prev_from: sequence number of most previous retransmission request - * @stale_limit: time when repeated identical retransmits must force link reset * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages @@@ -164,9 -162,7 +162,7 @@@ struct tipc_link u16 limit; } backlog[5]; u16 snd_nxt; - u16 prev_from; u16 window; - unsigned long stale_limit; /* Reception */ u16 rcv_nxt; @@@ -180,7 -176,6 +176,7 @@@ /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + struct sk_buff *reasm_tnlmsg; /* Broadcast */ u16 ackers; @@@ -854,31 -849,18 +850,31 @@@ static int link_schedule_user(struct ti */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; - skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + } void tipc_link_reset(struct tipc_link *l) @@@ -911,10 -893,8 +907,10 @@@ l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); + kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; + l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; @@@ -956,10 -936,7 +952,10 @@@ int tipc_link_xmit(struct tipc_link *l int rc = 0; if (unlikely(msg_size(hdr) > mtu)) { - skb_queue_purge(list); + pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", + skb_queue_len(list), msg_user(hdr), + msg_type(hdr), msg_size(hdr), mtu); + __skb_queue_purge(list); return -EMSGSIZE; } @@@ -988,7 -965,7 +984,7 @@@ if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { - skb_queue_purge(list); + __skb_queue_purge(list); return -ENOBUFS; } __skb_dequeue(list); @@@ -1063,47 -1040,53 +1059,53 @@@ static void tipc_link_advance_backlog(s * link_retransmit_failure() - Detect repeated retransmit failures * @l: tipc link sender * @r: tipc link receiver (= l in case of unicast) - * @from: seqno of the 1st packet in retransmit request * @rc: returned code * * Return: true if the repeated retransmit failures happens, otherwise * false */ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, - u16 from, int *rc) + int *rc) { struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; if (!skb) return false; - hdr = buf_msg(skb); - /* Detect repeated retransmit failures on same packet */ - if (r->prev_from != from) { - r->prev_from = from; - r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); - } else if (time_after(jiffies, r->stale_limit)) { - pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "State of link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(hdr), msg_type(hdr), msg_size(hdr), - msg_errcode(hdr)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); - - trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); - trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); - trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + if (!TIPC_SKB_CB(skb)->retr_cnt) + return false; - if (link_is_bc_sndlink(l)) - *rc = TIPC_LINK_DOWN_EVT; + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + + msecs_to_jiffies(r->tolerance))) + return false; + + hdr = buf_msg(skb); + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) + return false; + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "State of link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, dest: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); + pr_info("retr_stamp %d, retr_cnt %d\n", + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), + TIPC_SKB_CB(skb)->retr_cnt); + + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + + if (link_is_bc_sndlink(l)) { + r->state = LINK_RESET; + *rc = TIPC_LINK_DOWN_EVT; + } else { *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - return true; } - return false; + return true; } /* tipc_link_bc_retrans() - retransmit zero or more packets @@@ -1129,7 -1112,7 +1131,7 @@@ static int tipc_link_bc_retrans(struct trace_tipc_link_retrans(r, from, to, &l->transmq); - if (link_retransmit_failure(l, r, from, &rc)) + if (link_retransmit_failure(l, r, &rc)) return rc; skb_queue_walk(&l->transmq, skb) { @@@ -1138,11 -1121,10 +1140,10 @@@ continue; if (more(msg_seqno(hdr), to)) break; - if (link_is_bc_sndlink(l)) { - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) - continue; - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; - } + + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; @@@ -1152,6 -1134,10 +1153,10 @@@ _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } return 0; } @@@ -1252,7 -1238,6 +1257,7 @@@ static int tipc_link_tnl_rcv(struct tip struct sk_buff_head *inputq) { struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; struct sk_buff_head *fdefq = &l->failover_deferdq; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; @@@ -1260,56 -1245,40 +1265,56 @@@ int rc = 0; u16 seqno; - /* SYNCH_MSG */ - if (msg_type(hdr) == SYNCH_MSG) - goto drop; + if (msg_type(hdr) == SYNCH_MSG) { + kfree_skb(skb); + return 0; + } - /* FAILOVER_MSG */ - if (!tipc_msg_extract(skb, &iskb, &ipos)) { - pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n", - skb_queue_len(fdefq)); - return rc; + /* Not a fragment? */ + if (likely(!msg_nof_fragms(hdr))) { + if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { + pr_warn_ratelimited("Unable to extract msg, defq: %d\n", + skb_queue_len(fdefq)); + return 0; + } + kfree_skb(skb); + } else { + /* Set fragment type for buf_append */ + if (msg_fragm_no(hdr) == 1) + msg_set_type(hdr, FIRST_FRAGMENT); + else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) + msg_set_type(hdr, FRAGMENT); + else + msg_set_type(hdr, LAST_FRAGMENT); + + if (!tipc_buf_append(reasm_tnlmsg, &skb)) { + /* Successful but non-complete reassembly? */ + if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) + return 0; + pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + iskb = skb; } do { seqno = buf_seqno(iskb); - if (unlikely(less(seqno, l->drop_point))) { kfree_skb(iskb); continue; } - if (unlikely(seqno != l->drop_point)) { __tipc_skb_queue_sorted(fdefq, seqno, iskb); continue; } l->drop_point++; - if (!tipc_data_input(l, iskb, inputq)) rc |= tipc_link_input(l, iskb, inputq, reasm_skb); if (unlikely(rc)) break; } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); -drop: - kfree_skb(skb); return rc; } @@@ -1393,12 -1362,10 +1398,10 @@@ static int tipc_link_advance_transmq(st struct tipc_msg *hdr; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; + bool passed = false; u16 seqno, n = 0; int rc = 0; - if (gap && link_retransmit_failure(l, l, acked + 1, &rc)) - return rc; - skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb); @@@ -1408,12 -1375,17 +1411,17 @@@ next_gap_ack __skb_unlink(skb, &l->transmq); kfree_skb(skb); } else if (less_eq(seqno, acked + gap)) { - /* retransmit skb */ + /* First, check if repeated retrans failures occurs? */ + if (!passed && link_retransmit_failure(l, l, &rc)) + return rc; + passed = true; + + /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; - - _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, + GFP_ATOMIC); if (!_skb) continue; hdr = buf_msg(_skb); @@@ -1422,6 -1394,10 +1430,10 @@@ _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ if (!ga || n >= ga->gack_cnt) @@@ -1668,7 -1644,7 +1680,7 @@@ void tipc_link_create_dummy_tnl_msg(str struct sk_buff *skb; u32 dnode = l->addr; - skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); @@@ -1699,18 -1675,14 +1711,18 @@@ void tipc_link_tnl_prepare(struct tipc_ struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; - struct sk_buff_head tmpxq, tnlq; + struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; + bool pktcnt_need_update = false; + u16 syncpt; + int rc; if (!tnl) return; - skb_queue_head_init(&tnlq); - skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags); /* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, @@@ -1720,35 -1692,10 +1732,35 @@@ pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } - skb_queue_tail(&tnlq, skb); + __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq); + /* Link Synching: + * From now on, send only one single ("dummy") SYNCH message + * to peer. The SYNCH message does not contain any data, just + * a header conveying the synch point to the peer. + */ + if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, + INT_H_SIZE, 0, l->addr, + tipc_own_addr(l->net), + 0, 0, 0); + if (!tnlskb) { + pr_warn("%sunable to create dummy SYNCH_MSG\n", + link_co_err); + return; + } + + hdr = buf_msg(tnlskb); + syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; + msg_set_syncpt(hdr, syncpt); + msg_set_bearer_id(hdr, l->peer_bearer_id); + __skb_queue_tail(&tnlq, tnlskb); + tipc_link_xmit(tnl, &tnlq, xmitq); + return; + } + /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); @@@ -1766,39 -1713,6 +1778,39 @@@ tnl if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); + + /* Tunnel link MTU is not large enough? This could be + * due to: + * 1) Link MTU has just changed or set differently; + * 2) Or FAILOVER on the top of a SYNCH message + * + * The 2nd case should not happen if peer supports + * TIPC_TUNNEL_ENHANCED + */ + if (pktlen > tnl->mtu - INT_H_SIZE) { + if (mtyp == FAILOVER_MSG && + (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, + &frags); + if (rc) { + pr_warn("%sunable to frag msg: rc %d\n", + link_co_err, rc); + return; + } + pktcnt += skb_queue_len(&frags) - 1; + pktcnt_need_update = true; + skb_queue_splice_tail_init(&frags, &tnlq); + continue; + } + /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED + * => Just warn it and return! + */ + pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", + link_co_err, msg_user(hdr), + msg_type(hdr), msg_size(hdr)); + return; + } + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { @@@ -1814,12 -1728,6 +1826,12 @@@ goto tnl; } + if (pktcnt_need_update) + skb_queue_walk(&tnlq, skb) { + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, pktcnt); + } + tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { @@@ -2681,7 -2589,7 +2693,7 @@@ int tipc_link_dump(struct tipc_link *l i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); - i += scnprintf(buf + i, sz - i, " %u", l->prev_from); + i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked); diff --combined net/tipc/msg.h index 1c8c8dd32a4e,d7ebc9e955f6..0daa6f04ca81 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@@ -102,13 -102,15 +102,15 @@@ struct plist #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - u32 bytes_read; - u32 orig_member; struct sk_buff *tail; unsigned long nxt_retr; - bool validated; + unsigned long retr_stamp; + u32 bytes_read; + u32 orig_member; u16 chain_imp; u16 ackers; + u16 retr_cnt; + bool validated; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) @@@ -721,26 -723,12 +723,26 @@@ static inline void msg_set_last_bcast(s msg_set_bits(m, 4, 16, 0xffff, n); } +static inline u32 msg_nof_fragms(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline u32 msg_fragm_no(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } - static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); @@@ -891,16 -879,6 +893,16 @@@ static inline void msg_set_msgcnt(struc msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u16 msg_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); @@@ -1059,8 -1037,6 +1061,8 @@@ bool tipc_msg_bundle(struct sk_buff *sk bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); diff --combined net/tls/tls_device.c index d184230665eb,43922d86e510..a470df7ffcf9 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@@ -243,14 -243,14 +243,14 @@@ static void tls_append_frag(struct tls_ skb_frag_t *frag; frag = &record->frags[record->num_frags - 1]; - if (frag->page.p == pfrag->page && - frag->page_offset + frag->size == pfrag->offset) { - frag->size += size; + if (skb_frag_page(frag) == pfrag->page && + skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) { + skb_frag_size_add(frag, size); } else { ++frag; - frag->page.p = pfrag->page; - frag->page_offset = pfrag->offset; - frag->size = size; + __skb_frag_set_page(frag, pfrag->page); + skb_frag_off_set(frag, pfrag->offset); + skb_frag_size_set(frag, size); ++record->num_frags; get_page(pfrag->page); } @@@ -301,8 -301,8 +301,8 @@@ static int tls_push_record(struct sock frag = &record->frags[i]; sg_unmark_end(&offload_ctx->sg_tx_data[i]); sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), - frag->size, frag->page_offset); - sk_mem_charge(sk, frag->size); + skb_frag_size(frag), skb_frag_off(frag)); + sk_mem_charge(sk, skb_frag_size(frag)); get_page(skb_frag_page(frag)); } sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); @@@ -324,7 -324,7 +324,7 @@@ static int tls_create_new_record(struc frag = &record->frags[0]; __skb_frag_set_page(frag, pfrag->page); - frag->page_offset = pfrag->offset; + skb_frag_off_set(frag, pfrag->offset); skb_frag_size_set(frag, prepend_size); get_page(pfrag->page); @@@ -373,9 -373,9 +373,9 @@@ static int tls_push_data(struct sock *s struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); - int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); struct tls_record_info *record = ctx->open_record; + int tls_push_record_flags; struct page_frag *pfrag; size_t orig_size = size; u32 max_open_record_len; @@@ -390,6 -390,9 +390,9 @@@ if (sk->sk_err) return -sk->sk_err; + flags |= MSG_SENDPAGE_DECRYPTED; + tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { rc = tls_push_partial_record(sk, tls_ctx, flags); @@@ -576,7 -579,9 +579,9 @@@ void tls_device_write_space(struct soc gfp_t sk_allocation = sk->sk_allocation; sk->sk_allocation = GFP_ATOMIC; - tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL); + tls_push_partial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_DECRYPTED); sk->sk_allocation = sk_allocation; } } diff --combined tools/include/uapi/linux/bpf.h index 4393bd4b2419,a5aa7d3ac6a1..0e66371bea13 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@@ -134,7 -134,6 +134,7 @@@ enum bpf_map_type BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as @@@ -1467,8 -1466,8 +1467,8 @@@ union bpf_attr * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@@ -2714,33 -2713,6 +2714,33 @@@ * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -2852,8 -2824,7 +2852,8 @@@ FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@@ -3536,10 -3507,6 +3536,10 @@@ enum bpf_task_fd_type BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@@ -3561,8 -3528,6 +3561,8 @@@ __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; }; struct bpf_func_info { diff --combined tools/lib/bpf/libbpf.c index 2233f919dd88,2b57d7ea7836..e0276520171b --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@@ -39,7 -39,6 +39,7 @@@ #include #include #include +#include #include #include #include @@@ -49,7 -48,6 +49,7 @@@ #include "btf.h" #include "str_error.h" #include "libbpf_internal.h" +#include "hashmap.h" #ifndef EM_BPF #define EM_BPF 247 @@@ -77,12 -75,9 +77,12 @@@ static int __base_pr(enum libbpf_print_ static libbpf_print_fn_t __libbpf_pr = __base_pr; -void libbpf_set_print(libbpf_print_fn_t fn) +libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) { + libbpf_print_fn_t old_print_fn = __libbpf_pr; + __libbpf_pr = fn; + return old_print_fn; } __printf(2, 3) @@@ -187,7 -182,6 +187,6 @@@ struct bpf_program bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; - int btf_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@@ -318,7 -312,6 +317,6 @@@ void bpf_program__unload(struct bpf_pro prog->instances.nr = -1; zfree(&prog->instances.fds); - zclose(prog->btf_fd); zfree(&prog->func_info); zfree(&prog->line_info); } @@@ -397,7 -390,6 +395,6 @@@ bpf_program__init(void *data, size_t si prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->btf_fd = -1; return 0; errout: @@@ -1018,21 -1010,23 +1015,21 @@@ static int bpf_object__init_user_maps(s return 0; } -static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, - __u32 id) +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { const struct btf_type *t = btf__type_by_id(btf, id); - while (true) { - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_TYPEDEF: - t = btf__type_by_id(btf, t->type); - break; - default: - return t; - } + if (res_id) + *res_id = id; + + while (btf_is_mod(t) || btf_is_typedef(t)) { + if (res_id) + *res_id = t->type; + t = btf__type_by_id(btf, t->type); } + + return t; } /* @@@ -1045,14 -1039,14 +1042,14 @@@ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *def, const struct btf_member *m, __u32 *res) { - const struct btf_type *t = skip_mods_and_typedefs(btf, m->type); + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); const char *name = btf__name_by_offset(btf, m->name_off); const struct btf_array *arr_info; const struct btf_type *arr_t; - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, BTF_INFO_KIND(t->info)); + map_name, name, btf_kind(t)); return false; } @@@ -1062,12 -1056,12 +1059,12 @@@ map_name, name, t->type); return false; } - if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) { + if (!btf_is_array(arr_t)) { pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, BTF_INFO_KIND(arr_t->info)); + map_name, name, btf_kind(arr_t)); return false; } - arr_info = (const void *)(arr_t + 1); + arr_info = btf_array(arr_t); *res = arr_info->nelems; return true; } @@@ -1085,11 -1079,11 +1082,11 @@@ static int bpf_object__init_user_btf_ma struct bpf_map *map; int vlen, i; - vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx; + vi = btf_var_secinfos(sec) + var_idx; var = btf__type_by_id(obj->btf, vi->type); - var_extra = (const void *)(var + 1); + var_extra = btf_var(var); map_name = btf__name_by_offset(obj->btf, var->name_off); - vlen = BTF_INFO_VLEN(var->info); + vlen = btf_vlen(var); if (map_name == NULL || map_name[0] == '\0') { pr_warning("map #%d: empty name.\n", var_idx); @@@ -1099,9 -1093,9 +1096,9 @@@ pr_warning("map '%s' BTF data is corrupted.\n", map_name); return -EINVAL; } - if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) { + if (!btf_is_var(var)) { pr_warning("map '%s': unexpected var kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@@ -1111,10 -1105,10 +1108,10 @@@ return -EOPNOTSUPP; } - def = skip_mods_and_typedefs(obj->btf, var->type); - if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) { + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (!btf_is_struct(def)) { pr_warning("map '%s': unexpected def kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (def->size > vi->size) { @@@ -1137,8 -1131,8 +1134,8 @@@ pr_debug("map '%s': at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - vlen = BTF_INFO_VLEN(def->info); - m = (const void *)(def + 1); + vlen = btf_vlen(def); + m = btf_members(def); for (i = 0; i < vlen; i++, m++) { const char *name = btf__name_by_offset(obj->btf, m->name_off); @@@ -1188,9 -1182,9 +1185,9 @@@ map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': key spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@@ -1231,9 -1225,9 +1228,9 @@@ map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': value spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@@ -1294,7 -1288,7 +1291,7 @@@ static int bpf_object__init_user_btf_ma nr_types = btf__get_nr_types(obj->btf); for (i = 1; i <= nr_types; i++) { t = btf__type_by_id(obj->btf, i); - if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) + if (!btf_is_datasec(t)) continue; name = btf__name_by_offset(obj->btf, t->name_off); if (strcmp(name, MAPS_ELF_SEC) == 0) { @@@ -1308,7 -1302,7 +1305,7 @@@ return -ENOENT; } - vlen = BTF_INFO_VLEN(sec->info); + vlen = btf_vlen(sec); for (i = 0; i < vlen; i++) { err = bpf_object__init_user_btf_map(obj, sec, i, obj->efile.btf_maps_shndx, @@@ -1369,14 -1363,16 +1366,14 @@@ static void bpf_object__sanitize_btf(st struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - __u16 kind; if (!obj->btf || (has_func && has_datasec)) return; for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - kind = BTF_INFO_KIND(t->info); - if (!has_datasec && kind == BTF_KIND_VAR) { + if (!has_datasec && btf_is_var(t)) { /* replace VAR with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* @@@ -1385,11 -1381,11 +1382,11 @@@ * original variable took less than 4 bytes */ t->size = 1; - *(int *)(t+1) = BTF_INT_ENC(0, 0, 8); - } else if (!has_datasec && kind == BTF_KIND_DATASEC) { + *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); + } else if (!has_datasec && btf_is_datasec(t)) { /* replace DATASEC with STRUCT */ - struct btf_var_secinfo *v = (void *)(t + 1); - struct btf_member *m = (void *)(t + 1); + const struct btf_var_secinfo *v = btf_var_secinfos(t); + struct btf_member *m = btf_members(t); struct btf_type *vt; char *name; @@@ -1400,7 -1396,7 +1397,7 @@@ name++; } - vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); for (j = 0; j < vlen; j++, v++, m++) { /* order of field assignments is important */ @@@ -1410,12 -1406,12 +1407,12 @@@ vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } - } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) { + } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ - vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); t->size = sizeof(__u32); /* kernel enforced */ - } else if (!has_func && kind == BTF_KIND_FUNC) { + } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); } @@@ -1773,22 -1769,15 +1770,22 @@@ bpf_program__collect_reloc(struct bpf_p (long long) sym.st_value, sym.st_name, name); shdr_idx = sym.st_shndx; + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n", + insn_idx, shdr_idx); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n", + name, shdr_idx, insn_idx, + insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { pr_warning("incorrect bpf_call opcode\n"); @@@ -2296,900 -2285,9 +2293,897 @@@ bpf_program_reloc_btf_ext(struct bpf_pr prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); } - if (!insn_offset) - prog->btf_fd = btf__fd(obj->btf); - return 0; } +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field byte offset represented by spec */ + __u32 offset; +}; + +static bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +/* + * Turn bpf_offset_reloc into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating resulting + * field offset (in bytes), specified by accessor string. Low-level spec + * captures every single level of nestedness, including traversing anonymous + * struct/union members. High-level one only captures semantically meaningful + * "turning points": named fields and array indicies. + * E.g., for this case: + * + * struct sample { + * int __unimportant; + * struct { + * int __1; + * int __2; + * int a[7]; + * }; + * }; + * + * struct sample *s = ...; + * + * int x = &s->a[3]; // access string = '0:1:2:3' + * + * Low-level spec has 1:1 mapping with each element of access string (it's + * just a parsed access string representation): [0, 1, 2, 3]. + * + * High-level spec will capture only 3 points: + * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - field 'a' access (corresponds to '2' in low-level spec); + * - array element #3 access (corresponds to '3' in low-level spec). + * + */ +static int bpf_core_spec_parse(const struct btf *btf, + __u32 type_id, + const char *spec_str, + struct bpf_core_spec *spec) +{ + int access_idx, parsed_len, i; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + /* first spec value is always reloc type array index */ + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + spec->spec[0].type_id = id; + spec->spec[0].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset = access_idx * sz; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[i]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + if (btf_member_bitfield_size(t, access_idx)) + return -EINVAL; + + offset = btf_member_bit_offset(t, access_idx); + if (offset % 8) + return -EINVAL; + spec->offset += offset / 8; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->spec[spec->len].name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t || access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset += access_idx * sz; + } else { + pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); + return -EINVAL; + } + } + + return 0; +} + +static bool bpf_core_is_flavor_sep(const char *s) +{ + /* check X___Y name pattern, where X and Y are not underscores */ + return s[0] != '_' && /* X */ + s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ + s[4] != '_'; /* Y */ +} + +/* Given 'some_struct_name___with_flavor' return the length of a name prefix + * before last triple underscore. Struct name part after last triple + * underscore is ignored by BPF CO-RE relocation during relocation matching. + */ +static size_t bpf_core_essential_name_len(const char *name) +{ + size_t n = strlen(name); + int i; + + for (i = n - 5; i >= 0; i--) { + if (bpf_core_is_flavor_sep(name + i)) + return i + 1; + } + return n; +} + +/* dynamically sized list of type IDs */ +struct ids_vec { + __u32 *data; + int len; +}; + +static void bpf_core_free_cands(struct ids_vec *cand_ids) +{ + free(cand_ids->data); + free(cand_ids); +} + +static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, + __u32 local_type_id, + const struct btf *targ_btf) +{ + size_t local_essent_len, targ_essent_len; + const char *local_name, *targ_name; + const struct btf_type *t; + struct ids_vec *cand_ids; + __u32 *new_ids; + int i, err, n; + + t = btf__type_by_id(local_btf, local_type_id); + if (!t) + return ERR_PTR(-EINVAL); + + local_name = btf__name_by_offset(local_btf, t->name_off); + if (str_is_empty(local_name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(local_name); + + cand_ids = calloc(1, sizeof(*cand_ids)); + if (!cand_ids) + return ERR_PTR(-ENOMEM); + + n = btf__get_nr_types(targ_btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(targ_btf, i); + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) + continue; + + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + + if (strncmp(local_name, targ_name, local_essent_len) == 0) { + pr_debug("[%d] %s: found candidate [%d] %s\n", + local_type_id, local_name, i, targ_name); + new_ids = realloc(cand_ids->data, cand_ids->len + 1); + if (!new_ids) { + err = -ENOMEM; + goto err_out; + } + cand_ids->data = new_ids; + cand_ids->data[cand_ids->len++] = i; + } + } + return cand_ids; +err_out: + bpf_core_free_cands(cand_ids); + return ERR_PTR(err); +} + +/* Check two types for compatibility, skipping const/volatile/restrict and + * typedefs, to ensure we are relocating offset to the compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible; + * - any two PTRs are always compatible; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and bitness should match, signedness is ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ +static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + +recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_FWD: + case BTF_KIND_PTR: + return 1; + case BTF_KIND_ENUM: + return local_type->size == targ_type->size; + case BTF_KIND_INT: + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0 && + local_type->size == targ_type->size && + btf_int_bits(local_type) == btf_int_bits(targ_type); + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } +} + +/* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. + */ +static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) +{ + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 offset; + + /* bitfield relocations not supported */ + if (btf_member_bitfield_size(targ_type, i)) + continue; + offset = btf_member_bit_offset(targ_type, i); + if (offset % 8) + continue; + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->offset += offset / 8; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->offset -= offset / 8; + spec->raw_len--; + } + + return 0; +} + +/* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + offset). + */ +static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) +{ + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + if (local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->offset += local_acc->idx * sz; + } + } + + return 1; +} + +/* + * Patch relocatable BPF instruction. + * Expected insn->imm value is provided for validation, as well as the new + * relocated value. + * + * Currently three kinds of BPF instructions are supported: + * 1. rX = (assignment with immediate operand); + * 2. rX += (arithmetic operations with immediate operand); + * 3. *(rX) = (indirect memory assignment with immediate operand). + * + * If actual insn->imm value is wrong, bail out. + */ +static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, + __u32 orig_off, __u32 new_off) +{ + struct bpf_insn *insn; + int insn_idx; + __u8 class; + + if (insn_off % sizeof(struct bpf_insn)) + return -EINVAL; + insn_idx = insn_off / sizeof(struct bpf_insn); + + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (insn->imm != orig_off) + return -EINVAL; + insn->imm = new_off; + pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", + bpf_program__title(prog, false), + insn_idx, orig_off, new_off); + } else { + pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); + return -EINVAL; + } + return 0; +} + +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + +/* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ +static struct btf *bpf_core_find_kernel_btf(void) +{ + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warning("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); +} + +/* Output spec definition in the format: + * [] () + => @, + * where is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +{ + const struct btf_type *t; + const char *s; + __u32 type_id; + int i; + + type_id = spec->spec[0].type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + libbpf_print(level, "[%u] %s + ", type_id, s); + + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%d%s", spec->raw_spec[i], + i == spec->raw_len - 1 ? " => " : ":"); + + libbpf_print(level, "%u @ &x", spec->offset); + + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + +} + +static size_t bpf_core_hash_fn(const void *key, void *ctx) +{ + return (size_t)key; +} + +static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} + +static void *u32_as_hash_key(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +/* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + * Candidate type is a type with the same "essential" name, ignoring + * everything after last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, to allow to + * specify/support incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one different types with the same name. In that case, ___2, ___3, etc + * are appended starting from second name conflict. But start flavors are + * also useful to be defined "locally", in BPF program, to extract same + * data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + * candidate target type. Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match, + * as well as all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is not error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accomodate + * imprefection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through pointer. This should be OK in most cases and + * is not an error. + * 4. Candidate types search is performed by linearly scanning through all + * types in target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching resolved + * list of matching candidates per each local "root" type ID, that has at + * least one bpf_offset_reloc associated with it. This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ +static int bpf_core_reloc_offset(struct bpf_program *prog, + const struct bpf_offset_reloc *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) +{ + const char *prog_name = bpf_program__title(prog, false); + struct bpf_core_spec local_spec, cand_spec, targ_spec; + const void *type_key = u32_as_hash_key(relo->type_id); + const struct btf_type *local_type, *cand_type; + const char *local_name, *cand_name; + struct ids_vec *cand_ids; + __u32 local_id, cand_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (str_is_empty(local_name)) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + if (err) { + pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { + cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); + if (IS_ERR(cand_ids)) { + pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); + return PTR_ERR(cand_ids); + } + err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); + if (err) { + bpf_core_free_cands(cand_ids); + return err; + } + } + + for (i = 0, j = 0; i < cand_ids->len; i++) { + cand_id = cand_ids->data[i]; + cand_type = btf__type_by_id(targ_btf, cand_id); + cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); + + err = bpf_core_spec_match(&local_spec, targ_btf, + cand_id, &cand_spec); + pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", + prog_name, relo_idx, i, cand_name); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + if (err < 0) { + pr_warning("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); + return err; + } + if (err == 0) + continue; + + if (j == 0) { + targ_spec = cand_spec; + } else if (cand_spec.offset != targ_spec.offset) { + /* if there are many candidates, they should all + * resolve to the same offset + */ + pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.offset, + targ_spec.offset); + return -EINVAL; + } + + cand_ids->data[j++] = cand_spec.spec[0].type_id; + } + + cand_ids->len = j; + if (cand_ids->len == 0) { + pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); + return -ESRCH; + } + + err = bpf_core_reloc_insn(prog, relo->insn_off, + local_spec.offset, targ_spec.offset); + if (err) { + pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); + return -EINVAL; + } + + return 0; +} + +static int +bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) +{ + const struct btf_ext_info_sec *sec; + const struct bpf_offset_reloc *rec; + const struct btf_ext_info *seg; + struct hashmap_entry *entry; + struct hashmap *cand_cache = NULL; + struct bpf_program *prog; + struct btf *targ_btf; + const char *sec_name; + int i, err = 0; + + if (targ_btf_path) + targ_btf = btf__parse_elf(targ_btf_path, NULL); + else + targ_btf = bpf_core_find_kernel_btf(); + if (IS_ERR(targ_btf)) { + pr_warning("failed to get target BTF: %ld\n", + PTR_ERR(targ_btf)); + return PTR_ERR(targ_btf); + } + + cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); + if (IS_ERR(cand_cache)) { + err = PTR_ERR(cand_cache); + goto out; + } + + seg = &obj->btf_ext->offset_reloc_info; + for_each_btf_ext_sec(seg, sec) { + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) { + err = -EINVAL; + goto out; + } + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + pr_warning("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); + err = -EINVAL; + goto out; + } + + pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + sec_name, sec->num_info); + + for_each_btf_ext_rec(seg, sec, i, rec) { + err = bpf_core_reloc_offset(prog, rec, i, obj->btf, + targ_btf, cand_cache); + if (err) { + pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); + goto out; + } + } + } + +out: + btf__free(targ_btf); + if (!IS_ERR_OR_NULL(cand_cache)) { + hashmap__for_each_entry(cand_cache, entry, i) { + bpf_core_free_cands(entry->value); + } + hashmap__free(cand_cache); + } + return err; +} + +static int +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +{ + int err = 0; + + if (obj->btf_ext->offset_reloc_info.len) + err = bpf_core_reloc_offsets(obj, targ_btf_path); + + return err; +} + static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) @@@ -3297,21 -2395,14 +3291,21 @@@ bpf_program__relocate(struct bpf_progra return 0; } - static int -bpf_object__relocate(struct bpf_object *obj) +bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i; int err; + if (obj->btf_ext) { + err = bpf_object__relocate_core(obj, targ_btf_path); + if (err) { + pr_warning("failed to perform CO-RE relocations: %d\n", + err); + return err; + } + } for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; @@@ -3366,7 -2457,7 +3360,7 @@@ load_program(struct bpf_program *prog, char *cp, errmsg[STRERR_BUFSIZE]; int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; - int ret; + int btf_fd, ret; if (!insns || !insns_cnt) return -EINVAL; @@@ -3381,7 -2472,12 +3375,12 @@@ load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ + if (prog->obj->btf_ext) + btf_fd = bpf_object__btf_fd(prog->obj); + else + btf_fd = -1; + load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@@ -3712,7 -2808,7 +3711,7 @@@ int bpf_object__load_xattr(struct bpf_o obj->loaded = true; CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj), err, out); + CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); return 0; @@@ -5903,13 -4999,15 +5902,15 @@@ int libbpf_num_possible_cpus(void static const char *fcpu = "/sys/devices/system/cpu/possible"; int len = 0, n = 0, il = 0, ir = 0; unsigned int start = 0, end = 0; + int tmp_cpus = 0; static int cpus; char buf[128]; int error = 0; int fd = -1; - if (cpus > 0) - return cpus; + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; fd = open(fcpu, O_RDONLY); if (fd < 0) { @@@ -5932,7 -5030,7 +5933,7 @@@ } buf[len] = '\0'; - for (ir = 0, cpus = 0; ir <= len; ir++) { + for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ if (buf[ir] == ',' || buf[ir] == '\0') { buf[ir] = '\0'; @@@ -5944,13 -5042,15 +5945,15 @@@ } else if (n == 1) { end = start; } - cpus += end - start + 1; + tmp_cpus += end - start + 1; il = ir + 1; } } - if (cpus <= 0) { - pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu); + if (tmp_cpus <= 0) { + pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } - return cpus; + + WRITE_ONCE(cpus, tmp_cpus); + return tmp_cpus; }