asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge tag 'mac80211-next-for-davem-2018-05-23' of git://git.kernel.org/pub/scm/linux...
author David S. Miller <davem@davemloft.net>
Wed, 23 May 2018 19:53:00 +0000 (15:53 -0400)
committer David S. Miller <davem@davemloft.net>
Wed, 23 May 2018 19:53:00 +0000 (15:53 -0400)
Johannes Berg says:

For this round, we have various things all over the place, notably
 * a fix for a race in aggregation, which I want to let
   bake for a bit longer before sending to stable
 * some new statistics (ACK RSSI, TXQ)
 * TXQ configuration
 * preparations for HE, particularly radiotap
 * replace confusing "country IE" by "country element" since it's
   not referring to Ireland

Note that I merged net-next to get a fix from mac80211 that got
there via net, to apply one patch that would otherwise conflict.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
68 files changed:
Documentation/devicetree/bindings/net/dsa/qca8k.txt
Documentation/devicetree/bindings/net/sff,sfp.txt
drivers/net/bonding/bond_main.c
drivers/net/dsa/qca8k.c
drivers/net/dsa/qca8k.h
drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
drivers/net/ethernet/neterion/vxge/vxge-config.c
drivers/net/ethernet/neterion/vxge/vxge-config.h
drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/netronome/Kconfig
drivers/net/ethernet/netronome/nfp/Makefile
drivers/net/ethernet/netronome/nfp/abm/ctrl.c [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/abm/main.c [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/abm/main.h [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/nfp_abi.h [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/nfp_app.c
drivers/net/ethernet/netronome/nfp/nfp_app.h
drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
drivers/net/ethernet/netronome/nfp/nfp_devlink.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_main.h
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
drivers/net/ethernet/netronome/nfp/nfp_port.c
drivers/net/ethernet/netronome/nfp/nfp_port.h
drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/phy/sfp.c
drivers/net/team/team.c
fs/exec.c
include/linux/binfmts.h
include/linux/bpfilter.h [new file with mode: 0644]
include/linux/netdev_features.h
include/linux/umh.h
include/net/ip.h
include/uapi/linux/bpfilter.h [new file with mode: 0644]
include/uapi/linux/rtnetlink.h
kernel/umh.c
net/Kconfig
net/Makefile
net/bpfilter/Kconfig [new file with mode: 0644]
net/bpfilter/Makefile [new file with mode: 0644]
net/bpfilter/bpfilter_kern.c [new file with mode: 0644]
net/bpfilter/main.c [new file with mode: 0644]
net/bpfilter/msgfmt.h [new file with mode: 0644]
net/core/devlink.c
net/ipv4/Makefile
net/ipv4/bpfilter/Makefile [new file with mode: 0644]
net/ipv4/bpfilter/sockopt.c [new file with mode: 0644]
net/ipv4/fib_frontend.c
net/ipv4/ip_sockglue.c
net/ipv4/netlink.c [new file with mode: 0644]
net/ipv4/route.c
net/ipv4/udp.c
net/ipv6/route.c
net/ipv6/udp.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/fib_rule_tests.sh [new file with mode: 0755]
tools/testing/selftests/uevent/Makefile [new file with mode: 0644]
tools/testing/selftests/uevent/config [new file with mode: 0644]
tools/testing/selftests/uevent/uevent_filtering.c [new file with mode: 0644]

index 9c67ee4890d749af16ea27997e235f5f12594cb1..bbcb255c3150230978fba796b320a71c206ddbad 100644 (file)
@@ -2,7 +2,10 @@
 
 Required properties:
 
-- compatible: should be "qca,qca8337"
+- compatible: should be one of:
+    "qca,qca8334"
+    "qca,qca8337"
+
 - #size-cells: must be 0
 - #address-cells: must be 1
 
@@ -14,6 +17,20 @@ port and PHY id, each subnode describing a port needs to have a valid phandle
 referencing the internal PHY connected to it. The CPU port of this switch is
 always port 0.
 
+A CPU port node has the following optional node:
+
+- fixed-link            : Fixed-link subnode describing a link to a non-MDIO
+                          managed entity. See
+                          Documentation/devicetree/bindings/net/fixed-link.txt
+                          for details.
+
+For QCA8K the 'fixed-link' sub-node supports only the following properties:
+
+- 'speed' (integer, mandatory), to indicate the link speed. Accepted
+  values are 10, 100 and 1000
+- 'full-duplex' (boolean, optional), to indicate that full duplex is
+  used. When absent, half duplex is assumed.
+
 Example:
 
 
@@ -53,6 +70,10 @@ Example:
                                        label = "cpu";
                                        ethernet = <&gmac1>;
                                        phy-mode = "rgmii";
+                                       fixed-link {
+                                               speed = 1000;
+                                               full-duplex;
+                                       };
                                };
 
                                port@1 {
index 929591d52ed6670c321e3790cdc7d8c8b9787efa..832139919f20a38150c794b811839faa454c840e 100644 (file)
@@ -7,11 +7,11 @@ Required properties:
   "sff,sfp" for SFP modules
   "sff,sff" for soldered down SFF modules
 
-Optional Properties:
-
 - i2c-bus : phandle of an I2C bus controller for the SFP two wire serial
   interface
 
+Optional Properties:
+
 - mod-def0-gpios : GPIO phandle and a specifier of the MOD-DEF0 (AKA Mod_ABS)
   module presence input gpio signal, active (module absent) high. Must
   not be present for SFF modules
index 06efdf6a762b52df73d275a42cdaab5ffad1cc53..fea17b92b1aedc4a5b0699ba51cab9bfe1fac60a 100644 (file)
@@ -1107,7 +1107,8 @@ static void bond_compute_features(struct bonding *bond)
 
 done:
        bond_dev->vlan_features = vlan_features;
-       bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
+       bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+                                   NETIF_F_GSO_UDP_L4;
        bond_dev->gso_max_segs = gso_max_segs;
        netif_set_gso_max_size(bond_dev, gso_max_size);
 
@@ -4268,7 +4269,7 @@ void bond_setup(struct net_device *bond_dev)
                                NETIF_F_HW_VLAN_CTAG_RX |
                                NETIF_F_HW_VLAN_CTAG_FILTER;
 
-       bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+       bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
        bond_dev->features |= bond_dev->hw_features;
 }
 
index 757b6d90ea365b96150e6ac51a8503cf7cb3f24b..cdcde7f8e0b275362b759ba95fae337041d553e0 100644 (file)
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2009 Felix Fietkau <nbd@nbd.name>
  * Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
  * Copyright (c) 2016 John Crispin <john@phrozen.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -473,10 +465,10 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode)
 static void
 qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
 {
-       u32 mask = QCA8K_PORT_STATUS_TXMAC;
+       u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC;
 
        /* Port 0 and 6 have no internal PHY */
-       if ((port > 0) && (port < 6))
+       if (port > 0 && port < 6)
                mask |= QCA8K_PORT_STATUS_LINK_AUTO;
 
        if (enable)
@@ -490,6 +482,7 @@ qca8k_setup(struct dsa_switch *ds)
 {
        struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
        int ret, i, phy_mode = -1;
+       u32 mask;
 
        /* Make sure that port 0 is the cpu port */
        if (!dsa_is_cpu_port(ds, 0)) {
@@ -515,7 +508,10 @@ qca8k_setup(struct dsa_switch *ds)
        if (ret < 0)
                return ret;
 
-       /* Enable CPU Port */
+       /* Enable CPU Port, force it to maximum bandwidth and full-duplex */
+       mask = QCA8K_PORT_STATUS_SPEED_1000 | QCA8K_PORT_STATUS_TXFLOW |
+              QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_DUPLEX;
+       qca8k_write(priv, QCA8K_REG_PORT_STATUS(QCA8K_CPU_PORT), mask);
        qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
                      QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
        qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1);
@@ -583,6 +579,47 @@ qca8k_setup(struct dsa_switch *ds)
        return 0;
 }
 
+/* Program the port status register of @port from the speed and duplex
+ * reported by @phy.  Only pseudo fixed-link PHYs are handled; for any
+ * other PHY, or for a speed other than 10/100/1000, the register is
+ * left untouched.
+ */
+static void
+qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy)
+{
+       struct qca8k_priv *priv = ds->priv;
+       u32 status;
+
+       /* Nothing to force unless the PHY is a pseudo fixed-link */
+       if (!phy_is_pseudo_fixed_link(phy))
+               return;
+
+       /* Translate the link speed into the speed field encoding */
+       if (phy->speed == 10) {
+               status = QCA8K_PORT_STATUS_SPEED_10;
+       } else if (phy->speed == 100) {
+               status = QCA8K_PORT_STATUS_SPEED_100;
+       } else if (phy->speed == 1000) {
+               status = QCA8K_PORT_STATUS_SPEED_1000;
+       } else {
+               dev_dbg(priv->dev, "port%d link speed %dMbps not supported.\n",
+                       port, phy->speed);
+               return;
+       }
+
+       /* Full duplex is an extra flag, half duplex is the bit left clear */
+       if (phy->duplex == DUPLEX_FULL)
+               status |= QCA8K_PORT_STATUS_DUPLEX;
+
+       /* Flow control in both directions is forced on CPU ports only */
+       if (dsa_is_cpu_port(ds, port))
+               status |= QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_TXFLOW;
+
+       /* Disable the port, rewrite its status register, enable it again */
+       qca8k_port_set_status(priv, port, 0);
+       qca8k_write(priv, QCA8K_REG_PORT_STATUS(port), status);
+       qca8k_port_set_status(priv, port, 1);
+}
+
 static int
 qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum)
 {
@@ -837,6 +874,7 @@ qca8k_get_tag_protocol(struct dsa_switch *ds, int port)
 static const struct dsa_switch_ops qca8k_switch_ops = {
        .get_tag_protocol       = qca8k_get_tag_protocol,
        .setup                  = qca8k_setup,
+       .adjust_link            = qca8k_adjust_link,
        .get_strings            = qca8k_get_strings,
        .phy_read               = qca8k_phy_read,
        .phy_write              = qca8k_phy_write,
@@ -868,6 +906,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
                return -ENOMEM;
 
        priv->bus = mdiodev->bus;
+       priv->dev = &mdiodev->dev;
 
        /* read the switches ID register */
        id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
@@ -939,6 +978,7 @@ static SIMPLE_DEV_PM_OPS(qca8k_pm_ops,
                         qca8k_suspend, qca8k_resume);
 
 static const struct of_device_id qca8k_of_match[] = {
+       { .compatible = "qca,qca8334" },
        { .compatible = "qca,qca8337" },
        { /* sentinel */ },
 };
index 1cf8a920d4ffc5ed84b8fd0948fa088b2b3ebf1b..613fe5c50236c50cfbc6659b5a7d895b21409ec9 100644 (file)
 #define QCA8K_GOL_MAC_ADDR0                            0x60
 #define QCA8K_GOL_MAC_ADDR1                            0x64
 #define QCA8K_REG_PORT_STATUS(_i)                      (0x07c + (_i) * 4)
-#define   QCA8K_PORT_STATUS_SPEED                      GENMASK(2, 0)
-#define   QCA8K_PORT_STATUS_SPEED_S                    0
+#define   QCA8K_PORT_STATUS_SPEED                      GENMASK(1, 0)
+#define   QCA8K_PORT_STATUS_SPEED_10                   0
+#define   QCA8K_PORT_STATUS_SPEED_100                  0x1
+#define   QCA8K_PORT_STATUS_SPEED_1000                 0x2
 #define   QCA8K_PORT_STATUS_TXMAC                      BIT(2)
 #define   QCA8K_PORT_STATUS_RXMAC                      BIT(3)
 #define   QCA8K_PORT_STATUS_TXFLOW                     BIT(4)
@@ -165,6 +167,7 @@ struct qca8k_priv {
        struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];
        struct dsa_switch *ds;
        struct mutex reg_mutex;
+       struct device *dev;
 };
 
 struct qca8k_mib_desc {
index adacc63991314b798545db3de88f33b0b8c94367..c7f8d0441278fb19b1283d49c54265075bf89cf2 100644 (file)
@@ -212,6 +212,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */
        CH_PCI_ID_TABLE_FENTRY(0x6086), /* Custom T6225-SO-CR */
        CH_PCI_ID_TABLE_FENTRY(0x6087), /* Custom T6225-CR */
+       CH_PCI_ID_TABLE_FENTRY(0x6088), /* Custom T62100-CR */
+       CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */
 CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
 
 #endif /* __T4_PCI_ID_TBL_H__ */
index 6223930a8155e66878e1bde5645c5d31809e2a8b..c60da9e8bf143ceadace9e08dc2033a359f47253 100644 (file)
@@ -693,7 +693,7 @@ __vxge_hw_device_is_privilaged(u32 host_type, u32 func_id)
                VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)
                return VXGE_HW_OK;
        else
-               return VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+               return VXGE_HW_ERR_PRIVILEGED_OPERATION;
 }
 
 /*
@@ -1920,7 +1920,7 @@ enum vxge_hw_status vxge_hw_device_getpause_data(struct __vxge_hw_device *hldev,
        }
 
        if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) {
-               status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+               status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
                goto exit;
        }
 
@@ -3153,7 +3153,7 @@ vxge_hw_mgmt_reg_read(struct __vxge_hw_device *hldev,
        case vxge_hw_mgmt_reg_type_mrpcim:
                if (!(hldev->access_rights &
                        VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) {
-                       status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+                       status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
                        break;
                }
                if (offset > sizeof(struct vxge_hw_mrpcim_reg) - 8) {
@@ -3165,7 +3165,7 @@ vxge_hw_mgmt_reg_read(struct __vxge_hw_device *hldev,
        case vxge_hw_mgmt_reg_type_srpcim:
                if (!(hldev->access_rights &
                        VXGE_HW_DEVICE_ACCESS_RIGHT_SRPCIM)) {
-                       status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+                       status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
                        break;
                }
                if (index > VXGE_HW_TITAN_SRPCIM_REG_SPACES - 1) {
@@ -3279,7 +3279,7 @@ vxge_hw_mgmt_reg_write(struct __vxge_hw_device *hldev,
        case vxge_hw_mgmt_reg_type_mrpcim:
                if (!(hldev->access_rights &
                        VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) {
-                       status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+                       status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
                        break;
                }
                if (offset > sizeof(struct vxge_hw_mrpcim_reg) - 8) {
@@ -3291,7 +3291,7 @@ vxge_hw_mgmt_reg_write(struct __vxge_hw_device *hldev,
        case vxge_hw_mgmt_reg_type_srpcim:
                if (!(hldev->access_rights &
                        VXGE_HW_DEVICE_ACCESS_RIGHT_SRPCIM)) {
-                       status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+                       status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
                        break;
                }
                if (index > VXGE_HW_TITAN_SRPCIM_REG_SPACES - 1) {
index cfa970417f818036bc83d6ed22deeb47cea28e90..d743a37a3cee8db94501ebe27bce03612df23be1 100644 (file)
@@ -127,7 +127,7 @@ enum vxge_hw_status {
        VXGE_HW_ERR_INVALID_TCODE                 = VXGE_HW_BASE_ERR + 14,
        VXGE_HW_ERR_INVALID_BLOCK_SIZE            = VXGE_HW_BASE_ERR + 15,
        VXGE_HW_ERR_INVALID_STATE                 = VXGE_HW_BASE_ERR + 16,
-       VXGE_HW_ERR_PRIVILAGED_OPEARATION         = VXGE_HW_BASE_ERR + 17,
+       VXGE_HW_ERR_PRIVILEGED_OPERATION          = VXGE_HW_BASE_ERR + 17,
        VXGE_HW_ERR_INVALID_PORT                  = VXGE_HW_BASE_ERR + 18,
        VXGE_HW_ERR_FIFO                          = VXGE_HW_BASE_ERR + 19,
        VXGE_HW_ERR_VPATH                         = VXGE_HW_BASE_ERR + 20,
index 0452848d1316b0b9e7c8b864eb9e200a6e5e2532..03c3d1230c17c94151ba6c8d28720bbc941cb9f7 100644 (file)
@@ -276,7 +276,7 @@ static void vxge_get_ethtool_stats(struct net_device *dev,
        *ptr++ = 0;
        status = vxge_hw_device_xmac_stats_get(hldev, xmac_stats);
        if (status != VXGE_HW_OK) {
-               if (status != VXGE_HW_ERR_PRIVILAGED_OPEARATION) {
+               if (status != VXGE_HW_ERR_PRIVILEGED_OPERATION) {
                        vxge_debug_init(VXGE_ERR,
                                "%s : %d Failure in getting xmac stats",
                                __func__, __LINE__);
index b2299f2b215502a7d5cbceb37a4d2292040e0d74..a8918bb7c8020807924e0bcf2a9b8c90afc03aa3 100644 (file)
@@ -3484,11 +3484,11 @@ static int vxge_device_register(struct __vxge_hw_device *hldev,
                                0,
                                &stat);
 
-       if (status == VXGE_HW_ERR_PRIVILAGED_OPEARATION)
+       if (status == VXGE_HW_ERR_PRIVILEGED_OPERATION)
                vxge_debug_init(
                        vxge_hw_device_trace_level_get(hldev),
                        "%s: device stats clear returns"
-                       "VXGE_HW_ERR_PRIVILAGED_OPEARATION", ndev->name);
+                       "VXGE_HW_ERR_PRIVILEGED_OPERATION", ndev->name);
 
        vxge_debug_entryexit(vxge_hw_device_trace_level_get(hldev),
                "%s: %s:%d  Exiting...",
index ae0c46ba7546d27cae8c531a93dfeee2fd821cd4..66f15b05b65e0606a04682757ac86c9eb082e76b 100644 (file)
@@ -36,6 +36,19 @@ config NFP_APP_FLOWER
          either directly, with Open vSwitch, or any other way.  Note that
          TC Flower offload requires specific FW to work.
 
+config NFP_APP_ABM_NIC
+       bool "NFP4000/NFP6000 Advanced buffer management NIC support"
+       depends on NFP
+       depends on NET_SWITCHDEV
+       default y
+       help
+         Enable driver support for Advanced buffer management NIC on NFP.
+         ABM NIC allows advanced configuration of queuing and scheduling
+         of packets, including ECN marking. Say Y, if you are planning to
+         use one of the NFP4000 and NFP6000 platforms which support this
+         functionality.
+         Code will be built into the nfp.ko driver.
+
 config NFP_DEBUG
        bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers"
        depends on NFP
index d5866d708dfa3f3a5dd3a5d28ad0c04a8f3d0313..6373f56205fdafe86c3ad82093dea0a9ea97e803 100644 (file)
@@ -30,6 +30,7 @@ nfp-objs := \
            nfp_net_sriov.o \
            nfp_netvf_main.o \
            nfp_port.o \
+           nfp_shared_buf.o \
            nic/main.o
 
 ifeq ($(CONFIG_NFP_APP_FLOWER),y)
@@ -52,4 +53,10 @@ nfp-objs += \
            bpf/jit.o
 endif
 
+ifeq ($(CONFIG_NFP_APP_ABM_NIC),y)
+nfp-objs += \
+           abm/ctrl.o \
+           abm/main.o
+endif
+
 nfp-$(CONFIG_NFP_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
new file mode 100644 (file)
index 0000000..e40f6f0
--- /dev/null
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "main.h"
+
+/* Fill in @alink->queue_base: the NFP_NET_CFG_START_RXQ value read from
+ * the link's vNIC, divided by that vNIC's stride_rx.
+ */
+void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink)
+{
+       struct nfp_net *nn = alink->vnic;
+
+       alink->queue_base = nn_readl(nn, NFP_NET_CFG_START_RXQ);
+       alink->queue_base /= nn->stride_rx;
+}
+
+/* Store the value returned by nfp_cppcore_pcie_unit() for the app's PF
+ * as @abm->pf_id.  Always returns 0.
+ */
+int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
+{
+       unsigned int unit = nfp_cppcore_pcie_unit(abm->app->pf->cpp);
+
+       abm->pf_id = unit;
+       return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
new file mode 100644 (file)
index 0000000..5a12bb2
--- /dev/null
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+#include "../nfpcore/nfp.h"
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nsp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "../nfp_net_repr.h"
+#include "../nfp_port.h"
+#include "main.h"
+
+/* Compose a port id with @rtype placed in the NFP_ABM_PORTID_TYPE field
+ * and @id placed in the NFP_ABM_PORTID_ID field.
+ */
+static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
+{
+       u32 type_bits = FIELD_PREP(NFP_ABM_PORTID_TYPE, rtype);
+       u32 id_bits = FIELD_PREP(NFP_ABM_PORTID_ID, id);
+
+       return type_bits | id_bits;
+}
+
+/* Look up the representor netdev addressed by @port_id.  The type and
+ * index are unpacked from the NFP_ABM_PORTID_* fields; NULL is returned
+ * when there is no representor set for the type or the index is beyond
+ * the set's num_reprs.
+ *
+ * NOTE(review): the unpacked type indexes app->reprs[] without a range
+ * check - confirm only ids built by nfp_abm_portid() reach this point.
+ */
+static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
+{
+       enum nfp_repr_type rtype = FIELD_GET(NFP_ABM_PORTID_TYPE, port_id);
+       u8 idx = FIELD_GET(NFP_ABM_PORTID_ID, port_id);
+       struct nfp_reprs *set;
+
+       set = rcu_dereference(app->reprs[rtype]);
+       if (!set || idx >= set->num_reprs)
+               return NULL;
+
+       return rcu_dereference(set->reprs[idx]);
+}
+
+/* Create one representor netdev for @alink and publish it in the app's
+ * representor set.  @ptype selects between a physical port representor
+ * (NFP_PORT_PHYS_PORT) and a PF representor (anything else).
+ *
+ * Returns 0 on success or a negative errno; on failure the port and the
+ * netdev allocated here are released again.
+ */
+static int
+nfp_abm_spawn_repr(struct nfp_app *app, struct nfp_abm_link *alink,
+                  enum nfp_port_type ptype)
+{
+       const bool is_phys = ptype == NFP_PORT_PHYS_PORT;
+       struct net_device *netdev;
+       enum nfp_repr_type rtype;
+       struct nfp_reprs *reprs;
+       struct nfp_repr *repr;
+       struct nfp_port *port;
+       int err;
+
+       rtype = is_phys ? NFP_REPR_TYPE_PHYS_PORT : NFP_REPR_TYPE_PF;
+
+       netdev = nfp_repr_alloc(app);
+       if (!netdev)
+               return -ENOMEM;
+
+       /* Let the representor find its link again */
+       repr = netdev_priv(netdev);
+       repr->app_priv = alink;
+
+       port = nfp_port_alloc(app, ptype, netdev);
+       if (IS_ERR(port)) {
+               err = PTR_ERR(port);
+               goto err_free_repr;
+       }
+
+       if (!is_phys) {
+               /* PF port: describe which PF / split the vNIC belongs to */
+               port->pf_id = alink->abm->pf_id;
+               port->pf_split = app->pf->max_data_vnics > 1;
+               port->pf_split_id = alink->id;
+               port->vnic = alink->vnic->dp.ctrl_bar;
+       } else {
+               port->eth_forced = true;
+               err = nfp_port_init_phy_port(app->pf, app, port, alink->id);
+               if (err)
+                       goto err_free_port;
+       }
+
+       SET_NETDEV_DEV(netdev, &alink->vnic->pdev->dev);
+       eth_hw_addr_random(netdev);
+
+       err = nfp_repr_init(app, netdev, nfp_abm_portid(rtype, alink->id),
+                           port, alink->vnic->dp.netdev);
+       if (err)
+               goto err_free_port;
+
+       /* Publish the new netdev; the slot is expected to be empty */
+       reprs = nfp_reprs_get_locked(app, rtype);
+       WARN(nfp_repr_get_locked(app, reprs, alink->id), "duplicate repr");
+       rcu_assign_pointer(reprs->reprs[alink->id], netdev);
+
+       nfp_info(app->cpp, "%s Port %d Representor(%s) created\n",
+                ptype == NFP_PORT_PF_PORT ? "PCIe" : "Phys",
+                alink->id, netdev->name);
+
+       return 0;
+
+err_free_port:
+       nfp_port_free(port);
+err_free_repr:
+       nfp_repr_free(netdev);
+       return err;
+}
+
+/* Remove and free the representor of type @rtype that belongs to @alink,
+ * if one is currently installed.
+ */
+static void
+nfp_abm_kill_repr(struct nfp_app *app, struct nfp_abm_link *alink,
+                 enum nfp_repr_type rtype)
+{
+       struct nfp_reprs *reprs = nfp_reprs_get_locked(app, rtype);
+       struct net_device *netdev;
+       struct nfp_repr *repr;
+
+       netdev = nfp_repr_get_locked(app, reprs, alink->id);
+       if (!netdev)
+               return;
+
+       /* Unpublish the slot and wait for RCU readers before freeing */
+       rcu_assign_pointer(reprs->reprs[alink->id], NULL);
+       synchronize_rcu();
+
+       /* Typed local so nfp_repr_clean_and_free() is handed a nfp_repr */
+       repr = netdev_priv(netdev);
+       nfp_repr_clean_and_free(repr);
+}
+
+/* Tear down both representors of @alink: the PF one first, then the
+ * physical port one.
+ */
+static void
+nfp_abm_kill_reprs(struct nfp_abm *abm, struct nfp_abm_link *alink)
+{
+       struct nfp_app *app = abm->app;
+
+       nfp_abm_kill_repr(app, alink, NFP_REPR_TYPE_PF);
+       nfp_abm_kill_repr(app, alink, NFP_REPR_TYPE_PHYS_PORT);
+}
+
+/* Tear down the representors of every vNIC on the PF's vnics list */
+static void nfp_abm_kill_reprs_all(struct nfp_abm *abm)
+{
+       struct nfp_net *nn;
+
+       list_for_each_entry(nn, &abm->app->pf->vnics, vnic_list) {
+               struct nfp_abm_link *alink = nn->app_priv;
+
+               nfp_abm_kill_reprs(abm, alink);
+       }
+}
+
+/* Report the eswitch mode currently recorded in the app's ABM state */
+static enum devlink_eswitch_mode nfp_abm_eswitch_mode_get(struct nfp_app *app)
+{
+       const struct nfp_abm *state = app->priv;
+
+       return state->eswitch_mode;
+}
+
+/* Switch to legacy mode: drop every representor, then record the new
+ * mode.  Always returns 0.
+ */
+static int nfp_abm_eswitch_set_legacy(struct nfp_abm *abm)
+{
+       nfp_abm_kill_reprs_all(abm);
+       abm->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+
+       return 0;
+}
+
+/* Fall back to legacy mode unless it is already active; a non-zero
+ * result from the switch triggers a WARN_ON().
+ */
+static void nfp_abm_eswitch_clean_up(struct nfp_abm *abm)
+{
+       if (abm->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
+               return;
+
+       WARN_ON(nfp_abm_eswitch_set_legacy(abm));
+}
+
+/* Switch to switchdev mode: spawn a physical port and a PF representor
+ * for every vNIC on the PF.  If any spawn fails, all representors are
+ * torn down again, the mode is left unchanged and the error is returned.
+ */
+static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm)
+{
+       struct nfp_app *app = abm->app;
+       struct nfp_net *nn;
+       int err;
+
+       list_for_each_entry(nn, &app->pf->vnics, vnic_list) {
+               struct nfp_abm_link *alink = nn->app_priv;
+
+               /* Physical port first, PF only if that one succeeded */
+               err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PHYS_PORT);
+               if (!err)
+                       err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PF_PORT);
+               if (err) {
+                       nfp_abm_kill_reprs_all(abm);
+                       return err;
+               }
+       }
+
+       abm->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+       return 0;
+}
+
+/* Change the eswitch mode to @mode.  Requesting the mode that is already
+ * active is a no-op returning 0; modes other than legacy and switchdev
+ * are rejected with -EINVAL.
+ */
+static int nfp_abm_eswitch_mode_set(struct nfp_app *app, u16 mode)
+{
+       struct nfp_abm *abm = app->priv;
+
+       if (abm->eswitch_mode == mode)
+               return 0;
+
+       if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
+               return nfp_abm_eswitch_set_legacy(abm);
+       if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+               return nfp_abm_eswitch_set_switchdev(abm);
+
+       return -EINVAL;
+}
+
+/* Give the netdev of @nn its MAC address.
+ *
+ * The address is taken from the hwinfo key "eth<eth_index>.mac.pf<pf_id>"
+ * of ETH table entry @id.  When @id has no ETH table entry, the key is
+ * missing, or its value does not parse as a MAC address, a warning is
+ * logged and a random address is assigned instead.
+ */
+static void
+nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn,
+                    unsigned int id)
+{
+       struct nfp_eth_table_port *eth_port;
+       u8 mac_addr[ETH_ALEN];
+       const char *mac_str;
+       char name[32];
+
+       /* Entries are indexed 0 .. count - 1, so id == count is already
+        * out of range and must take the fallback path too.
+        */
+       if (id >= pf->eth_tbl->count) {
+               nfp_warn(pf->cpp, "No entry for persistent MAC address\n");
+               eth_hw_addr_random(nn->dp.netdev);
+               return;
+       }
+
+       /* Only form the entry pointer once the index is known to be valid */
+       eth_port = &pf->eth_tbl->ports[id];
+
+       snprintf(name, sizeof(name), "eth%u.mac.pf%u",
+                eth_port->eth_index, abm->pf_id);
+
+       mac_str = nfp_hwinfo_lookup(pf->hwinfo, name);
+       if (!mac_str) {
+               nfp_warn(pf->cpp, "Can't lookup persistent MAC address (%s)\n",
+                        name);
+               eth_hw_addr_random(nn->dp.netdev);
+               return;
+       }
+
+       if (sscanf(mac_str, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+                  &mac_addr[0], &mac_addr[1], &mac_addr[2],
+                  &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) {
+               nfp_warn(pf->cpp, "Can't parse persistent MAC address (%s)\n",
+                        mac_str);
+               eth_hw_addr_random(nn->dp.netdev);
+               return;
+       }
+
+       /* The parsed address is both the current and the permanent one */
+       ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
+       ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
+}
+
+/* Allocate the per-vNIC ABM link state for @nn and attach it as
+ * nn->app_priv, mark the matching ETH port as configured, then set the
+ * vNIC's MAC address and read its control parameters.
+ *
+ * Returns 0 on success, -ENOMEM if the link cannot be allocated, or the
+ * negative error from nfp_eth_set_configured().  On failure the link is
+ * freed and nn->app_priv is reset to NULL.
+ */
+static int
+nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+       struct nfp_eth_table_port *eth_port = &app->pf->eth_tbl->ports[id];
+       struct nfp_abm *abm = app->priv;
+       struct nfp_abm_link *alink;
+       int err;
+
+       alink = kzalloc(sizeof(*alink), GFP_KERNEL);
+       if (!alink)
+               return -ENOMEM;
+       nn->app_priv = alink;
+       alink->abm = abm;
+       alink->vnic = nn;
+       alink->id = id;
+
+       /* This is a multi-host app, make sure MAC/PHY is up, but don't
+        * make the MAC/PHY state follow the state of any of the ports.
+        */
+       err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
+       if (err < 0)
+               goto err_free_alink;
+
+       netif_keep_dst(nn->dp.netdev);
+
+       nfp_abm_vnic_set_mac(app->pf, abm, nn, id);
+       nfp_abm_ctrl_read_params(alink);
+
+       return 0;
+
+err_free_alink:
+       /* Don't leave the vNIC pointing at the link that is about to go */
+       nn->app_priv = NULL;
+       kfree(alink);
+       return err;
+}
+
+/* Undo nfp_abm_vnic_alloc(): remove the representors that belong to the
+ * vNIC's link and free the link itself.
+ */
+static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
+{
+       struct nfp_abm_link *link = nn->app_priv;
+       struct nfp_abm *abm = link->abm;
+
+       nfp_abm_kill_reprs(abm, link);
+       kfree(link);
+}
+
+/* App init callback: validate the PF, allocate the ABM state and the
+ * two representor sets (physical port and PF).
+ *
+ * Returns -EINVAL if the PF has no ETH table, if the number of ETH
+ * entries differs from max_data_vnics, or if mac_stats_bar is not set;
+ * -ENOMEM on allocation failure; otherwise 0 or the error returned by
+ * nfp_abm_ctrl_find_addrs().
+ */
+static int nfp_abm_init(struct nfp_app *app)
+{
+       struct nfp_pf *pf = app->pf;
+       struct nfp_reprs *reprs;
+       struct nfp_abm *abm;
+       int err;
+
+       if (!pf->eth_tbl) {
+               nfp_err(pf->cpp, "ABM NIC requires ETH table\n");
+               return -EINVAL;
+       }
+       if (pf->max_data_vnics != pf->eth_tbl->count) {
+               nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n",
+                       pf->max_data_vnics, pf->eth_tbl->count);
+               return -EINVAL;
+       }
+       if (!pf->mac_stats_bar) {
+               nfp_warn(app->cpp, "ABM NIC requires mac_stats symbol\n");
+               return -EINVAL;
+       }
+
+       abm = kzalloc(sizeof(*abm), GFP_KERNEL);
+       if (!abm)
+               return -ENOMEM;
+       abm->app = app;
+       app->priv = abm;
+
+       err = nfp_abm_ctrl_find_addrs(abm);
+       if (err)
+               goto err_free_abm;
+
+       /* One slot per data vNIC in the physical port set ... */
+       reprs = nfp_reprs_alloc(pf->max_data_vnics);
+       if (!reprs) {
+               err = -ENOMEM;
+               goto err_free_abm;
+       }
+       RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs);
+
+       /* ... and one slot per data vNIC in the PF set */
+       reprs = nfp_reprs_alloc(pf->max_data_vnics);
+       if (!reprs) {
+               err = -ENOMEM;
+               goto err_free_phys;
+       }
+       RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PF], reprs);
+
+       return 0;
+
+err_free_phys:
+       nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+err_free_abm:
+       kfree(abm);
+       app->priv = NULL;
+       return err;
+}
+
+/* App clean callback: return to legacy eswitch mode if needed, free the
+ * PF and physical port representor sets, then free the ABM state and
+ * clear app->priv.
+ */
+static void nfp_abm_clean(struct nfp_app *app)
+{
+       struct nfp_abm *priv = app->priv;
+
+       nfp_abm_eswitch_clean_up(priv);
+
+       nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
+       nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+
+       kfree(priv);
+       app->priv = NULL;
+}
+
+/* Callback table that registers the ABM NIC app under the name "abm" */
+const struct nfp_app_type app_abm = {
+       .id             = NFP_APP_ACTIVE_BUFFER_MGMT_NIC,
+       .name           = "abm",
+
+       /* app lifetime */
+       .init           = nfp_abm_init,
+       .clean          = nfp_abm_clean,
+
+       /* per-vNIC state */
+       .vnic_alloc     = nfp_abm_vnic_alloc,
+       .vnic_free      = nfp_abm_vnic_free,
+
+       /* eswitch mode handling */
+       .eswitch_mode_get       = nfp_abm_eswitch_mode_get,
+       .eswitch_mode_set       = nfp_abm_eswitch_mode_set,
+
+       /* port id to representor netdev lookup */
+       .repr_get       = nfp_abm_repr_get,
+};
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
new file mode 100644 (file)
index 0000000..5938b69
--- /dev/null
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ABM_H__
+#define __NFP_ABM_H__ 1
+
+#include <net/devlink.h>
+
+struct nfp_app;
+struct nfp_net;
+
+/* Port ID bit fields: type lives in bits 23:16, id in bits 7:0 */
+#define NFP_ABM_PORTID_TYPE    GENMASK(23, 16)
+#define NFP_ABM_PORTID_ID      GENMASK(7, 0)
+
+/**
+ * struct nfp_abm - ABM NIC app structure
+ * @app:       back pointer to nfp_app
+ * @pf_id:     ID of our PF link
+ * @eswitch_mode:      devlink eswitch mode, advanced functions only visible
+ *                     in switchdev mode
+ */
+struct nfp_abm {
+       struct nfp_app *app;
+       unsigned int pf_id;
+       enum devlink_eswitch_mode eswitch_mode;
+};
+
+/**
+ * struct nfp_abm_link - port tuple of an ABM NIC
+ * @abm:       back pointer to nfp_abm
+ * @vnic:      data vNIC
+ * @id:                id of the data vNIC
+ * @queue_base:        id of base to host queue within PCIe (not QC idx)
+ */
+struct nfp_abm_link {
+       struct nfp_abm *abm;
+       struct nfp_net *vnic;
+       unsigned int id;
+       unsigned int queue_base;
+};
+
+void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
+int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_abi.h b/drivers/net/ethernet/netronome/nfp/nfp_abi.h
new file mode 100644 (file)
index 0000000..7ffa6e6
--- /dev/null
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ABI__
+#define __NFP_ABI__ 1
+
+#include <linux/types.h>
+
+#define NFP_MBOX_SYM_NAME              "_abi_nfd_pf%u_mbox"
+#define NFP_MBOX_SYM_MIN_SIZE          16 /* When no data needed */
+
+#define NFP_MBOX_CMD           0x00
+#define NFP_MBOX_RET           0x04
+#define NFP_MBOX_DATA_LEN      0x08
+#define NFP_MBOX_RESERVED      0x0c
+#define NFP_MBOX_DATA          0x10
+
+/**
+ * enum nfp_mbox_cmd - PF mailbox commands
+ *
+ * @NFP_MBOX_NO_CMD:   null command
+ * Used to indicate previous command has finished.
+ *
+ * @NFP_MBOX_POOL_GET: get shared buffer pool info/config
+ * Input  - struct nfp_shared_buf_pool_id
+ * Output - struct nfp_shared_buf_pool_info_get
+ *
+ * @NFP_MBOX_POOL_SET: set shared buffer pool info/config
+ * Input  - struct nfp_shared_buf_pool_info_set
+ * Output - None
+ */
+enum nfp_mbox_cmd {
+       NFP_MBOX_NO_CMD                 = 0x00,
+
+       NFP_MBOX_POOL_GET               = 0x01,
+       NFP_MBOX_POOL_SET               = 0x02,
+};
+
+#define NFP_SHARED_BUF_COUNT_SYM_NAME  "_abi_nfd_pf%u_sb_cnt"
+#define NFP_SHARED_BUF_TABLE_SYM_NAME  "_abi_nfd_pf%u_sb_tbl"
+
+/**
+ * struct nfp_shared_buf - NFP shared buffer description
+ * @id:                                numerical user-visible id of the shared buffer
+ * @size:                      size in bytes of the buffer
+ * @ingress_pools_count:       number of ingress pools
+ * @egress_pools_count:                number of egress pools
+ * @ingress_tc_count:          number of ingress traffic classes
+ * @egress_tc_count:           number of egress traffic classes
+ * @pool_size_unit:            pool size may be in credits, each credit is
+ *                             @pool_size_unit bytes
+ */
+struct nfp_shared_buf {
+       __le32 id;
+       __le32 size;
+       __le16 ingress_pools_count;
+       __le16 egress_pools_count;
+       __le16 ingress_tc_count;
+       __le16 egress_tc_count;
+
+       __le32 pool_size_unit;
+};
+
+/**
+ * struct nfp_shared_buf_pool_id - shared buffer pool identification
+ * @shared_buf:                shared buffer id
+ * @pool:              pool index
+ */
+struct nfp_shared_buf_pool_id {
+       __le32 shared_buf;
+       __le32 pool;
+};
+
+/**
+ * struct nfp_shared_buf_pool_info_get - struct devlink_sb_pool_info mirror
+ * @pool_type:         one of enum devlink_sb_pool_type
+ * @size:              pool size in units of SB's @pool_size_unit
+ * @threshold_type:    one of enum devlink_sb_threshold_type
+ */
+struct nfp_shared_buf_pool_info_get {
+       __le32 pool_type;
+       __le32 size;
+       __le32 threshold_type;
+};
+
+/**
+ * struct nfp_shared_buf_pool_info_set - packed args of sb_pool_set
+ * @id:                        pool identification info
+ * @size:              pool size in units of SB's @pool_size_unit
+ * @threshold_type:    one of enum devlink_sb_threshold_type
+ */
+struct nfp_shared_buf_pool_info_set {
+       struct nfp_shared_buf_pool_id id;
+       __le32 size;
+       __le32 threshold_type;
+};
+
+#endif
index 0e0253c7e17b5e6f1ccb9ce4bd7f9ac87f67463a..c9d8a7ab311e536fc994091d964b8b92eb2ca778 100644 (file)
@@ -54,6 +54,9 @@ static const struct nfp_app_type *apps[] = {
 #ifdef CONFIG_NFP_APP_FLOWER
        [NFP_APP_FLOWER_NIC]    = &app_flower,
 #endif
+#ifdef CONFIG_NFP_APP_ABM_NIC
+       [NFP_APP_ACTIVE_BUFFER_MGMT_NIC] = &app_abm,
+#endif
 };
 
 struct nfp_app *nfp_app_from_netdev(struct net_device *netdev)
index 2d9cb2528fc7fb7bae771f13fbc5a4b1e65e13bb..23b99a4e05c20570a2cf3c5c067f6d9f8d34d2c6 100644 (file)
@@ -57,11 +57,13 @@ enum nfp_app_id {
        NFP_APP_CORE_NIC        = 0x1,
        NFP_APP_BPF_NIC         = 0x2,
        NFP_APP_FLOWER_NIC      = 0x3,
+       NFP_APP_ACTIVE_BUFFER_MGMT_NIC = 0x4,
 };
 
 extern const struct nfp_app_type app_nic;
 extern const struct nfp_app_type app_bpf;
 extern const struct nfp_app_type app_flower;
+extern const struct nfp_app_type app_abm;
 
 /**
  * struct nfp_app_type - application definition
@@ -95,6 +97,7 @@ extern const struct nfp_app_type app_flower;
  * @bpf:       BPF ndo offload-related calls
  * @xdp_offload:    offload an XDP program
  * @eswitch_mode_get:    get SR-IOV eswitch mode
+ * @eswitch_mode_set:    set SR-IOV eswitch mode (under pf->lock)
  * @sriov_enable: app-specific sriov initialisation
  * @sriov_disable: app-specific sriov clean-up
  * @repr_get:  get representor netdev
@@ -146,6 +149,7 @@ struct nfp_app_type {
        void (*sriov_disable)(struct nfp_app *app);
 
        enum devlink_eswitch_mode (*eswitch_mode_get)(struct nfp_app *app);
+       int (*eswitch_mode_set)(struct nfp_app *app, u16 mode);
        struct net_device *(*repr_get)(struct nfp_app *app, u32 id);
 };
 
@@ -370,6 +374,13 @@ static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode)
        return 0;
 }
 
+/* Pass an eswitch mode change on to the app; app types which do not
+ * implement the callback report -EOPNOTSUPP.
+ */
+static inline int nfp_app_eswitch_mode_set(struct nfp_app *app, u16 mode)
+{
+       const struct nfp_app_type *type = app->type;
+
+       if (type->eswitch_mode_set)
+               return type->eswitch_mode_set(app, mode);
+
+       return -EOPNOTSUPP;
+}
+
 static inline int nfp_app_sriov_enable(struct nfp_app *app, int num_vfs)
 {
        if (!app || !app->type->sriov_enable)
@@ -410,5 +421,7 @@ void nfp_app_free(struct nfp_app *app);
 
 int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
                           unsigned int id);
+int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
+                                  struct nfp_net *nn, unsigned int id);
 
 #endif
index b9618c37403f9abe70c2835b338e8ddc7a6a500c..e2dfe4f168bba36d816ba6ab0b537ea9edb2affa 100644 (file)
@@ -38,9 +38,8 @@
 #include "nfp_net.h"
 #include "nfp_port.h"
 
-static int
-nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
-                              struct nfp_net *nn, unsigned int id)
+int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
+                                  struct nfp_net *nn, unsigned int id)
 {
        int err;
 
index b1e67cf4257a5d3b3d046c1fe2330983b4c8cdcf..71c2edd8303100eb62e5f6db909ee4f436a3f6a9 100644 (file)
@@ -149,6 +149,26 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index)
        return ret;
 }
 
+/* devlink .sb_pool_get callback - pass the query to the shared buffer code */
+static int
+nfp_devlink_sb_pool_get(struct devlink *devlink, unsigned int sb_index,
+                       u16 pool_index, struct devlink_sb_pool_info *pool_info)
+{
+       return nfp_shared_buf_pool_get(devlink_priv(devlink), sb_index,
+                                      pool_index, pool_info);
+}
+
+/* devlink .sb_pool_set callback - pass the request to the shared buffer code */
+static int
+nfp_devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+                       u16 pool_index,
+                       u32 size, enum devlink_sb_threshold_type threshold_type)
+{
+       struct nfp_pf *pf;
+
+       pf = devlink_priv(devlink);
+       return nfp_shared_buf_pool_set(pf, sb_index, pool_index, size,
+                                      threshold_type);
+}
+
 static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
        struct nfp_pf *pf = devlink_priv(devlink);
@@ -156,10 +176,25 @@ static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
        return nfp_app_eswitch_mode_get(pf->app, mode);
 }
 
+/* devlink .eswitch_mode_set callback - ask the app to switch eswitch mode */
+static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+       struct nfp_pf *pf;
+       int err;
+
+       pf = devlink_priv(devlink);
+
+       /* The app's eswitch_mode_set callback is invoked under pf->lock */
+       mutex_lock(&pf->lock);
+       err = nfp_app_eswitch_mode_set(pf->app, mode);
+       mutex_unlock(&pf->lock);
+
+       return err;
+}
+
 const struct devlink_ops nfp_devlink_ops = {
        .port_split             = nfp_devlink_port_split,
        .port_unsplit           = nfp_devlink_port_unsplit,
+       .sb_pool_get            = nfp_devlink_sb_pool_get,
+       .sb_pool_set            = nfp_devlink_sb_pool_set,
        .eswitch_mode_get       = nfp_devlink_eswitch_mode_get,
+       .eswitch_mode_set       = nfp_devlink_eswitch_mode_set,
 };
 
 int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
index 0ade122805ad1deb467035294a2f8939ae8a4139..46b76d5a726c6ade2c48c000172a3d9ba9db7253 100644 (file)
@@ -55,6 +55,7 @@
 
 #include "nfpcore/nfp6000_pcie.h"
 
+#include "nfp_abi.h"
 #include "nfp_app.h"
 #include "nfp_main.h"
 #include "nfp_net.h"
@@ -75,6 +76,122 @@ static const struct pci_device_id nfp_pci_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, nfp_pci_device_ids);
 
+/**
+ * nfp_pf_rtsym_read_optional() - read a per-PF run time symbol if present
+ * @pf:                NFP PF handle
+ * @format:    symbol name format, expanded with the PCIe unit number
+ * @default_val: value to use when the symbol does not exist
+ *
+ * Return: value of the symbol, @default_val if the lookup reports -ENOENT,
+ * or the error code of a failed read.
+ */
+int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format,
+                              unsigned int default_val)
+{
+       char sym_name[256];
+       u64 sym_val;
+       int rc = 0;
+
+       snprintf(sym_name, sizeof(sym_name), format,
+                nfp_cppcore_pcie_unit(pf->cpp));
+
+       sym_val = nfp_rtsym_read_le(pf->rtbl, sym_name, &rc);
+       if (!rc)
+               return sym_val;
+
+       /* A missing symbol is not an error, fall back to the default */
+       if (rc == -ENOENT)
+               return default_val;
+
+       nfp_err(pf->cpp, "Unable to read symbol %s\n", sym_name);
+       return rc;
+}
+
+/**
+ * nfp_pf_map_rtsym() - map the memory behind a per-PF run time symbol
+ * @pf:                NFP PF handle
+ * @name:      name handed to nfp_rtsym_map() for the mapping
+ * @sym_fmt:   symbol name format, expanded with the PCIe unit number
+ * @min_size:  minimal size requested from nfp_rtsym_map()
+ * @area:      place to store the CPP area pointer
+ *
+ * Return: whatever nfp_rtsym_map() returns for the expanded symbol name.
+ */
+u8 __iomem *
+nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
+                unsigned int min_size, struct nfp_cpp_area **area)
+{
+       char sym_name[256];
+       u8 __iomem *mem;
+
+       snprintf(sym_name, sizeof(sym_name), sym_fmt,
+                nfp_cppcore_pcie_unit(pf->cpp));
+       mem = nfp_rtsym_map(pf->rtbl, sym_name, name, min_size, area);
+
+       return mem;
+}
+
+/**
+ * nfp_mbox_cmd() - issue a command through the per-PCI PF mailbox
+ * @pf:                NFP PF handle
+ * @cmd:       command to place in the mailbox command field
+ * @in_data:   input payload, copied into the mailbox data area
+ * @in_length: length of @in_data; clamped to the size of the data area
+ * @out_data:  buffer for the response payload
+ * @out_length:        size of @out_data
+ *
+ * Callers should hold the devlink instance lock.
+ *
+ * Return: number of response bytes copied to @out_data on success,
+ * -EOPNOTSUPP if the PF has no mailbox symbol, -EBUSY if the command field
+ * is not clear on entry, -ETIMEDOUT if the command field does not clear
+ * within 5 seconds, -EIO if a payload transfer comes up short, the negated
+ * value of the mailbox return field if that is non-zero, or the error of a
+ * failed CPP access.
+ */
+int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length,
+                void *out_data, u64 out_length)
+{
+       unsigned long long addr;
+       unsigned long err_at;
+       u64 max_data_sz;
+       u32 val = 0;
+       u32 cpp_id;
+       int n, err;
+
+       if (!pf->mbox)
+               return -EOPNOTSUPP;
+
+       cpp_id = NFP_CPP_ISLAND_ID(pf->mbox->target, NFP_CPP_ACTION_RW, 0,
+                                  pf->mbox->domain);
+       addr = pf->mbox->addr;
+       /* Payload space is what the symbol holds beyond the fixed header */
+       max_data_sz = pf->mbox->size - NFP_MBOX_SYM_MIN_SIZE;
+
+       /* Check if cmd field is clear */
+       err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val);
+       if (err || val) {
+               nfp_warn(pf->cpp, "failed to issue command (%u): %u, err: %d\n",
+                        cmd, val, err);
+               return err ?: -EBUSY;
+       }
+
+       /* Input which does not fit the data area is truncated, not rejected */
+       in_length = min(in_length, max_data_sz);
+       n = nfp_cpp_write(pf->cpp, cpp_id, addr + NFP_MBOX_DATA,
+                         in_data, in_length);
+       if (n != in_length)
+               return -EIO;
+       /* Write data_len and wipe reserved */
+       err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN,
+                            in_length);
+       if (err)
+               return err;
+
+       /* Read back for ordering */
+       err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val);
+       if (err)
+               return err;
+
+       /* Write cmd and wipe return value */
+       err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, cmd);
+       if (err)
+               return err;
+
+       /* Poll every 5 ms, give up once 5 seconds have passed */
+       err_at = jiffies + 5 * HZ;
+       while (true) {
+               /* Wait for command to go to 0 (NFP_MBOX_NO_CMD) */
+               err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val);
+               if (err)
+                       return err;
+               if (!val)
+                       break;
+
+               if (time_is_before_eq_jiffies(err_at))
+                       return -ETIMEDOUT;
+
+               msleep(5);
+       }
+
+       /* Copy output if any (could be error info, do it before reading ret) */
+       err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val);
+       if (err)
+               return err;
+
+       /* Copy no more than was reported, was asked for, or the area holds */
+       out_length = min_t(u32, val, min(out_length, max_data_sz));
+       n = nfp_cpp_read(pf->cpp, cpp_id, addr + NFP_MBOX_DATA,
+                        out_data, out_length);
+       if (n != out_length)
+               return -EIO;
+
+       /* Check if there is an error */
+       err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_RET, &val);
+       if (err)
+               return err;
+       /* Non-zero return field is handed back to the caller negated */
+       if (val)
+               return -val;
+
+       return out_length;
+}
+
 static bool nfp_board_ready(struct nfp_pf *pf)
 {
        const char *cp;
@@ -436,6 +553,25 @@ static void nfp_fw_unload(struct nfp_pf *pf)
        nfp_nsp_close(nsp);
 }
 
+/* Look up the run time symbols the PF code uses and cache them in @pf.
+ * Returns 0 on success (including when the optional mailbox symbol is
+ * absent) and -EINVAL if the mailbox symbol is smaller than its header.
+ */
+static int nfp_pf_find_rtsyms(struct nfp_pf *pf)
+{
+       unsigned int pcie_unit = nfp_cppcore_pcie_unit(pf->cpp);
+       char sym_name[256];
+
+       /* Optional per-PCI PF mailbox */
+       snprintf(sym_name, sizeof(sym_name), NFP_MBOX_SYM_NAME, pcie_unit);
+       pf->mbox = nfp_rtsym_lookup(pf->rtbl, sym_name);
+       if (!pf->mbox)
+               return 0;
+
+       if (pf->mbox->size < NFP_MBOX_SYM_MIN_SIZE) {
+               nfp_err(pf->cpp, "PF mailbox symbol too small: %llu < %d\n",
+                       pf->mbox->size, NFP_MBOX_SYM_MIN_SIZE);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int nfp_pci_probe(struct pci_dev *pdev,
                         const struct pci_device_id *pci_id)
 {
@@ -510,6 +646,10 @@ static int nfp_pci_probe(struct pci_dev *pdev,
        pf->mip = nfp_mip_open(pf->cpp);
        pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip);
 
+       err = nfp_pf_find_rtsyms(pf);
+       if (err)
+               goto err_fw_unload;
+
        pf->dump_flag = NFP_DUMP_NSP_DIAG;
        pf->dumpspec = nfp_net_dump_load_dumpspec(pf->cpp, pf->rtbl);
 
index 42211083b51f40a63c4957b2c5ab5b9e567ddba9..595b3dc280e3bb20b7f4db4bef0a8c2369c034e7 100644 (file)
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/workqueue.h>
+#include <net/devlink.h>
 
 struct dentry;
 struct device;
-struct devlink_ops;
 struct pci_dev;
 
 struct nfp_cpp;
@@ -60,7 +60,9 @@ struct nfp_mip;
 struct nfp_net;
 struct nfp_nsp_identify;
 struct nfp_port;
+struct nfp_rtsym;
 struct nfp_rtsym_table;
+struct nfp_shared_buf;
 
 /**
  * struct nfp_dumpspec - NFP FW dump specification structure
@@ -87,6 +89,7 @@ struct nfp_dumpspec {
  * @vf_cfg_mem:                Pointer to mapped VF configuration area
  * @vfcfg_tbl2_area:   Pointer to the CPP area for the VF config table
  * @vfcfg_tbl2:                Pointer to mapped VF config table
+ * @mbox:              RTSym of per-PCI PF mailbox (under devlink lock)
  * @irq_entries:       Array of MSI-X entries for all vNICs
  * @limit_vfs:         Number of VFs supported by firmware (~0 for PCI limit)
  * @num_vfs:           Number of SR-IOV VFs enabled
@@ -108,6 +111,8 @@ struct nfp_dumpspec {
  * @ports:             Linked list of port structures (struct nfp_port)
  * @wq:                        Workqueue for running works which need to grab @lock
  * @port_refresh_work: Work entry for taking netdevs out
+ * @shared_bufs:       Array of shared buffer structures if FW has any SBs
+ * @num_shared_bufs:   Number of elements in @shared_bufs
  * @lock:              Protects all fields which may change after probe
  */
 struct nfp_pf {
@@ -127,6 +132,8 @@ struct nfp_pf {
        struct nfp_cpp_area *vfcfg_tbl2_area;
        u8 __iomem *vfcfg_tbl2;
 
+       const struct nfp_rtsym *mbox;
+
        struct msix_entry *irq_entries;
 
        unsigned int limit_vfs;
@@ -158,6 +165,9 @@ struct nfp_pf {
        struct workqueue_struct *wq;
        struct work_struct port_refresh_work;
 
+       struct nfp_shared_buf *shared_bufs;
+       unsigned int num_shared_bufs;
+
        struct mutex lock;
 };
 
@@ -177,6 +187,14 @@ nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
 
 bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
 
+int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format,
+                              unsigned int default_val);
+u8 __iomem *
+nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
+                unsigned int min_size, struct nfp_cpp_area **area);
+int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length,
+                void *out_data, u64 out_length);
+
 enum nfp_dump_diag {
        NFP_DUMP_NSP_DIAG = 0,
 };
@@ -188,4 +206,11 @@ s64 nfp_net_dump_calculate_size(struct nfp_pf *pf, struct nfp_dumpspec *spec,
 int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec,
                                 struct ethtool_dump *dump_param, void *dest);
 
+int nfp_shared_buf_register(struct nfp_pf *pf);
+void nfp_shared_buf_unregister(struct nfp_pf *pf);
+int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index,
+                           struct devlink_sb_pool_info *pool_info);
+int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb,
+                           u16 pool_index, u32 size,
+                           enum devlink_sb_threshold_type threshold_type);
 #endif /* NFP_MAIN_H */
index bd7d8ae31e1733e3c3c75dc0b540823d0532652d..57cb035dcc6dc82afd7d0826e541c4b49e7745ac 100644 (file)
@@ -545,6 +545,7 @@ struct nfp_net_dp {
 /**
  * struct nfp_net - NFP network device structure
  * @dp:                        Datapath structure
+ * @id:                        vNIC id within the PF (0 for VFs)
  * @fw_ver:            Firmware version
  * @cap:                Capabilities advertised by the Firmware
  * @max_mtu:            Maximum support MTU advertised by the Firmware
@@ -597,6 +598,8 @@ struct nfp_net {
 
        struct nfp_net_fw_version fw_ver;
 
+       u32 id;
+
        u32 cap;
        u32 max_mtu;
 
@@ -909,7 +912,7 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new,
 void nfp_net_debugfs_create(void);
 void nfp_net_debugfs_destroy(void);
 struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev);
-void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id);
+void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir);
 void nfp_net_debugfs_dir_clean(struct dentry **dir);
 #else
 static inline void nfp_net_debugfs_create(void)
@@ -926,7 +929,7 @@ static inline struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev)
 }
 
 static inline void
-nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
+nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir)
 {
 }
 
index d9111c07769904a8506c3a43542a095a97905167..eea11e881bf53c52b7ce35478c675f869f6c375b 100644 (file)
@@ -3277,6 +3277,24 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
        return features;
 }
 
+/**
+ * nfp_net_get_phys_port_name() - report the physical port name of a vNIC
+ * @netdev:    netdev of the vNIC
+ * @name:      buffer to place the name in
+ * @len:       size of @name
+ *
+ * vNICs which have a port defer to nfp_port_get_phys_port_name(), other PF
+ * vNICs are named after their vNIC id.  VFs have no name to report and must
+ * fail instead of returning 0 with @name left unwritten, because callers
+ * take a return of 0 to mean that @name holds a valid string.
+ *
+ * Return: 0 on success, -EOPNOTSUPP for VFs, -EINVAL if the name does not
+ * fit in @len bytes, or the result of nfp_port_get_phys_port_name().
+ */
+static int
+nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+       int n;
+
+       if (nn->port)
+               return nfp_port_get_phys_port_name(netdev, name, len);
+
+       /* Nothing was written to @name, so don't claim success */
+       if (nn->dp.is_vf)
+               return -EOPNOTSUPP;
+
+       n = snprintf(name, len, "%d", nn->id);
+       if (n >= len)
+               return -EINVAL;
+
+       return 0;
+}
+
 /**
  * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
  * @nn:   NFP Net device to reconfigure
@@ -3475,7 +3493,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
        .ndo_set_mac_address    = nfp_net_set_mac_address,
        .ndo_set_features       = nfp_net_set_features,
        .ndo_features_check     = nfp_net_features_check,
-       .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
+       .ndo_get_phys_port_name = nfp_net_get_phys_port_name,
        .ndo_udp_tunnel_add     = nfp_net_add_vxlan_port,
        .ndo_udp_tunnel_del     = nfp_net_del_vxlan_port,
        .ndo_bpf                = nfp_net_xdp,
index 67cdd8330c59bedafd887c459147e6779c281015..099b63d6745143179b7aee5be58be14a2c2dfe3f 100644 (file)
@@ -201,7 +201,7 @@ static const struct file_operations nfp_xdp_q_fops = {
        .llseek = seq_lseek
 };
 
-void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
+void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir)
 {
        struct dentry *queues, *tx, *rx, *xdp;
        char name[20];
@@ -211,7 +211,7 @@ void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
                return;
 
        if (nfp_net_is_data_vnic(nn))
-               sprintf(name, "vnic%d", id);
+               sprintf(name, "vnic%d", nn->id);
        else
                strcpy(name, "ctrl-vnic");
        nn->debugfs_dir = debugfs_create_dir(name, ddir);
index 45cd2092e498a3a07d94c8103455abffc7041993..28516eecccc8dea0b52797d2f338164781c1180e 100644 (file)
@@ -101,48 +101,15 @@ nfp_net_find_port(struct nfp_eth_table *eth_tbl, unsigned int index)
        return NULL;
 }
 
-static int
-nfp_net_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format,
-                              unsigned int default_val)
-{
-       char name[256];
-       int err = 0;
-       u64 val;
-
-       snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp));
-
-       val = nfp_rtsym_read_le(pf->rtbl, name, &err);
-       if (err) {
-               if (err == -ENOENT)
-                       return default_val;
-               nfp_err(pf->cpp, "Unable to read symbol %s\n", name);
-               return err;
-       }
-
-       return val;
-}
-
 static int nfp_net_pf_get_num_ports(struct nfp_pf *pf)
 {
-       return nfp_net_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1);
+       return nfp_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1);
 }
 
 static int nfp_net_pf_get_app_id(struct nfp_pf *pf)
 {
-       return nfp_net_pf_rtsym_read_optional(pf, "_pf%u_net_app_id",
-                                             NFP_APP_CORE_NIC);
-}
-
-static u8 __iomem *
-nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
-                    unsigned int min_size, struct nfp_cpp_area **area)
-{
-       char pf_symbol[256];
-
-       snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt,
-                nfp_cppcore_pcie_unit(pf->cpp));
-
-       return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area);
+       return nfp_pf_rtsym_read_optional(pf, "_pf%u_net_app_id",
+                                         NFP_APP_CORE_NIC);
 }
 
 static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
@@ -211,11 +178,13 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id)
 {
        int err;
 
+       nn->id = id;
+
        err = nfp_net_init(nn);
        if (err)
                return err;
 
-       nfp_net_debugfs_vnic_add(nn, pf->ddir, id);
+       nfp_net_debugfs_vnic_add(nn, pf->ddir);
 
        if (nn->port) {
                err = nfp_devlink_port_register(pf->app, nn->port);
@@ -379,9 +348,8 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
        if (!nfp_app_needs_ctrl_vnic(pf->app))
                return 0;
 
-       ctrl_bar = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar",
-                                       NFP_PF_CSR_SLICE_SIZE,
-                                       &pf->ctrl_vnic_bar);
+       ctrl_bar = nfp_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar",
+                                   NFP_PF_CSR_SLICE_SIZE, &pf->ctrl_vnic_bar);
        if (IS_ERR(ctrl_bar)) {
                nfp_err(pf->cpp, "Failed to find ctrl vNIC memory symbol\n");
                err = PTR_ERR(ctrl_bar);
@@ -507,8 +475,8 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
        int err;
 
        min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
-       mem = nfp_net_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0",
-                                  min_size, &pf->data_vnic_bar);
+       mem = nfp_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0",
+                              min_size, &pf->data_vnic_bar);
        if (IS_ERR(mem)) {
                nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
                return PTR_ERR(mem);
@@ -528,10 +496,9 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
                }
        }
 
-       pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg",
-                                             "_pf%d_net_vf_bar",
-                                             NFP_NET_CFG_BAR_SZ *
-                                             pf->limit_vfs, &pf->vf_cfg_bar);
+       pf->vf_cfg_mem = nfp_pf_map_rtsym(pf, "net.vfcfg", "_pf%d_net_vf_bar",
+                                         NFP_NET_CFG_BAR_SZ * pf->limit_vfs,
+                                         &pf->vf_cfg_bar);
        if (IS_ERR(pf->vf_cfg_mem)) {
                if (PTR_ERR(pf->vf_cfg_mem) != -ENOENT) {
                        err = PTR_ERR(pf->vf_cfg_mem);
@@ -541,9 +508,9 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
        }
 
        min_size = NFP_NET_VF_CFG_SZ * pf->limit_vfs + NFP_NET_VF_CFG_MB_SZ;
-       pf->vfcfg_tbl2 = nfp_net_pf_map_rtsym(pf, "net.vfcfg_tbl2",
-                                             "_pf%d_net_vf_cfg2",
-                                             min_size, &pf->vfcfg_tbl2_area);
+       pf->vfcfg_tbl2 = nfp_pf_map_rtsym(pf, "net.vfcfg_tbl2",
+                                         "_pf%d_net_vf_cfg2",
+                                         min_size, &pf->vfcfg_tbl2_area);
        if (IS_ERR(pf->vfcfg_tbl2)) {
                if (PTR_ERR(pf->vfcfg_tbl2) != -ENOENT) {
                        err = PTR_ERR(pf->vfcfg_tbl2);
@@ -763,6 +730,10 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
        if (err)
                goto err_app_clean;
 
+       err = nfp_shared_buf_register(pf);
+       if (err)
+               goto err_devlink_unreg;
+
        mutex_lock(&pf->lock);
        pf->ddir = nfp_net_debugfs_device_add(pf->pdev);
 
@@ -796,6 +767,8 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
 err_clean_ddir:
        nfp_net_debugfs_dir_clean(&pf->ddir);
        mutex_unlock(&pf->lock);
+       nfp_shared_buf_unregister(pf);
+err_devlink_unreg:
        cancel_work_sync(&pf->port_refresh_work);
        devlink_unregister(devlink);
 err_app_clean:
@@ -823,6 +796,7 @@ void nfp_net_pci_remove(struct nfp_pf *pf)
 
        mutex_unlock(&pf->lock);
 
+       nfp_shared_buf_unregister(pf);
        devlink_unregister(priv_to_devlink(pf));
 
        nfp_net_pf_free_irqs(pf);
index 6e79da91e475d3fccf99989ece63a865cfbea160..09e87d5f4f72cc060fffa05438b297382203df98 100644 (file)
@@ -385,7 +385,7 @@ struct net_device *nfp_repr_alloc(struct nfp_app *app)
        return NULL;
 }
 
-static void nfp_repr_clean_and_free(struct nfp_repr *repr)
+void nfp_repr_clean_and_free(struct nfp_repr *repr)
 {
        nfp_info(repr->app->cpp, "Destroying Representor(%s)\n",
                 repr->netdev->name);
index cd756a15445f29e777c43de80b1879bb596f150e..8366e4f3c62327b38d45cecd08bcd25c49baf0d5 100644 (file)
@@ -76,6 +76,7 @@ struct nfp_repr_pcpu_stats {
  * @port:      Port of representor
  * @app:       APP handle
  * @stats:     Statistic of packets hitting CPU
+ * @app_priv:  Pointer for APP data
  */
 struct nfp_repr {
        struct net_device *netdev;
@@ -83,6 +84,7 @@ struct nfp_repr {
        struct nfp_port *port;
        struct nfp_app *app;
        struct nfp_repr_pcpu_stats __percpu *stats;
+       void *app_priv;
 };
 
 /**
@@ -125,6 +127,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
                  struct net_device *pf_netdev);
 void nfp_repr_free(struct net_device *netdev);
 struct net_device *nfp_repr_alloc(struct nfp_app *app);
+void nfp_repr_clean_and_free(struct nfp_repr *repr);
 void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs);
 void nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
                                      enum nfp_repr_type type);
index b802a1d554493060abec27f650692d8e64d9848c..68928c86b698eb4b2b69738eb1c2fdd033ac7c43 100644 (file)
@@ -283,7 +283,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 
        nfp_net_info(nn);
        vf->ddir = nfp_net_debugfs_device_add(pdev);
-       nfp_net_debugfs_vnic_add(nn, vf->ddir, 0);
+       nfp_net_debugfs_vnic_add(nn, vf->ddir);
 
        return 0;
 
index 7bd8be5c833b08f3416fff971e1101bf8791214d..9c1298114c70f801bced625870a4688e3eeda501 100644 (file)
@@ -181,7 +181,11 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
                                     eth_port->label_subport);
                break;
        case NFP_PORT_PF_PORT:
-               n = snprintf(name, len, "pf%d", port->pf_id);
+               if (!port->pf_split)
+                       n = snprintf(name, len, "pf%d", port->pf_id);
+               else
+                       n = snprintf(name, len, "pf%ds%d", port->pf_id,
+                                    port->pf_split_id);
                break;
        case NFP_PORT_VF_PORT:
                n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id);
@@ -218,6 +222,8 @@ int nfp_port_configure(struct net_device *netdev, bool configed)
        eth_port = __nfp_port_get_eth_port(port);
        if (!eth_port)
                return 0;
+       if (port->eth_forced)
+               return 0;
 
        err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed);
        return err < 0 && err != -EOPNOTSUPP ? err : 0;
index fa7e669a969c6e5baa8aec30fedb410d29806793..18666750456e359b1f6c9103ccff54a3a41e498a 100644 (file)
@@ -77,10 +77,13 @@ enum nfp_port_flags {
  * @app:       backpointer to the app structure
  * @dl_port:   devlink port structure
  * @eth_id:    for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
+ * @eth_forced:        for %NFP_PORT_PHYS_PORT port is forced UP or DOWN, don't change
  * @eth_port:  for %NFP_PORT_PHYS_PORT translated ETH Table port entry
  * @eth_stats: for %NFP_PORT_PHYS_PORT MAC stats if available
  * @pf_id:     for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
  * @vf_id:     for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
+ * @pf_split:  for %NFP_PORT_PF_PORT %true if PCI PF has more than one vNIC
+ * @pf_split_id:for %NFP_PORT_PF_PORT ID of PCI PF vNIC (valid if @pf_split)
  * @vnic:      for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT vNIC ctrl memory
  * @port_list: entry on pf's list of ports
  */
@@ -99,6 +102,7 @@ struct nfp_port {
                /* NFP_PORT_PHYS_PORT */
                struct {
                        unsigned int eth_id;
+                       bool eth_forced;
                        struct nfp_eth_table_port *eth_port;
                        u8 __iomem *eth_stats;
                };
@@ -106,6 +110,8 @@ struct nfp_port {
                struct {
                        unsigned int pf_id;
                        unsigned int vf_id;
+                       bool pf_split;
+                       unsigned int pf_split_id;
                        u8 __iomem *vnic;
                };
        };
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
new file mode 100644 (file)
index 0000000..0ecd837
--- /dev/null
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <net/devlink.h>
+
+#include "nfpcore/nfp_cpp.h"
+#include "nfpcore/nfp_nffw.h"
+#include "nfp_abi.h"
+#include "nfp_app.h"
+#include "nfp_main.h"
+
+static u32 nfp_shared_buf_pool_unit(struct nfp_pf *pf, unsigned int sb)
+{
+       __le32 sb_id = cpu_to_le32(sb);
+       unsigned int i;
+
+       for (i = 0; i < pf->num_shared_bufs; i++)
+               if (pf->shared_bufs[i].id == sb_id)
+                       return le32_to_cpu(pf->shared_bufs[i].pool_size_unit);
+
+       WARN_ON_ONCE(1);
+       return 0;
+}
+
+int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index,
+                           struct devlink_sb_pool_info *pool_info)
+{
+       struct nfp_shared_buf_pool_info_get get_data;
+       struct nfp_shared_buf_pool_id id = {
+               .shared_buf     = cpu_to_le32(sb),
+               .pool           = cpu_to_le32(pool_index),
+       };
+       unsigned int unit_size;
+       int n;
+
+       unit_size = nfp_shared_buf_pool_unit(pf, sb);
+       if (!unit_size)
+               return -EINVAL;
+
+       n = nfp_mbox_cmd(pf, NFP_MBOX_POOL_GET, &id, sizeof(id),
+                        &get_data, sizeof(get_data));
+       if (n < 0)
+               return n;
+       if (n < sizeof(get_data))
+               return -EIO;
+
+       pool_info->pool_type = le32_to_cpu(get_data.pool_type);
+       pool_info->threshold_type = le32_to_cpu(get_data.threshold_type);
+       pool_info->size = le32_to_cpu(get_data.size) * unit_size;
+
+       return 0;
+}
+
+int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb,
+                           u16 pool_index, u32 size,
+                           enum devlink_sb_threshold_type threshold_type)
+{
+       struct nfp_shared_buf_pool_info_set set_data = {
+               .id = {
+                       .shared_buf     = cpu_to_le32(sb),
+                       .pool           = cpu_to_le32(pool_index),
+               },
+               .threshold_type = cpu_to_le32(threshold_type),
+       };
+       unsigned int unit_size;
+
+       unit_size = nfp_shared_buf_pool_unit(pf, sb);
+       if (!unit_size || size % unit_size)
+               return -EINVAL;
+       set_data.size = cpu_to_le32(size / unit_size);
+
+       return nfp_mbox_cmd(pf, NFP_MBOX_POOL_SET, &set_data, sizeof(set_data),
+                           NULL, 0);
+}
+
+int nfp_shared_buf_register(struct nfp_pf *pf)
+{
+       struct devlink *devlink = priv_to_devlink(pf);
+       unsigned int i, num_entries, entry_sz;
+       struct nfp_cpp_area *sb_desc_area;
+       u8 __iomem *sb_desc;
+       int n, err;
+
+       if (!pf->mbox)
+               return 0;
+
+       n = nfp_pf_rtsym_read_optional(pf, NFP_SHARED_BUF_COUNT_SYM_NAME, 0);
+       if (n <= 0)
+               return n;
+       num_entries = n;
+
+       sb_desc = nfp_pf_map_rtsym(pf, "sb_tbl", NFP_SHARED_BUF_TABLE_SYM_NAME,
+                                  num_entries * sizeof(pf->shared_bufs[0]),
+                                  &sb_desc_area);
+       if (IS_ERR(sb_desc))
+               return PTR_ERR(sb_desc);
+
+       entry_sz = nfp_cpp_area_size(sb_desc_area) / num_entries;
+
+       pf->shared_bufs = kmalloc_array(num_entries, sizeof(pf->shared_bufs[0]),
+                                       GFP_KERNEL);
+       if (!pf->shared_bufs) {
+               err = -ENOMEM;
+               goto err_release_area;
+       }
+
+       for (i = 0; i < num_entries; i++) {
+               struct nfp_shared_buf *sb = &pf->shared_bufs[i];
+
+               /* Entries may be larger in future FW */
+               memcpy_fromio(sb, sb_desc + i * entry_sz, sizeof(*sb));
+
+               err = devlink_sb_register(devlink,
+                                         le32_to_cpu(sb->id),
+                                         le32_to_cpu(sb->size),
+                                         le16_to_cpu(sb->ingress_pools_count),
+                                         le16_to_cpu(sb->egress_pools_count),
+                                         le16_to_cpu(sb->ingress_tc_count),
+                                         le16_to_cpu(sb->egress_tc_count));
+               if (err)
+                       goto err_unreg_prev;
+       }
+       pf->num_shared_bufs = num_entries;
+
+       nfp_cpp_area_release_free(sb_desc_area);
+
+       return 0;
+
+err_unreg_prev:
+       while (i--)
+               devlink_sb_unregister(devlink,
+                                     le32_to_cpu(pf->shared_bufs[i].id));
+       kfree(pf->shared_bufs);
+err_release_area:
+       nfp_cpp_area_release_free(sb_desc_area);
+       return err;
+}
+
+void nfp_shared_buf_unregister(struct nfp_pf *pf)
+{
+       struct devlink *devlink = priv_to_devlink(pf);
+       unsigned int i;
+
+       for (i = 0; i < pf->num_shared_bufs; i++)
+               devlink_sb_unregister(devlink,
+                                     le32_to_cpu(pf->shared_bufs[i].id));
+       kfree(pf->shared_bufs);
+}
index a0e336bd1d85798bb295bdc1be538bd4efb67e38..749655c329b240021a34e99612412626c26e8855 100644 (file)
@@ -933,7 +933,6 @@ static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr,
        u32 *wrptr32 = kernel_vaddr;
        const u32 __iomem *rdptr32;
        int n, width;
-       bool is_64;
 
        priv = nfp_cpp_area_priv(area);
        rdptr64 = priv->iomem + offset;
@@ -943,10 +942,15 @@ static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr,
                return -EFAULT;
 
        width = priv->width.read;
-
        if (width <= 0)
                return -EINVAL;
 
+       /* MU reads via a PCIe2CPP BAR support 32bit (and other) lengths */
+       if (priv->target == (NFP_CPP_TARGET_MU & NFP_CPP_TARGET_ID_MASK) &&
+           priv->action == NFP_CPP_ACTION_RW &&
+           (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4))
+               width = TARGET_WIDTH_32;
+
        /* Unaligned? Translate to an explicit access */
        if ((priv->offset + offset) & (width - 1))
                return nfp_cpp_explicit_read(nfp_cpp_area_cpp(area),
@@ -956,36 +960,29 @@ static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr,
                                             priv->offset + offset,
                                             kernel_vaddr, length, width);
 
-       is_64 = width == TARGET_WIDTH_64;
-
-       /* MU reads via a PCIe2CPP BAR supports 32bit (and other) lengths */
-       if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) &&
-           priv->action == NFP_CPP_ACTION_RW)
-               is_64 = false;
+       if (WARN_ON(!priv->bar))
+               return -EFAULT;
 
-       if (is_64) {
-               if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
-                       return -EINVAL;
-       } else {
+       switch (width) {
+       case TARGET_WIDTH_32:
                if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0)
                        return -EINVAL;
-       }
 
-       if (WARN_ON(!priv->bar))
-               return -EFAULT;
+               for (n = 0; n < length; n += sizeof(u32))
+                       *wrptr32++ = __raw_readl(rdptr32++);
+               return n;
+#ifdef __raw_readq
+       case TARGET_WIDTH_64:
+               if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
+                       return -EINVAL;
 
-       if (is_64)
-#ifndef __raw_readq
-               return -EINVAL;
-#else
                for (n = 0; n < length; n += sizeof(u64))
                        *wrptr64++ = __raw_readq(rdptr64++);
+               return n;
 #endif
-       else
-               for (n = 0; n < length; n += sizeof(u32))
-                       *wrptr32++ = __raw_readl(rdptr32++);
-
-       return n;
+       default:
+               return -EINVAL;
+       }
 }
 
 static int
@@ -999,7 +996,6 @@ nfp6000_area_write(struct nfp_cpp_area *area,
        struct nfp6000_area_priv *priv;
        u32 __iomem *wrptr32;
        int n, width;
-       bool is_64;
 
        priv = nfp_cpp_area_priv(area);
        wrptr64 = priv->iomem + offset;
@@ -1009,10 +1005,15 @@ nfp6000_area_write(struct nfp_cpp_area *area,
                return -EFAULT;
 
        width = priv->width.write;
-
        if (width <= 0)
                return -EINVAL;
 
+       /* MU writes via a PCIe2CPP BAR support 32bit (and other) lengths */
+       if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) &&
+           priv->action == NFP_CPP_ACTION_RW &&
+           (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4))
+               width = TARGET_WIDTH_32;
+
        /* Unaligned? Translate to an explicit access */
        if ((priv->offset + offset) & (width - 1))
                return nfp_cpp_explicit_write(nfp_cpp_area_cpp(area),
@@ -1022,40 +1023,33 @@ nfp6000_area_write(struct nfp_cpp_area *area,
                                              priv->offset + offset,
                                              kernel_vaddr, length, width);
 
-       is_64 = width == TARGET_WIDTH_64;
-
-       /* MU writes via a PCIe2CPP BAR supports 32bit (and other) lengths */
-       if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) &&
-           priv->action == NFP_CPP_ACTION_RW)
-               is_64 = false;
+       if (WARN_ON(!priv->bar))
+               return -EFAULT;
 
-       if (is_64) {
-               if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
-                       return -EINVAL;
-       } else {
+       switch (width) {
+       case TARGET_WIDTH_32:
                if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0)
                        return -EINVAL;
-       }
 
-       if (WARN_ON(!priv->bar))
-               return -EFAULT;
+               for (n = 0; n < length; n += sizeof(u32)) {
+                       __raw_writel(*rdptr32++, wrptr32++);
+                       wmb();
+               }
+               return n;
+#ifdef __raw_writeq
+       case TARGET_WIDTH_64:
+               if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
+                       return -EINVAL;
 
-       if (is_64)
-#ifndef __raw_writeq
-               return -EINVAL;
-#else
                for (n = 0; n < length; n += sizeof(u64)) {
                        __raw_writeq(*rdptr64++, wrptr64++);
                        wmb();
                }
+               return n;
 #endif
-       else
-               for (n = 0; n < length; n += sizeof(u32)) {
-                       __raw_writel(*rdptr32++, wrptr32++);
-                       wmb();
-               }
-
-       return n;
+       default:
+               return -EINVAL;
+       }
 }
 
 struct nfp6000_explicit_priv {
index da07ccdf84bf1979fdd43dc3fc1d4d742b41c1af..60a5769ef5a1a8c2dacde6c9d2551f587d8e402a 100644 (file)
@@ -1618,8 +1618,24 @@ static int netvsc_set_ringparam(struct net_device *ndev,
        return ret;
 }
 
+static u32 netvsc_get_msglevel(struct net_device *ndev)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(ndev);
+
+       return ndev_ctx->msg_enable;
+}
+
+static void netvsc_set_msglevel(struct net_device *ndev, u32 val)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(ndev);
+
+       ndev_ctx->msg_enable = val;
+}
+
 static const struct ethtool_ops ethtool_ops = {
        .get_drvinfo    = netvsc_get_drvinfo,
+       .get_msglevel   = netvsc_get_msglevel,
+       .set_msglevel   = netvsc_set_msglevel,
        .get_link       = ethtool_op_get_link,
        .get_ethtool_stats = netvsc_get_ethtool_stats,
        .get_sset_count = netvsc_get_sset_count,
index 4ab6e9a50bbee22b48f748ef580a6bbe6f34a2d2..c4c92db86dfa8449e4416cdf8b1e0cc70f2c2d39 100644 (file)
@@ -976,6 +976,7 @@ static int sfp_probe(struct platform_device *pdev)
        if (pdev->dev.of_node) {
                struct device_node *node = pdev->dev.of_node;
                const struct of_device_id *id;
+               struct i2c_adapter *i2c;
                struct device_node *np;
 
                id = of_match_node(sfp_of_match, node);
@@ -985,19 +986,20 @@ static int sfp_probe(struct platform_device *pdev)
                sff = sfp->type = id->data;
 
                np = of_parse_phandle(node, "i2c-bus", 0);
-               if (np) {
-                       struct i2c_adapter *i2c;
-
-                       i2c = of_find_i2c_adapter_by_node(np);
-                       of_node_put(np);
-                       if (!i2c)
-                               return -EPROBE_DEFER;
-
-                       err = sfp_i2c_configure(sfp, i2c);
-                       if (err < 0) {
-                               i2c_put_adapter(i2c);
-                               return err;
-                       }
+               if (!np) {
+                       dev_err(sfp->dev, "missing 'i2c-bus' property\n");
+                       return -ENODEV;
+               }
+
+               i2c = of_find_i2c_adapter_by_node(np);
+               of_node_put(np);
+               if (!i2c)
+                       return -EPROBE_DEFER;
+
+               err = sfp_i2c_configure(sfp, i2c);
+               if (err < 0) {
+                       i2c_put_adapter(i2c);
+                       return err;
                }
        }
 
@@ -1065,6 +1067,15 @@ static int sfp_probe(struct platform_device *pdev)
        if (poll)
                mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
 
+       /* When no Tx disable pin is available (or it is not wired up),
+        * modules that use a laser as their light source keep emitting
+        * even after the fiber is removed. This could be a safety issue,
+        * so we should at least warn the user about it.
+        */
+       if (!sfp->gpio[GPIO_TX_DISABLE])
+               dev_warn(sfp->dev,
+                        "No tx_disable pin: SFP modules will always be emitting.\n");
+
        return 0;
 }
 
index 9dbd390ace340811e356539fa988705cf49c8fd9..d6ff881165d07485511bbd3129dbc9d41740b4f4 100644 (file)
@@ -1026,7 +1026,8 @@ static void __team_compute_features(struct team *team)
        }
 
        team->dev->vlan_features = vlan_features;
-       team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
+       team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+                                    NETIF_F_GSO_UDP_L4;
        team->dev->hard_header_len = max_hard_header_len;
 
        team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
@@ -2117,7 +2118,7 @@ static void team_setup(struct net_device *dev)
                           NETIF_F_HW_VLAN_CTAG_RX |
                           NETIF_F_HW_VLAN_CTAG_FILTER;
 
-       dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+       dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
        dev->features |= dev->hw_features;
 }
 
index 183059c427b9c5552fc29d5eb01f90891930240f..30a36c2a39bfd5c48fe561e8e98ba95026e49eb6 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm)
 /*
  * sys_execve() executes a new program.
  */
-static int do_execveat_common(int fd, struct filename *filename,
-                             struct user_arg_ptr argv,
-                             struct user_arg_ptr envp,
-                             int flags)
+static int __do_execve_file(int fd, struct filename *filename,
+                           struct user_arg_ptr argv,
+                           struct user_arg_ptr envp,
+                           int flags, struct file *file)
 {
        char *pathbuf = NULL;
        struct linux_binprm *bprm;
-       struct file *file;
        struct files_struct *displaced;
        int retval;
 
@@ -1752,7 +1751,8 @@ static int do_execveat_common(int fd, struct filename *filename,
        check_unsafe_exec(bprm);
        current->in_execve = 1;
 
-       file = do_open_execat(fd, filename, flags);
+       if (!file)
+               file = do_open_execat(fd, filename, flags);
        retval = PTR_ERR(file);
        if (IS_ERR(file))
                goto out_unmark;
@@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename,
        sched_exec();
 
        bprm->file = file;
-       if (fd == AT_FDCWD || filename->name[0] == '/') {
+       if (!filename) {
+               bprm->filename = "none";
+       } else if (fd == AT_FDCWD || filename->name[0] == '/') {
                bprm->filename = filename->name;
        } else {
                if (filename->name[0] == '\0')
@@ -1826,7 +1828,8 @@ static int do_execveat_common(int fd, struct filename *filename,
        task_numa_free(current);
        free_bprm(bprm);
        kfree(pathbuf);
-       putname(filename);
+       if (filename)
+               putname(filename);
        if (displaced)
                put_files_struct(displaced);
        return retval;
@@ -1849,10 +1852,27 @@ static int do_execveat_common(int fd, struct filename *filename,
        if (displaced)
                reset_files_struct(displaced);
 out_ret:
-       putname(filename);
+       if (filename)
+               putname(filename);
        return retval;
 }
 
+static int do_execveat_common(int fd, struct filename *filename,
+                             struct user_arg_ptr argv,
+                             struct user_arg_ptr envp,
+                             int flags)
+{
+       return __do_execve_file(fd, filename, argv, envp, flags, NULL);
+}
+
+int do_execve_file(struct file *file, void *__argv, void *__envp)
+{
+       struct user_arg_ptr argv = { .ptr.native = __argv };
+       struct user_arg_ptr envp = { .ptr.native = __envp };
+
+       return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
+}
+
 int do_execve(struct filename *filename,
        const char __user *const __user *__argv,
        const char __user *const __user *__envp)
index 4955e0863b83d456ba467f291d335b01bc1aa2a6..c05f24fac4f62533e77009509f53ed4f5a3eee2f 100644 (file)
@@ -150,5 +150,6 @@ extern int do_execveat(int, struct filename *,
                       const char __user * const __user *,
                       const char __user * const __user *,
                       int);
+int do_execve_file(struct file *file, void *__argv, void *__envp);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
new file mode 100644 (file)
index 0000000..687b176
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BPFILTER_H
+#define _LINUX_BPFILTER_H
+
+#include <uapi/linux/bpfilter.h>
+
+struct sock;
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
+                           unsigned int optlen);
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
+                           int *optlen);
+extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+                                      char __user *optval,
+                                      unsigned int optlen, bool is_set);
+#endif
index c87c3a3453c19fe8ae61c10daecc55c29399aae0..623bb8ced060046fdb2b856a147696ec1280be50 100644 (file)
@@ -220,7 +220,6 @@ enum {
                                 NETIF_F_GSO_GRE_CSUM |                 \
                                 NETIF_F_GSO_IPXIP4 |                   \
                                 NETIF_F_GSO_IPXIP6 |                   \
-                                NETIF_F_GSO_UDP_L4 |                   \
                                 NETIF_F_GSO_UDP_TUNNEL |               \
                                 NETIF_F_GSO_UDP_TUNNEL_CSUM)
 
index 244aff6382208824e64c6387a38bb60e967f0548..5c812acbb80ae0f9e9c8bb8fa329b3485596b9e8 100644 (file)
@@ -22,8 +22,10 @@ struct subprocess_info {
        const char *path;
        char **argv;
        char **envp;
+       struct file *file;
        int wait;
        int retval;
+       pid_t pid;
        int (*init)(struct subprocess_info *info, struct cred *new);
        void (*cleanup)(struct subprocess_info *info);
        void *data;
@@ -38,6 +40,16 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
                          int (*init)(struct subprocess_info *info, struct cred *new),
                          void (*cleanup)(struct subprocess_info *), void *data);
 
+struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
+                         int (*init)(struct subprocess_info *info, struct cred *new),
+                         void (*cleanup)(struct subprocess_info *), void *data);
+struct umh_info {
+       struct file *pipe_to_umh;
+       struct file *pipe_from_umh;
+       pid_t pid;
+};
+int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
+
 extern int
 call_usermodehelper_exec(struct subprocess_info *info, int wait);
 
index bada1f1f871e163b1c7d0434e5928242648cbfad..0d2281b4b27ac0804176c063de830663762ab980 100644 (file)
@@ -664,4 +664,7 @@ extern int sysctl_icmp_msgs_burst;
 int ip_misc_proc_init(void);
 #endif
 
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+                               struct netlink_ext_ack *extack);
+
 #endif /* _IP_H */
diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h
new file mode 100644 (file)
index 0000000..2ec3cc9
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_BPFILTER_H
+#define _UAPI_LINUX_BPFILTER_H
+
+#include <linux/if.h>
+
+enum {
+       BPFILTER_IPT_SO_SET_REPLACE = 64,
+       BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65,
+       BPFILTER_IPT_SET_MAX,
+};
+
+enum {
+       BPFILTER_IPT_SO_GET_INFO = 64,
+       BPFILTER_IPT_SO_GET_ENTRIES = 65,
+       BPFILTER_IPT_SO_GET_REVISION_MATCH = 66,
+       BPFILTER_IPT_SO_GET_REVISION_TARGET = 67,
+       BPFILTER_IPT_GET_MAX,
+};
+
+#endif /* _UAPI_LINUX_BPFILTER_H */
index 9b15005955faaa244bfae5730a1cdcb3fd42b4a0..cabb210c93af650a272ca05333f86516aa7fed9b 100644 (file)
@@ -327,6 +327,9 @@ enum rtattr_type_t {
        RTA_PAD,
        RTA_UID,
        RTA_TTL_PROPAGATE,
+       RTA_IP_PROTO,
+       RTA_SPORT,
+       RTA_DPORT,
        __RTA_MAX
 };
 
index f76b3ff876cfc66db7f9c024ad08d07f8a353e6f..30db93fd7e39b60eae6fb98aa31d8dc8106f1e56 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/ptrace.h>
 #include <linux/async.h>
 #include <linux/uaccess.h>
+#include <linux/shmem_fs.h>
+#include <linux/pipe_fs_i.h>
 
 #include <trace/events/module.h>
 
@@ -97,9 +99,13 @@ static int call_usermodehelper_exec_async(void *data)
 
        commit_creds(new);
 
-       retval = do_execve(getname_kernel(sub_info->path),
-                          (const char __user *const __user *)sub_info->argv,
-                          (const char __user *const __user *)sub_info->envp);
+       if (sub_info->file)
+               retval = do_execve_file(sub_info->file,
+                                       sub_info->argv, sub_info->envp);
+       else
+               retval = do_execve(getname_kernel(sub_info->path),
+                                  (const char __user *const __user *)sub_info->argv,
+                                  (const char __user *const __user *)sub_info->envp);
 out:
        sub_info->retval = retval;
        /*
@@ -185,6 +191,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
                if (pid < 0) {
                        sub_info->retval = pid;
                        umh_complete(sub_info);
+               } else {
+                       sub_info->pid = pid;
                }
        }
 }
@@ -393,6 +401,117 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
 }
 EXPORT_SYMBOL(call_usermodehelper_setup);
 
+struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
+               int (*init)(struct subprocess_info *info, struct cred *new),
+               void (*cleanup)(struct subprocess_info *info), void *data)
+{
+       struct subprocess_info *sub_info;
+
+       sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
+       if (!sub_info)
+               return NULL;
+
+       INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
+       sub_info->path = "none";
+       sub_info->file = file;
+       sub_info->init = init;
+       sub_info->cleanup = cleanup;
+       sub_info->data = data;
+       return sub_info;
+}
+
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+       struct umh_info *umh_info = info->data;
+       struct file *from_umh[2];
+       struct file *to_umh[2];
+       int err;
+
+       /* create pipe to send data to umh */
+       err = create_pipe_files(to_umh, 0);
+       if (err)
+               return err;
+       err = replace_fd(0, to_umh[0], 0);
+       fput(to_umh[0]);
+       if (err < 0) {
+               fput(to_umh[1]);
+               return err;
+       }
+
+       /* create pipe to receive data from umh */
+       err = create_pipe_files(from_umh, 0);
+       if (err) {
+               fput(to_umh[1]);
+               replace_fd(0, NULL, 0);
+               return err;
+       }
+       err = replace_fd(1, from_umh[1], 0);
+       fput(from_umh[1]);
+       if (err < 0) {
+               fput(to_umh[1]);
+               replace_fd(0, NULL, 0);
+               fput(from_umh[0]);
+               return err;
+       }
+
+       umh_info->pipe_to_umh = to_umh[1];
+       umh_info->pipe_from_umh = from_umh[0];
+       return 0;
+}
+
+static void umh_save_pid(struct subprocess_info *info)
+{
+       struct umh_info *umh_info = info->data;
+
+       umh_info->pid = info->pid;
+}
+
+/**
+ * fork_usermode_blob - fork a blob of bytes as a usermode process
+ * @data: a blob of bytes that can be do_execv-ed as a file
+ * @len: length of the blob
+ * @info: information about usermode process (shouldn't be NULL)
+ *
+ * Returns either negative error or zero which indicates success
+ * in executing a blob of bytes as a usermode process. In such
+ * case 'struct umh_info *info' is populated with two pipes
+ * and a pid of the process. The caller is responsible for health
+ * check of the user process, killing it via pid, and closing the
+ * pipes when user process is no longer needed.
+ */
+int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
+{
+       struct subprocess_info *sub_info;
+       struct file *file;
+       ssize_t written;
+       loff_t pos = 0;
+       int err;
+
+       file = shmem_kernel_file_setup("", len, 0);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       written = kernel_write(file, data, len, &pos);
+       if (written != len) {
+               err = written;
+               if (err >= 0)
+                       err = -ENOMEM;
+               goto out;
+       }
+
+       err = -ENOMEM;
+       sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
+                                                 umh_save_pid, info);
+       if (!sub_info)
+               goto out;
+
+       err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+out:
+       fput(file);
+       return err;
+}
+EXPORT_SYMBOL_GPL(fork_usermode_blob);
+
 /**
  * call_usermodehelper_exec - start a usermode application
  * @sub_info: information about the subprocessa
index df8d45ef47d8701817b45b8848d9eb3dd52dcfdc..ba554cedb615fcd9e53c9605cf6b9c7d063ab96c 100644 (file)
@@ -202,6 +202,8 @@ source "net/bridge/netfilter/Kconfig"
 
 endif
 
+source "net/bpfilter/Kconfig"
+
 source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
 source "net/rds/Kconfig"
index 77aaddedbd29f7058d945f4fb1cac2de9f87e047..bdaf53925acd5606fdb953800620bd05cf0f259e 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_TLS)             += tls/
 obj-$(CONFIG_XFRM)             += xfrm/
 obj-$(CONFIG_UNIX)             += unix/
 obj-$(CONFIG_NET)              += ipv6/
+obj-$(CONFIG_BPFILTER)         += bpfilter/
 obj-$(CONFIG_PACKET)           += packet/
 obj-$(CONFIG_NET_KEY)          += key/
 obj-$(CONFIG_BRIDGE)           += bridge/
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
new file mode 100644 (file)
index 0000000..60725c5
--- /dev/null
@@ -0,0 +1,16 @@
+menuconfig BPFILTER
+       bool "BPF based packet filtering framework (BPFILTER)"
+       default n
+       depends on NET && BPF
+       help
+         This builds the experimental bpfilter framework, which aims to
+         provide netfilter-compatible functionality via BPF.
+
+if BPFILTER
+config BPFILTER_UMH
+       tristate "bpfilter kernel module with user mode helper"
+       default m
+       help
+         This builds bpfilter kernel module with embedded user mode helper
+endif
+
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
new file mode 100644 (file)
index 0000000..2af752c
--- /dev/null
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux BPFILTER layer.
+#
+
+hostprogs-y := bpfilter_umh
+bpfilter_umh-objs := main.o
+HOSTCFLAGS += -I. -Itools/include/
+ifeq ($(CONFIG_BPFILTER_UMH), y)
+# A built-in bpfilter_umh must be linked with -static: rootfs isn't
+# mounted yet when the __init function is called, so do_execv would
+# not find an ELF interpreter.
+HOSTLDFLAGS += -static
+endif
+
+# a bit of elf magic to convert bpfilter_umh binary into a binary blob
+# inside bpfilter_umh.o elf file referenced by
+# _binary_net_bpfilter_bpfilter_umh_start symbol
+# which bpfilter_kern.c passes further into umh blob loader at run-time
+quiet_cmd_copy_umh = GEN $@
+      cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
+      $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
+      -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
+      --rename-section .data=.init.rodata $< $@
+
+$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
+       $(call cmd,copy_umh)
+
+obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
+bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
new file mode 100644 (file)
index 0000000..7596314
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/umh.h>
+#include <linux/bpfilter.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "msgfmt.h"
+
+#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
+#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
+
+extern char UMH_start;
+extern char UMH_end;
+
+static struct umh_info info;
+/* since ip_getsockopt() can run in parallel, serialize access to umh */
+static DEFINE_MUTEX(bpfilter_lock);
+
+/*
+ * Tear down the helper described by @info: send SIGKILL to the task found
+ * for info->pid (if there is one) and drop the references held on both
+ * pipe files.
+ */
+static void shutdown_umh(struct umh_info *info)
+{
+       struct task_struct *tsk;
+
+       tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
+       if (tsk)
+               force_sig(SIGKILL, tsk);
+       fput(info->pipe_to_umh);
+       fput(info->pipe_from_umh);
+}
+
+/*
+ * Clear the sockopt hook and shut the helper down. Does nothing when the
+ * hook is not set. Callers in this file take bpfilter_lock first.
+ */
+static void __stop_umh(void)
+{
+       if (!bpfilter_process_sockopt)
+               return;
+
+       bpfilter_process_sockopt = NULL;
+       shutdown_umh(&info);
+}
+
+/* Run __stop_umh() with bpfilter_lock held. */
+static void stop_umh(void)
+{
+       mutex_lock(&bpfilter_lock);
+       __stop_umh();
+       mutex_unlock(&bpfilter_lock);
+}
+
+/*
+ * Send one sockopt request to the usermode helper and return its answer.
+ *
+ * A struct mbox_request is built from the arguments and the caller's pid,
+ * written to the helper's input pipe, and a struct mbox_reply is read back
+ * from its output pipe, all under bpfilter_lock. @sk is not used.
+ *
+ * Returns reply.status on success. If either transfer is short or fails,
+ * the helper is stopped via __stop_umh() and -EFAULT is returned.
+ */
+static int __bpfilter_process_sockopt(struct sock *sk, int optname,
+                                     char __user *optval,
+                                     unsigned int optlen, bool is_set)
+{
+       struct mbox_request req;
+       struct mbox_reply reply;
+       /* initialized so the write below never sees an indeterminate offset */
+       loff_t pos = 0;
+       ssize_t n;
+       int ret;
+
+       req.is_set = is_set;
+       req.pid = current->pid;
+       req.cmd = optname;
+       req.addr = (long)optval;
+       req.len = optlen;
+       mutex_lock(&bpfilter_lock);
+       n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
+       if (n != sizeof(req)) {
+               pr_err("write fail %zd\n", n);
+               __stop_umh();
+               ret = -EFAULT;
+               goto out;
+       }
+       pos = 0;
+       n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos);
+       if (n != sizeof(reply)) {
+               pr_err("read fail %zd\n", n);
+               __stop_umh();
+               ret = -EFAULT;
+               goto out;
+       }
+       ret = reply.status;
+out:
+       mutex_unlock(&bpfilter_lock);
+       return ret;
+}
+
+/*
+ * Module init: start the embedded helper blob, check that it answers a
+ * probe request, and only then publish the sockopt hook.
+ *
+ * Returns 0 on success, the error from fork_usermode_blob(), or -EFAULT
+ * when the health check fails.
+ */
+static int __init load_umh(void)
+{
+       int err;
+
+       /* fork usermode process */
+       err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
+       if (err)
+               return err;
+       pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
+
+       /* health check that usermode process started correctly */
+       if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, false) != 0) {
+               /*
+                * The hook is not published yet, so stop_umh() would do
+                * nothing here. Tear the helper down directly so that the
+                * process and both pipe references are not leaked.
+                */
+               mutex_lock(&bpfilter_lock);
+               shutdown_umh(&info);
+               mutex_unlock(&bpfilter_lock);
+               return -EFAULT;
+       }
+       bpfilter_process_sockopt = &__bpfilter_process_sockopt;
+       return 0;
+}
+
+/* Module exit: stop the helper through stop_umh(). */
+static void __exit fini_umh(void)
+{
+       stop_umh();
+}
+module_init(load_umh);
+module_exit(fini_umh);
+MODULE_LICENSE("GPL");
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
new file mode 100644 (file)
index 0000000..81bbc16
--- /dev/null
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "include/uapi/linux/bpf.h"
+#include <asm/unistd.h>
+#include "msgfmt.h"
+
+int debug_fd;
+
+/*
+ * Handle a "get" request. Command 0 is the only one recognised and yields 0;
+ * every other command yields -ENOPROTOOPT.
+ */
+static int handle_get_cmd(struct mbox_request *cmd)
+{
+       if (cmd->cmd == 0)
+               return 0;
+
+       return -ENOPROTOOPT;
+}
+
+/* Handle a "set" request: always answers -ENOPROTOOPT. */
+static int handle_set_cmd(struct mbox_request *cmd)
+{
+       return -ENOPROTOOPT;
+}
+
+/*
+ * Request loop: read one struct mbox_request from fd 0, dispatch it to the
+ * set or get handler according to req.is_set, and write the resulting
+ * struct mbox_reply to fd 1. Returns as soon as a read or write does not
+ * transfer a whole structure, after logging to debug_fd.
+ */
+static void loop(void)
+{
+       while (1) {
+               struct mbox_request req;
+               struct mbox_reply reply;
+               int n;
+
+               n = read(0, &req, sizeof(req));
+               if (n != sizeof(req)) {
+                       dprintf(debug_fd, "invalid request %d\n", n);
+                       return;
+               }
+
+               reply.status = req.is_set ?
+                       handle_set_cmd(&req) :
+                       handle_get_cmd(&req);
+
+               n = write(1, &reply, sizeof(reply));
+               if (n != sizeof(reply)) {
+                       dprintf(debug_fd, "reply failed %d\n", n);
+                       return;
+               }
+       }
+}
+
+/*
+ * Helper entry point: open /dev/console for debug output, announce startup,
+ * serve requests until loop() returns, then close the debug descriptor.
+ */
+int main(void)
+{
+       /*
+        * The flag literals are kept as they were (presumably O_RDWR |
+        * O_CREAT on the generic ABI -- confirm). Because the O_CREAT bit is
+        * set, open() needs an explicit mode argument, so one is supplied.
+        */
+       debug_fd = open("/dev/console", 00000002 | 00000100, 0600);
+       dprintf(debug_fd, "Started bpfilter\n");
+       loop();
+       close(debug_fd);
+       return 0;
+}
diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h
new file mode 100644 (file)
index 0000000..98d121c
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_BPFILTER_MSGFMT_H
+#define _NET_BPFILTER_MSGFMT_H
+
+/*
+ * Request sent from the kernel module to the usermode helper.
+ * bpfilter_kern.c fills it from the sockopt arguments and the calling task.
+ */
+struct mbox_request {
+       __u64 addr;     /* optval pointer value */
+       __u32 len;      /* optlen */
+       __u32 is_set;   /* non-zero for a set request, zero for a get */
+       __u32 cmd;      /* optname */
+       __u32 pid;      /* pid of the task that issued the sockopt */
+};
+
+/* Reply written back by the usermode helper for each request. */
+struct mbox_reply {
+       __u32 status;   /* handler result; the kernel side returns it as an int */
+};
+
+#endif
index 5c8a40e1a01ebb143516baaac6eecec55d996d26..475246b355f090a082d23076596b7fc74f7b9fe7 100644 (file)
@@ -2756,7 +2756,8 @@ static const struct genl_ops devlink_nl_ops[] = {
                .doit = devlink_nl_cmd_eswitch_set_doit,
                .policy = devlink_nl_policy,
                .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NO_LOCK,
        },
        {
                .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
index b379520f91334b6ba4c9e8a1410acd8142fc742b..eec9569ffa5cb143ea4d4feb0ca7a73925ea33e5 100644 (file)
@@ -14,7 +14,9 @@ obj-y     := route.o inetpeer.o protocol.o \
             udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
             fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
             inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
-            metrics.o
+            metrics.o netlink.o
+
+obj-$(CONFIG_BPFILTER) += bpfilter/
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
new file mode 100644 (file)
index 0000000..ce262d7
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BPFILTER) += sockopt.o
+
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
new file mode 100644 (file)
index 0000000..42a96d2
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+#include <uapi/linux/bpf.h>
+#include <linux/wait.h>
+#include <linux/kmod.h>
+
+/*
+ * Hook through which sockopt requests are dispatched. It is NULL until set;
+ * bpfilter_mbox_request() requests the "bpfilter" module when it finds it
+ * unset.
+ */
+int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+                               char __user *optval,
+                               unsigned int optlen, bool is_set);
+EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+
+/*
+ * Dispatch a sockopt request through bpfilter_process_sockopt.
+ *
+ * If the hook is not set, request the "bpfilter" module first. Returns the
+ * request_module() error if that fails, -ECHILD if the hook is still unset
+ * afterwards, otherwise whatever the hook returns.
+ */
+int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
+                         unsigned int optlen, bool is_set)
+{
+       if (!bpfilter_process_sockopt) {
+               int err = request_module("bpfilter");
+
+               if (err)
+                       return err;
+               if (!bpfilter_process_sockopt)
+                       return -ECHILD;
+       }
+       /* NOTE(review): the hook is read here without any lock -- confirm */
+       return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+}
+
+/* setsockopt entry point: forwards the request with is_set = true. */
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+                           unsigned int optlen)
+{
+       return bpfilter_mbox_request(sk, optname, optval, optlen, true);
+}
+
+/*
+ * getsockopt entry point: reads the length from the user pointer @optlen
+ * (returning -EFAULT if that fails) and forwards the request with
+ * is_set = false.
+ */
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+                           int __user *optlen)
+{
+       int len;
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+
+       return bpfilter_mbox_request(sk, optname, optval, len, false);
+}
index 4d622112bf95fabd9d0e87c64f1e27138a3c1556..897ae92dff0fd7e118756a13b9cdc8466d284df0 100644 (file)
@@ -649,6 +649,9 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
        [RTA_ENCAP]             = { .type = NLA_NESTED },
        [RTA_UID]               = { .type = NLA_U32 },
        [RTA_MARK]              = { .type = NLA_U32 },
+       [RTA_IP_PROTO]          = { .type = NLA_U8 },
+       [RTA_SPORT]             = { .type = NLA_U16 },
+       [RTA_DPORT]             = { .type = NLA_U16 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
index 5ad2d8ed3a3fe2aa51d814af442df7ff5e074d3e..e0791faacb24167620bab304af8852c3edc118e3 100644 (file)
@@ -47,6 +47,8 @@
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
 
+#include <linux/bpfilter.h>
+
 /*
  *     SOL_IP control messages.
  */
@@ -1244,6 +1246,11 @@ int ip_setsockopt(struct sock *sk, int level,
                return -ENOPROTOOPT;
 
        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_BPFILTER
+       if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
+           optname < BPFILTER_IPT_SET_MAX)
+               err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
+#endif
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
@@ -1552,6 +1559,11 @@ int ip_getsockopt(struct sock *sk, int level,
        int err;
 
        err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
+#ifdef CONFIG_BPFILTER
+       if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+           optname < BPFILTER_IPT_GET_MAX)
+               err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1584,6 +1596,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
        err = do_ip_getsockopt(sk, level, optname, optval, optlen,
                MSG_CMSG_COMPAT);
 
+#ifdef CONFIG_BPFILTER
+       if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+           optname < BPFILTER_IPT_GET_MAX)
+               err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c
new file mode 100644 (file)
index 0000000..f86bb4f
--- /dev/null
@@ -0,0 +1,23 @@
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/ip.h>
+
+/*
+ * Read the u8 protocol carried by @attr into @ip_proto and validate it.
+ *
+ * Returns 0 for TCP, UDP and ICMP. For any other value an extack message is
+ * set and -EOPNOTSUPP is returned; @ip_proto is written in either case.
+ */
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+                               struct netlink_ext_ack *extack)
+{
+       u8 proto = nla_get_u8(attr);
+
+       *ip_proto = proto;
+       if (proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+           proto == IPPROTO_ICMP)
+               return 0;
+
+       NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+       return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
index 2cfa1b518f8d6368a563c1ae14d7dff7ce43e473..0e401dc4e1bdb6413f5e75c5fb1f11bac787574b 100644 (file)
@@ -2574,11 +2574,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
 /* called with rcu_read_lock held */
-static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
-                       struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
-                       u32 seq)
+static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
+                       struct rtable *rt, u32 table_id, struct flowi4 *fl4,
+                       struct sk_buff *skb, u32 portid, u32 seq)
 {
-       struct rtable *rt = skb_rtable(skb);
        struct rtmsg *r;
        struct nlmsghdr *nlh;
        unsigned long expires = 0;
@@ -2674,7 +2673,7 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
                        }
                } else
 #endif
-                       if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
+                       if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
                                goto nla_put_failure;
        }
 
@@ -2689,43 +2688,93 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
        return -EMSGSIZE;
 }
 
+/*
+ * Build the dummy packet used for a route lookup: an IPv4 header carrying
+ * @src, @dst and @ip_proto, followed by a minimal UDP, TCP or ICMP header
+ * carrying @sport/@dport where the protocol has ports.
+ *
+ * Returns the new skb, or NULL if the allocation fails.
+ */
+static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
+                                                  u8 ip_proto, __be16 sport,
+                                                  __be16 dport)
+{
+       struct sk_buff *skb;
+       struct iphdr *iph;
+
+       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!skb)
+               return NULL;
+
+       /* Reserve room for dummy headers, this skb can pass
+        * through good chunk of routing engine.
+        */
+       skb_reset_mac_header(skb);
+       skb_reset_network_header(skb);
+       skb->protocol = htons(ETH_P_IP);
+       /* zeroed like the transport headers below, so that no header field
+        * is left holding uninitialized memory
+        */
+       iph = skb_put_zero(skb, sizeof(struct iphdr));
+       iph->protocol = ip_proto;
+       iph->saddr = src;
+       iph->daddr = dst;
+       iph->version = 0x4;
+       iph->frag_off = 0;
+       iph->ihl = 0x5;
+       skb_set_transport_header(skb, skb->len);
+
+       switch (iph->protocol) {
+       case IPPROTO_UDP: {
+               struct udphdr *udph;
+
+               udph = skb_put_zero(skb, sizeof(struct udphdr));
+               udph->source = sport;
+               udph->dest = dport;
+               /* the length field is big-endian on the wire */
+               udph->len = htons(sizeof(struct udphdr));
+               udph->check = 0;
+               break;
+       }
+       case IPPROTO_TCP: {
+               struct tcphdr *tcph;
+
+               tcph = skb_put_zero(skb, sizeof(struct tcphdr));
+               tcph->source    = sport;
+               tcph->dest      = dport;
+               tcph->doff      = sizeof(struct tcphdr) / 4;
+               tcph->rst = 1;
+               tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
+                                           src, dst, 0);
+               break;
+       }
+       case IPPROTO_ICMP: {
+               struct icmphdr *icmph;
+
+               icmph = skb_put_zero(skb, sizeof(struct icmphdr));
+               icmph->type = ICMP_ECHO;
+               icmph->code = 0;
+               break;
+       }
+       }
+
+       return skb;
+}
+
 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                             struct netlink_ext_ack *extack)
 {
        struct net *net = sock_net(in_skb->sk);
-       struct rtmsg *rtm;
        struct nlattr *tb[RTA_MAX+1];
+       u32 table_id = RT_TABLE_MAIN;
+       __be16 sport = 0, dport = 0;
        struct fib_result res = {};
+       u8 ip_proto = IPPROTO_UDP;
        struct rtable *rt = NULL;
+       struct sk_buff *skb;
+       struct rtmsg *rtm;
        struct flowi4 fl4;
        __be32 dst = 0;
        __be32 src = 0;
+       kuid_t uid;
        u32 iif;
        int err;
        int mark;
-       struct sk_buff *skb;
-       u32 table_id = RT_TABLE_MAIN;
-       kuid_t uid;
 
        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
                          extack);
        if (err < 0)
-               goto errout;
+               return err;
 
        rtm = nlmsg_data(nlh);
-
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb) {
-               err = -ENOBUFS;
-               goto errout;
-       }
-
-       /* Reserve room for dummy headers, this skb can pass
-          through good chunk of routing engine.
-        */
-       skb_reset_mac_header(skb);
-       skb_reset_network_header(skb);
-
        src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
        dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
        iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
@@ -2735,14 +2784,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
        else
                uid = (iif ? INVALID_UID : current_uid());
 
-       /* Bugfix: need to give ip_route_input enough of an IP header to
-        * not gag.
-        */
-       ip_hdr(skb)->protocol = IPPROTO_UDP;
-       ip_hdr(skb)->saddr = src;
-       ip_hdr(skb)->daddr = dst;
+       if (tb[RTA_IP_PROTO]) {
+               err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
+                                                 &ip_proto, extack);
+               if (err)
+                       return err;
+       }
+
+       if (tb[RTA_SPORT])
+               sport = nla_get_be16(tb[RTA_SPORT]);
 
-       skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+       if (tb[RTA_DPORT])
+               dport = nla_get_be16(tb[RTA_DPORT]);
+
+       skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
+       if (!skb)
+               return -ENOBUFS;
 
        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = dst;
@@ -2751,6 +2808,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
        fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
        fl4.flowi4_mark = mark;
        fl4.flowi4_uid = uid;
+       if (sport)
+               fl4.fl4_sport = sport;
+       if (dport)
+               fl4.fl4_dport = dport;
+       fl4.flowi4_proto = ip_proto;
 
        rcu_read_lock();
 
@@ -2760,10 +2822,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                dev = dev_get_by_index_rcu(net, iif);
                if (!dev) {
                        err = -ENODEV;
-                       goto errout_free;
+                       goto errout_rcu;
                }
 
-               skb->protocol   = htons(ETH_P_IP);
+               fl4.flowi4_iif = iif; /* for rt_fill_info */
                skb->dev        = dev;
                skb->mark       = mark;
                err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
@@ -2783,7 +2845,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
        }
 
        if (err)
-               goto errout_free;
+               goto errout_rcu;
 
        if (rtm->rtm_flags & RTM_F_NOTIFY)
                rt->rt_flags |= RTCF_NOTIFY;
@@ -2791,34 +2853,40 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
        if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
                table_id = res.table ? res.table->tb_id : 0;
 
+       /* reset skb for netlink reply msg */
+       skb_trim(skb, 0);
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
+       skb_reset_mac_header(skb);
+
        if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
                if (!res.fi) {
                        err = fib_props[res.type].error;
                        if (!err)
                                err = -EHOSTUNREACH;
-                       goto errout_free;
+                       goto errout_rcu;
                }
                err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
                                    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
                                    rt->rt_type, res.prefix, res.prefixlen,
                                    fl4.flowi4_tos, res.fi, 0);
        } else {
-               err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
+               err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
                                   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
        }
        if (err < 0)
-               goto errout_free;
+               goto errout_rcu;
 
        rcu_read_unlock();
 
        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-errout:
-       return err;
 
 errout_free:
+       return err;
+errout_rcu:
        rcu_read_unlock();
        kfree_skb(skb);
-       goto errout;
+       goto errout_free;
 }
 
 void ip_rt_multicast_event(struct in_device *in_dev)
index ff4d4ba67735fb7d829d09c6d2da1a30d52eefa6..d71f1f3e11555af02f1b04a865d3aaba3a8be406 100644 (file)
@@ -788,7 +788,8 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
                        return -EINVAL;
                if (sk->sk_no_check_tx)
                        return -EINVAL;
-               if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+               if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+                   dst_xfrm(skb_dst(skb)))
                        return -EIO;
 
                skb_shinfo(skb)->gso_size = cork->gso_size;
index bcb8785c0451c418ae32b9ca1a68db52fe2d3fe0..038d661d5ffc9bd666d6d786178875f8678d2602 100644 (file)
@@ -63,6 +63,7 @@
 #include <net/lwtunnel.h>
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
+#include <net/ip.h>
 #include <trace/events/fib6.h>
 
 #include <linux/uaccess.h>
@@ -4083,6 +4084,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_UID]               = { .type = NLA_U32 },
        [RTA_MARK]              = { .type = NLA_U32 },
        [RTA_TABLE]             = { .type = NLA_U32 },
+       [RTA_IP_PROTO]          = { .type = NLA_U8 },
+       [RTA_SPORT]             = { .type = NLA_U16 },
+       [RTA_DPORT]             = { .type = NLA_U16 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4795,6 +4799,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
        else
                fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
 
+       if (tb[RTA_SPORT])
+               fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
+
+       if (tb[RTA_DPORT])
+               fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
+
+       if (tb[RTA_IP_PROTO]) {
+               err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
+                                                 &fl6.flowi6_proto, extack);
+               if (err)
+                       goto errout;
+       }
+
        if (iif) {
                struct net_device *dev;
                int flags = 0;
index 2839c1bd1e584f8fbc3cb7f999f741f3f41b4051..426c9d2b418d59ee8bbb5bb8a0735fccd0241dcc 100644 (file)
@@ -1053,7 +1053,8 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
                        return -EINVAL;
                if (udp_sk(sk)->no_check6_tx)
                        return -EINVAL;
-               if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+               if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+                   dst_xfrm(skb_dst(skb)))
                        return -EIO;
 
                skb_shinfo(skb)->gso_size = cork->gso_size;
index e60dddbf963c76056deb7aeb4517c0079b1b305b..7cb0f49efdb70446177b7b1b8958deeacb45ef19 100644 (file)
@@ -6,7 +6,7 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
-TEST_PROGS += udpgso_bench.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755 (executable)
index 0000000..d4cfb6a
--- /dev/null
@@ -0,0 +1,248 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV=dummy0
+
+log_test()
+{
+       local rc=$1
+       local expected=$2
+       local msg="$3"
+
+       if [ ${rc} -eq ${expected} ]; then
+               nsuccess=$((nsuccess+1))
+               printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+       else
+               nfail=$((nfail+1))
+               printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+                       echo
+                       echo "hit enter to continue, 'q' to quit"
+                       read a
+                       [ "$a" = "q" ] && exit 1
+               fi
+       fi
+}
+
+# log_section TITLE...
+# Print a banner announcing a new test section named by the arguments.
+log_section()
+{
+       echo
+       echo "######################################################################"
+       echo "TEST SECTION: $*"
+       echo "######################################################################"
+}
+
+# Create the testns namespace, bring lo up, and add an up dummy0 device with
+# one IPv4 and one IPv6 address. "set -e" is active only for the duration of
+# this function, so any failing command here aborts the script.
+# NOTE(review): the IPv4 address is hard-coded as 198.51.100.1 while DEV_ADDR
+# and the GW/SRC variables use 192.51.100.x -- confirm which is intended.
+setup()
+{
+       set -e
+       ip netns add testns
+       $IP link set dev lo up
+
+       $IP link add dummy0 type dummy
+       $IP link set dev dummy0 up
+       $IP address add 198.51.100.1/24 dev dummy0
+       $IP -6 address add 2001:db8:1::1/64 dev dummy0
+
+       set +e
+}
+
+# Delete dummy0 (output discarded) and remove the testns namespace.
+cleanup()
+{
+       $IP link del dev dummy0 &> /dev/null
+       ip netns del testns
+}
+
+# fib_check_iproute_support RULE_KEYWORD ROUTE_KEYWORD
+# Return 0 when "ip rule help" mentions RULE_KEYWORD and "ip route get help"
+# mentions ROUTE_KEYWORD; otherwise print a SKIP message and return 1.
+fib_check_iproute_support()
+{
+       if ! ip rule help 2>&1 | grep -q $1; then
+               echo "SKIP: iproute2 iprule too old, missing $1 match"
+               return 1
+       fi
+
+       if ! ip route get help 2>&1 | grep -q $2; then
+               echo "SKIP: iproute2 get route too old, missing $2 match"
+               return 1
+       fi
+
+       return 0
+}
+
+# fib_rule6_del MATCH
+# Delete the IPv6 rule described by MATCH and log whether that succeeded.
+fib_rule6_del()
+{
+       $IP -6 rule del $1
+       log_test $? 0 "rule6 del $1"
+}
+
+# fib_rule6_del_by_pref MATCH
+# Find the IPv6 rule whose listing contains "MATCH lookup $RTABLE" and delete
+# it by preference. The table variable is RTABLE; the previously used TABLE
+# is never defined, which left the grep pattern without a table number.
+fib_rule6_del_by_pref()
+{
+       local pref
+
+       pref=$($IP -6 rule show | grep "$1 lookup $RTABLE" | cut -d ":" -f 1)
+       $IP -6 rule del pref $pref
+}
+
+# fib_rule6_test_match_n_redirect MATCH GETMATCH [DESCRIPTION]
+# Add an IPv6 rule "MATCH table $RTABLE", check that a route lookup for
+# $GW_IP6 with GETMATCH reports "table $RTABLE", then delete the rule by
+# preference. Both steps are logged through log_test.
+# NOTE(review): callers pass a third DESCRIPTION argument, but it is not
+# read here; the log message uses MATCH instead.
+fib_rule6_test_match_n_redirect()
+{
+       local match="$1"
+       local getmatch="$2"
+
+       $IP -6 rule add $match table $RTABLE
+       $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+       log_test $? 0 "rule6 check: $1"
+
+       fib_rule6_del_by_pref "$match"
+       log_test $? 0 "rule6 del by pref: $match"
+}
+
+# Run the IPv6 rule checks: install a default route in table $RTABLE, then
+# exercise one rule per selector (oif, iif, tos, fwmark, and -- when the
+# installed iproute2 supports them -- uidrange, sport/dport and ipproto).
+fib_rule6_test()
+{
+       # setup the fib rule redirect route
+       $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+       match="oif $DEV"
+       fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+       match="from $SRC_IP6 iif $DEV"
+       fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+       match="tos 0x10"
+       fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+       # the rule keyword (fwmark) differs from the route-get keyword (mark)
+       match="fwmark 0x64"
+       getmatch="mark 0x64"
+       fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+       fib_check_iproute_support "uidrange" "uid"
+       if [ $? -eq 0 ]; then
+               match="uidrange 100-100"
+               getmatch="uid 100"
+               fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+       fi
+
+       fib_check_iproute_support "sport" "sport"
+       if [ $? -eq 0 ]; then
+               match="sport 666 dport 777"
+               fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+       fi
+
+       fib_check_iproute_support "ipproto" "ipproto"
+       if [ $? -eq 0 ]; then
+               match="ipproto tcp"
+               fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+       fi
+
+       fib_check_iproute_support "ipproto" "ipproto"
+       if [ $? -eq 0 ]; then
+               match="ipproto icmp"
+               fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+       fi
+}
+
+# fib_rule4_del MATCH
+# Delete the IPv4 rule described by MATCH and log whether that succeeded.
+fib_rule4_del()
+{
+       $IP rule del $1
+       log_test $? 0 "del $1"
+}
+
+# fib_rule4_del_by_pref MATCH
+# Find the IPv4 rule whose listing contains "MATCH lookup $RTABLE" and delete
+# it by preference. The table variable is RTABLE; the previously used TABLE
+# is never defined, which left the grep pattern without a table number.
+fib_rule4_del_by_pref()
+{
+       local pref
+
+       pref=$($IP rule show | grep "$1 lookup $RTABLE" | cut -d ":" -f 1)
+       $IP rule del pref $pref
+}
+
+# fib_rule4_test_match_n_redirect MATCH GETMATCH [DESCRIPTION]
+# Add an IPv4 rule "MATCH table $RTABLE", check that a route lookup for
+# $GW_IP4 with GETMATCH reports "table $RTABLE", then delete the rule by
+# preference. Both steps are logged through log_test.
+# NOTE(review): callers pass a third DESCRIPTION argument, but it is not
+# read here; the log message uses MATCH instead.
+fib_rule4_test_match_n_redirect()
+{
+       local match="$1"
+       local getmatch="$2"
+
+       $IP rule add $match table $RTABLE
+       $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+       log_test $? 0 "rule4 check: $1"
+
+       fib_rule4_del_by_pref "$match"
+       log_test $? 0 "rule4 del by pref: $match"
+}
+
+# Run the IPv4 rule checks: install a default route in table $RTABLE, then
+# exercise one rule per selector (oif, iif, tos, fwmark, and -- when the
+# installed iproute2 supports them -- uidrange, sport/dport and ipproto).
+fib_rule4_test()
+{
+       # setup the fib rule redirect route
+       $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+       match="oif $DEV"
+       fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+       match="from $SRC_IP iif $DEV"
+       fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+       match="tos 0x10"
+       fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+       # the rule keyword (fwmark) differs from the route-get keyword (mark)
+       match="fwmark 0x64"
+       getmatch="mark 0x64"
+       fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+       fib_check_iproute_support "uidrange" "uid"
+       if [ $? -eq 0 ]; then
+               match="uidrange 100-100"
+               getmatch="uid 100"
+               fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+       fi
+
+       fib_check_iproute_support "sport" "sport"
+       if [ $? -eq 0 ]; then
+               match="sport 666 dport 777"
+               fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+       fi
+
+       fib_check_iproute_support "ipproto" "ipproto"
+       if [ $? -eq 0 ]; then
+               match="ipproto tcp"
+               fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+       fi
+
+       fib_check_iproute_support "ipproto" "ipproto"
+       if [ $? -eq 0 ]; then
+               match="ipproto icmp"
+               fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+       fi
+}
+
+# Run the IPv4 rule tests followed by the IPv6 rule tests, each under its
+# own section banner.
+run_fibrule_tests()
+{
+       log_section "IPv4 fib rule"
+       fib_rule4_test
+       log_section "IPv6 fib rule"
+       fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+       echo "SKIP: Need root privileges"
+       exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+       echo "SKIP: Could not run test without ip tool"
+       exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+exit $ret
diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile
new file mode 100644 (file)
index 0000000..f7baa9a
--- /dev/null
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+# Builds the uevent_filtering selftest binary.
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := uevent_filtering
+CFLAGS += -Wl,-no-as-needed -Wall
+
+# Rebuilt when the test source or either kselftest header changes; only the
+# first prerequisite ($<) is handed to the compiler.
+uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
+       $(CC) $(CFLAGS) $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config
new file mode 100644 (file)
index 0000000..1038f45
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_USER_NS=y
+CONFIG_NET=y
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
new file mode 100644 (file)
index 0000000..f83391a
--- /dev/null
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/netlink.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
+#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
+#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
+#define __UEVENT_LISTEN_ALL -1
+
+/*
+ * read(2) wrapper that restarts the call whenever it is interrupted by a
+ * signal (EINTR). Every other result, including short reads and errors, is
+ * handed back to the caller unchanged.
+ */
+ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+       ssize_t nread;
+
+       do {
+               nread = read(fd, buf, count);
+       } while (nread < 0 && errno == EINTR);
+
+       return nread;
+}
+
+/*
+ * write(2) wrapper that restarts the call whenever it is interrupted by a
+ * signal (EINTR). Every other result, including short writes and errors, is
+ * handed back to the caller unchanged.
+ */
+ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+       ssize_t nwritten;
+
+       do {
+               nwritten = write(fd, buf, count);
+       } while (nwritten < 0 && errno == EINTR);
+
+       return nwritten;
+}
+
+/*
+ * Block until the child identified by @pid has been reaped, retrying when
+ * waitpid() is interrupted by a signal or reports a different pid.
+ *
+ * Returns 0 if the child exited normally with status 0, and -1 if waitpid()
+ * failed or the child terminated in any other way.
+ */
+int wait_for_pid(pid_t pid)
+{
+       int status, reaped;
+
+       for (;;) {
+               reaped = waitpid(pid, &status, 0);
+               if (reaped == -1) {
+                       if (errno != EINTR)
+                               return -1;
+                       continue;
+               }
+
+               if (reaped == pid)
+                       break;
+       }
+
+       return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
+}
+
+/*
+ * Child-side half of a test case: listen on a NETLINK_KOBJECT_UEVENT socket
+ * for the "add" uevent of /devices/virtual/mem/full.
+ *
+ * @post_flags:    CLONE_NEWUSER and/or CLONE_NEWNET; the matching namespaces
+ *                 are unshared after the socket has been bound.
+ * @expect_uevent: whether receiving the uevent counts as success.
+ * @sync_fd:       eventfd that is written and then closed once the listener
+ *                 is ready, so the parent knows it may trigger uevents.
+ *
+ * Returns 0 on success. Returns -1 on any setup failure, when recvmsg()
+ * fails, or when the uevent arrives although none was expected. If no
+ * matching uevent ever arrives the function keeps blocking in recvmsg().
+ */
+static int uevent_listener(unsigned long post_flags, bool expect_uevent,
+                          int sync_fd)
+{
+       int sk_fd, ret;
+       socklen_t sk_addr_len;
+       int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+       uint64_t sync_add = 1;
+       struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
+       char buf[__UEVENT_BUFFER_SIZE] = { 0 };
+       struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
+       char control[CMSG_SPACE(sizeof(struct ucred))];
+       /* Designated initializers: independent of struct msghdr member order. */
+       struct msghdr hdr = {
+               .msg_name = &rcv_addr,
+               .msg_namelen = sizeof(rcv_addr),
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
+               .msg_control = control,
+               .msg_controllen = sizeof(control),
+       };
+
+       sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+                      NETLINK_KOBJECT_UEVENT);
+       if (sk_fd < 0) {
+               fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
+               return -1;
+       }
+
+       ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+                        sizeof(rcv_buf_sz));
+       if (ret < 0) {
+               fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
+               goto on_error;
+       }
+
+       sk_addr.nl_family = AF_NETLINK;
+       sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
+
+       sk_addr_len = sizeof(sk_addr);
+       ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
+       if (ret < 0) {
+               fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
+               goto on_error;
+       }
+
+       ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
+       if (ret < 0) {
+               fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
+               goto on_error;
+       }
+
+       if ((size_t)sk_addr_len != sizeof(sk_addr)) {
+               fprintf(stderr, "Invalid socket address size\n");
+               goto on_error;
+       }
+
+       /* The socket is bound; now move into the requested new namespaces. */
+       if (post_flags & CLONE_NEWUSER) {
+               ret = unshare(CLONE_NEWUSER);
+               if (ret < 0) {
+                       fprintf(stderr,
+                               "%s - Failed to unshare user namespace\n",
+                               strerror(errno));
+                       goto on_error;
+               }
+       }
+
+       if (post_flags & CLONE_NEWNET) {
+               ret = unshare(CLONE_NEWNET);
+               if (ret < 0) {
+                       fprintf(stderr,
+                               "%s - Failed to unshare network namespace\n",
+                               strerror(errno));
+                       goto on_error;
+               }
+       }
+
+       /* Tell the parent that we are ready to receive uevents. */
+       ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
+       close(sync_fd);
+       if (ret != sizeof(sync_add)) {
+               fprintf(stderr, "Failed to synchronize with parent process\n");
+               goto on_error;
+       }
+
+       fret = 0;
+       for (;;) {
+               ssize_t r;
+
+               r = recvmsg(sk_fd, &hdr, 0);
+               if (r <= 0) {
+                       fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
+                       /* Record the failure in the value that is returned. */
+                       fret = -1;
+                       break;
+               }
+
+               /* ignore libudev messages */
+               if (memcmp(buf, "libudev", 8) == 0)
+                       continue;
+
+               /* ignore uevents we didn't trigger */
+               if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
+                       continue;
+
+               if (!expect_uevent) {
+                       fprintf(stderr, "Received unexpected uevent:\n");
+                       /* Record the failure in the value that is returned. */
+                       fret = -1;
+               }
+
+               if (TH_LOG_ENABLED) {
+                       /* If logging is enabled dump the received uevent. */
+                       (void)write_nointr(STDERR_FILENO, buf, r);
+                       (void)write_nointr(STDERR_FILENO, "\n", 1);
+               }
+
+               break;
+       }
+
+on_error:
+       close(sk_fd);
+
+       return fret;
+}
+
+/*
+ * Write "add\n" to the uevent file of /sys/devices/virtual/mem/full @times
+ * times, stopping at the first write that fails.
+ *
+ * Returns a negative value if the file cannot be opened or a write fails.
+ * Otherwise returns the non-negative result of the last write, or 0 when
+ * @times is 0.
+ */
+int trigger_uevent(unsigned int times)
+{
+       /* ret stays 0 when @times is 0 and the loop body never runs. */
+       int fd, ret = 0;
+       unsigned int i;
+
+       fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
+       if (fd < 0) {
+               if (errno != ENOENT)
+                       return -EINVAL;
+
+               return -1;
+       }
+
+       for (i = 0; i < times; i++) {
+               ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
+               if (ret < 0) {
+                       fprintf(stderr, "Failed to trigger uevent\n");
+                       break;
+               }
+       }
+       close(fd);
+
+       return ret;
+}
+
+/*
+ * Ask the kernel to send SIGKILL to the calling process when its parent
+ * dies. If the parent pid is already 1 the process is treated as orphaned
+ * and sends SIGKILL to itself.
+ *
+ * Returns 0 on success and -1 if the last call made (prctl() or kill())
+ * failed.
+ */
+int set_death_signal(void)
+{
+       int rc = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+       /* Check whether we have been orphaned. */
+       if (getppid() == 1)
+               rc = kill(getpid(), SIGKILL);
+
+       return rc < 0 ? -1 : 0;
+}
+
+/*
+ * Run one test case.
+ *
+ * Forks a listener child. The child unshares the namespaces in @pre_flags
+ * (CLONE_NEWUSER and/or CLONE_NEWNET) and then calls uevent_listener(), which
+ * receives @post_flags, @expect_uevent and @sync_fd. Once the child has
+ * written to @sync_fd the parent triggers uevents and waits up to two seconds
+ * for SIGCHLD.
+ *
+ * If the wait fails (for instance because it timed out) the child is sent
+ * SIGTERM when no uevent was expected and SIGUSR1 otherwise.
+ *
+ * Returns 0 if the child exited with status 0 and -1 otherwise.
+ *
+ * NOTE(review): if the child fails before writing to @sync_fd, nothing else
+ * in this function writes to it, so the read below looks like it can block
+ * forever - confirm.
+ */
+static int do_test(unsigned long pre_flags, unsigned long post_flags,
+                  bool expect_uevent, int sync_fd)
+{
+       int ret, fret = -1;
+       uint64_t wait_val;
+       pid_t pid;
+       sigset_t mask;
+       sigset_t orig_mask;
+       struct timespec timeout;
+
+       sigemptyset(&mask);
+       sigaddset(&mask, SIGCHLD);
+
+       /* SIGCHLD must be blocked so that sigtimedwait() can collect it. */
+       ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
+       if (ret < 0) {
+               fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
+               return -1;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
+               goto out_restore_mask;
+       }
+
+       if (pid == 0) {
+               /* Make sure that we go away when our parent dies. */
+               ret = set_death_signal();
+               if (ret < 0) {
+                       fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
+                       _exit(EXIT_FAILURE);
+               }
+
+               if (pre_flags & CLONE_NEWUSER) {
+                       ret = unshare(CLONE_NEWUSER);
+                       if (ret < 0) {
+                               fprintf(stderr,
+                                       "%s - Failed to unshare user namespace\n",
+                                       strerror(errno));
+                               _exit(EXIT_FAILURE);
+                       }
+               }
+
+               if (pre_flags & CLONE_NEWNET) {
+                       ret = unshare(CLONE_NEWNET);
+                       if (ret < 0) {
+                               fprintf(stderr,
+                                       "%s - Failed to unshare network namespace\n",
+                                       strerror(errno));
+                               _exit(EXIT_FAILURE);
+                       }
+               }
+
+               if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
+                       _exit(EXIT_FAILURE);
+
+               _exit(EXIT_SUCCESS);
+       }
+
+       /* Wait until the child reports that its socket is set up. */
+       ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
+       if (ret != sizeof(wait_val)) {
+               fprintf(stderr, "Failed to synchronize with child process\n");
+               _exit(EXIT_FAILURE);
+       }
+
+       /* Trigger 10 uevents to account for the case where the kernel might
+        * drop some.
+        */
+       ret = trigger_uevent(10);
+       if (ret < 0)
+               fprintf(stderr, "Failed triggering uevents\n");
+
+       /* Wait for 2 seconds before considering this failed. This should be
+        * plenty of time for the kernel to deliver the uevent even under heavy
+        * load.
+        */
+       timeout.tv_sec = 2;
+       timeout.tv_nsec = 0;
+
+again:
+       ret = sigtimedwait(&mask, NULL, &timeout);
+       if (ret < 0) {
+               if (errno == EINTR)
+                       goto again;
+
+               if (!expect_uevent)
+                       ret = kill(pid, SIGTERM); /* success */
+               else
+                       ret = kill(pid, SIGUSR1); /* error */
+               if (ret < 0)
+                       goto out_restore_mask;
+       }
+
+       if (wait_for_pid(pid) == 0)
+               fret = 0;
+
+out_restore_mask:
+       /*
+        * Put back the signal mask the caller had on entry, so SIGCHLD does
+        * not stay blocked after this function returns.
+        */
+       (void)sigprocmask(SIG_SETMASK, &orig_mask, NULL);
+
+       return fret;
+}
+
+/*
+ * Signal handler that terminates the process immediately: exit status
+ * EXIT_SUCCESS for SIGTERM, EXIT_FAILURE for any other signal.
+ */
+static void signal_handler(int sig)
+{
+       _exit(sig == SIGTERM ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/*
+ * Checks which uevent listening sockets receive a uevent triggered for
+ * /sys/devices/virtual/mem/full, depending on the user and network
+ * namespaces the listener unshares before and after opening its socket.
+ * Each case is one do_test() call; the first failing case ends the test.
+ * The test exits with KSFT_SKIP when not run with an effective uid of 0 or
+ * when the device's uevent file does not exist.
+ */
+TEST(uevent_filtering)
+{
+       int ret, sync_fd;
+       struct sigaction act;
+
+       if (geteuid()) {
+               TH_LOG("Uevent filtering tests require root privileges. Skipping test");
+               _exit(KSFT_SKIP);
+       }
+
+       ret = access(__DEV_FULL, F_OK);
+       EXPECT_EQ(0, ret) {
+               if (errno == ENOENT) {
+                       TH_LOG(__DEV_FULL " does not exist. Skipping test");
+                       _exit(KSFT_SKIP);
+               }
+
+               _exit(KSFT_FAIL);
+       }
+
+       /*
+        * do_test() sends SIGTERM to a listener child that received no uevent
+        * when none was expected; signal_handler() turns that into a
+        * successful exit.
+        */
+       act.sa_handler = signal_handler;
+       act.sa_flags = 0;
+       sigemptyset(&act.sa_mask);
+
+       ret = sigaction(SIGTERM, &act, NULL);
+       ASSERT_EQ(0, ret);
+
+       /* Passed to every do_test() call for child-to-parent synchronization. */
+       sync_fd = eventfd(0, EFD_CLOEXEC);
+       ASSERT_GE(sync_fd, 0);
+
+       /*
+        * Setup:
+        * - Open uevent listening socket in initial network namespace owned by
+        *   initial user namespace.
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives uevent
+        */
+       ret = do_test(0, 0, true, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+       /*
+        * Setup:
+        * - Open uevent listening socket in non-initial network namespace
+        *   owned by initial user namespace.
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives uevent
+        */
+       ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+       /*
+        * Setup:
+        * - unshare user namespace
+        * - Open uevent listening socket in initial network namespace
+        *   owned by initial user namespace.
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives uevent
+        */
+       ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+       /*
+        * Setup:
+        * - Open uevent listening socket in non-initial network namespace
+        *   owned by non-initial user namespace.
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives no uevent
+        */
+       ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+       /*
+        * Setup:
+        * - Open uevent listening socket in initial network namespace
+        *   owned by initial user namespace.
+        * - unshare network namespace
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives uevent
+        */
+       ret = do_test(0, CLONE_NEWNET, true, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+       /*
+        * Setup:
+        * - Open uevent listening socket in initial network namespace
+        *   owned by initial user namespace.
+        * - unshare user namespace
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives uevent
+        */
+       ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+       /*
+        * Setup:
+        * - Open uevent listening socket in initial network namespace
+        *   owned by initial user namespace.
+        * - unshare user namespace
+        * - unshare network namespace
+        * - Trigger uevent in initial network namespace owned by initial user
+        *   namespace.
+        * Expected Result:
+        * - uevent listening socket receives uevent
+        */
+       ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
+       ASSERT_EQ(0, ret) {
+               goto do_cleanup;
+       }
+
+do_cleanup:
+       close(sync_fd);
+}
+
+TEST_HARNESS_MAIN