Required properties:
-- compatible: should be "qca,qca8337"
+- compatible: should be one of:
+ "qca,qca8334"
+ "qca,qca8337"
+
- #size-cells: must be 0
- #address-cells: must be 1
referencing the internal PHY connected to it. The CPU port of this switch is
always port 0.
+A CPU port node has the following optional node:
+
+- fixed-link : Fixed-link subnode describing a link to a non-MDIO
+ managed entity. See
+ Documentation/devicetree/bindings/net/fixed-link.txt
+ for details.
+
+For QCA8K the 'fixed-link' sub-node supports only the following properties:
+
+- 'speed' (integer, mandatory), to indicate the link speed. Accepted
+ values are 10, 100 and 1000
+- 'full-duplex' (boolean, optional), to indicate that full duplex is
+ used. When absent, half duplex is assumed.
+
Example:
label = "cpu";
ethernet = <&gmac1>;
phy-mode = "rgmii";
+ fixed-link {
+ speed = 1000;
+ full-duplex;
+ };
};
port@1 {
"sff,sfp" for SFP modules
"sff,sff" for soldered down SFF modules
-Optional Properties:
-
- i2c-bus : phandle of an I2C bus controller for the SFP two wire serial
interface
+Optional Properties:
+
- mod-def0-gpios : GPIO phandle and a specifier of the MOD-DEF0 (AKA Mod_ABS)
module presence input gpio signal, active (module absent) high. Must
not be present for SFF modules
done:
bond_dev->vlan_features = vlan_features;
- bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
+ bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+ NETIF_F_GSO_UDP_L4;
bond_dev->gso_max_segs = gso_max_segs;
netif_set_gso_max_size(bond_dev, gso_max_size);
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
- bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+ bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
bond_dev->features |= bond_dev->hw_features;
}
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2009 Felix Fietkau <nbd@nbd.name>
* Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org>
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
* Copyright (c) 2016 John Crispin <john@phrozen.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/module.h>
static void
qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
{
- u32 mask = QCA8K_PORT_STATUS_TXMAC;
+ u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC;
/* Port 0 and 6 have no internal PHY */
- if ((port > 0) && (port < 6))
+ if (port > 0 && port < 6)
mask |= QCA8K_PORT_STATUS_LINK_AUTO;
if (enable)
{
struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
int ret, i, phy_mode = -1;
+ u32 mask;
/* Make sure that port 0 is the cpu port */
if (!dsa_is_cpu_port(ds, 0)) {
if (ret < 0)
return ret;
- /* Enable CPU Port */
+ /* Enable CPU Port, force it to maximum bandwidth and full-duplex */
+ mask = QCA8K_PORT_STATUS_SPEED_1000 | QCA8K_PORT_STATUS_TXFLOW |
+ QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_DUPLEX;
+ qca8k_write(priv, QCA8K_REG_PORT_STATUS(QCA8K_CPU_PORT), mask);
qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1);
return 0;
}
+static void
+qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy)
+{
+ struct qca8k_priv *priv = ds->priv;
+ u32 reg;
+
+ /* Force fixed-link setting for CPU port, skip others. */
+ if (!phy_is_pseudo_fixed_link(phy))
+ return;
+
+ /* Set port speed */
+ switch (phy->speed) {
+ case 10:
+ reg = QCA8K_PORT_STATUS_SPEED_10;
+ break;
+ case 100:
+ reg = QCA8K_PORT_STATUS_SPEED_100;
+ break;
+ case 1000:
+ reg = QCA8K_PORT_STATUS_SPEED_1000;
+ break;
+ default:
+ dev_dbg(priv->dev, "port%d link speed %dMbps not supported.\n",
+ port, phy->speed);
+ return;
+ }
+
+ /* Set duplex mode */
+ if (phy->duplex == DUPLEX_FULL)
+ reg |= QCA8K_PORT_STATUS_DUPLEX;
+
+ /* Force flow control */
+ if (dsa_is_cpu_port(ds, port))
+ reg |= QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_TXFLOW;
+
+ /* Force link down before changing MAC options */
+ qca8k_port_set_status(priv, port, 0);
+ qca8k_write(priv, QCA8K_REG_PORT_STATUS(port), reg);
+ qca8k_port_set_status(priv, port, 1);
+}
+
static int
qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum)
{
static const struct dsa_switch_ops qca8k_switch_ops = {
.get_tag_protocol = qca8k_get_tag_protocol,
.setup = qca8k_setup,
+ .adjust_link = qca8k_adjust_link,
.get_strings = qca8k_get_strings,
.phy_read = qca8k_phy_read,
.phy_write = qca8k_phy_write,
return -ENOMEM;
priv->bus = mdiodev->bus;
+ priv->dev = &mdiodev->dev;
/* read the switches ID register */
id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
qca8k_suspend, qca8k_resume);
static const struct of_device_id qca8k_of_match[] = {
+ { .compatible = "qca,qca8334" },
{ .compatible = "qca,qca8337" },
{ /* sentinel */ },
};
#define QCA8K_GOL_MAC_ADDR0 0x60
#define QCA8K_GOL_MAC_ADDR1 0x64
#define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4)
-#define QCA8K_PORT_STATUS_SPEED GENMASK(2, 0)
-#define QCA8K_PORT_STATUS_SPEED_S 0
+#define QCA8K_PORT_STATUS_SPEED GENMASK(1, 0)
+#define QCA8K_PORT_STATUS_SPEED_10 0
+#define QCA8K_PORT_STATUS_SPEED_100 0x1
+#define QCA8K_PORT_STATUS_SPEED_1000 0x2
#define QCA8K_PORT_STATUS_TXMAC BIT(2)
#define QCA8K_PORT_STATUS_RXMAC BIT(3)
#define QCA8K_PORT_STATUS_TXFLOW BIT(4)
struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];
struct dsa_switch *ds;
struct mutex reg_mutex;
+ struct device *dev;
};
struct qca8k_mib_desc {
CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */
CH_PCI_ID_TABLE_FENTRY(0x6086), /* Custom T6225-SO-CR */
CH_PCI_ID_TABLE_FENTRY(0x6087), /* Custom T6225-CR */
+ CH_PCI_ID_TABLE_FENTRY(0x6088), /* Custom T62100-CR */
+ CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */
CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
#endif /* __T4_PCI_ID_TBL_H__ */
VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)
return VXGE_HW_OK;
else
- return VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+ return VXGE_HW_ERR_PRIVILEGED_OPERATION;
}
/*
}
if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) {
- status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+ status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
goto exit;
}
case vxge_hw_mgmt_reg_type_mrpcim:
if (!(hldev->access_rights &
VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) {
- status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+ status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
break;
}
if (offset > sizeof(struct vxge_hw_mrpcim_reg) - 8) {
case vxge_hw_mgmt_reg_type_srpcim:
if (!(hldev->access_rights &
VXGE_HW_DEVICE_ACCESS_RIGHT_SRPCIM)) {
- status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+ status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
break;
}
if (index > VXGE_HW_TITAN_SRPCIM_REG_SPACES - 1) {
case vxge_hw_mgmt_reg_type_mrpcim:
if (!(hldev->access_rights &
VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) {
- status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+ status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
break;
}
if (offset > sizeof(struct vxge_hw_mrpcim_reg) - 8) {
case vxge_hw_mgmt_reg_type_srpcim:
if (!(hldev->access_rights &
VXGE_HW_DEVICE_ACCESS_RIGHT_SRPCIM)) {
- status = VXGE_HW_ERR_PRIVILAGED_OPEARATION;
+ status = VXGE_HW_ERR_PRIVILEGED_OPERATION;
break;
}
if (index > VXGE_HW_TITAN_SRPCIM_REG_SPACES - 1) {
VXGE_HW_ERR_INVALID_TCODE = VXGE_HW_BASE_ERR + 14,
VXGE_HW_ERR_INVALID_BLOCK_SIZE = VXGE_HW_BASE_ERR + 15,
VXGE_HW_ERR_INVALID_STATE = VXGE_HW_BASE_ERR + 16,
- VXGE_HW_ERR_PRIVILAGED_OPEARATION = VXGE_HW_BASE_ERR + 17,
+ VXGE_HW_ERR_PRIVILEGED_OPERATION = VXGE_HW_BASE_ERR + 17,
VXGE_HW_ERR_INVALID_PORT = VXGE_HW_BASE_ERR + 18,
VXGE_HW_ERR_FIFO = VXGE_HW_BASE_ERR + 19,
VXGE_HW_ERR_VPATH = VXGE_HW_BASE_ERR + 20,
*ptr++ = 0;
status = vxge_hw_device_xmac_stats_get(hldev, xmac_stats);
if (status != VXGE_HW_OK) {
- if (status != VXGE_HW_ERR_PRIVILAGED_OPEARATION) {
+ if (status != VXGE_HW_ERR_PRIVILEGED_OPERATION) {
vxge_debug_init(VXGE_ERR,
"%s : %d Failure in getting xmac stats",
__func__, __LINE__);
0,
&stat);
- if (status == VXGE_HW_ERR_PRIVILAGED_OPEARATION)
+ if (status == VXGE_HW_ERR_PRIVILEGED_OPERATION)
vxge_debug_init(
vxge_hw_device_trace_level_get(hldev),
"%s: device stats clear returns"
- "VXGE_HW_ERR_PRIVILAGED_OPEARATION", ndev->name);
+ "VXGE_HW_ERR_PRIVILEGED_OPERATION", ndev->name);
vxge_debug_entryexit(vxge_hw_device_trace_level_get(hldev),
"%s: %s:%d Exiting...",
either directly, with Open vSwitch, or any other way. Note that
TC Flower offload requires specific FW to work.
+config NFP_APP_ABM_NIC
+ bool "NFP4000/NFP6000 Advanced buffer management NIC support"
+ depends on NFP
+ depends on NET_SWITCHDEV
+ default y
+ help
+ Enable driver support for Advanced buffer management NIC on NFP.
+ ABM NIC allows advanced configuration of queuing and scheduling
+ of packets, including ECN marking. Say Y, if you are planning to
+ use one of the NFP4000 and NFP6000 platforms which support this
+ functionality.
+ Code will be built into the nfp.ko driver.
+
config NFP_DEBUG
bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers"
depends on NFP
nfp_net_sriov.o \
nfp_netvf_main.o \
nfp_port.o \
+ nfp_shared_buf.o \
nic/main.o
ifeq ($(CONFIG_NFP_APP_FLOWER),y)
bpf/jit.o
endif
+ifeq ($(CONFIG_NFP_APP_ABM_NIC),y)
+nfp-objs += \
+ abm/ctrl.o \
+ abm/main.o
+endif
+
nfp-$(CONFIG_NFP_DEBUG) += nfp_net_debugfs.o
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "main.h"
+
+void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink)
+{
+ alink->queue_base = nn_readl(alink->vnic, NFP_NET_CFG_START_RXQ);
+ alink->queue_base /= alink->vnic->stride_rx;
+}
+
+int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
+{
+ struct nfp_pf *pf = abm->app->pf;
+ unsigned int pf_id;
+
+ pf_id = nfp_cppcore_pcie_unit(pf->cpp);
+ abm->pf_id = pf_id;
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+#include "../nfpcore/nfp.h"
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nsp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "../nfp_net_repr.h"
+#include "../nfp_port.h"
+#include "main.h"
+
+static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
+{
+ return FIELD_PREP(NFP_ABM_PORTID_TYPE, rtype) |
+ FIELD_PREP(NFP_ABM_PORTID_ID, id);
+}
+
+static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
+{
+ enum nfp_repr_type rtype;
+ struct nfp_reprs *reprs;
+ u8 port;
+
+ rtype = FIELD_GET(NFP_ABM_PORTID_TYPE, port_id);
+ port = FIELD_GET(NFP_ABM_PORTID_ID, port_id);
+
+ reprs = rcu_dereference(app->reprs[rtype]);
+ if (!reprs)
+ return NULL;
+
+ if (port >= reprs->num_reprs)
+ return NULL;
+
+ return rcu_dereference(reprs->reprs[port]);
+}
+
+static int
+nfp_abm_spawn_repr(struct nfp_app *app, struct nfp_abm_link *alink,
+ enum nfp_port_type ptype)
+{
+ struct net_device *netdev;
+ enum nfp_repr_type rtype;
+ struct nfp_reprs *reprs;
+ struct nfp_repr *repr;
+ struct nfp_port *port;
+ int err;
+
+ if (ptype == NFP_PORT_PHYS_PORT)
+ rtype = NFP_REPR_TYPE_PHYS_PORT;
+ else
+ rtype = NFP_REPR_TYPE_PF;
+
+ netdev = nfp_repr_alloc(app);
+ if (!netdev)
+ return -ENOMEM;
+ repr = netdev_priv(netdev);
+ repr->app_priv = alink;
+
+ port = nfp_port_alloc(app, ptype, netdev);
+ if (IS_ERR(port)) {
+ err = PTR_ERR(port);
+ goto err_free_repr;
+ }
+
+ if (ptype == NFP_PORT_PHYS_PORT) {
+ port->eth_forced = true;
+ err = nfp_port_init_phy_port(app->pf, app, port, alink->id);
+ if (err)
+ goto err_free_port;
+ } else {
+ port->pf_id = alink->abm->pf_id;
+ port->pf_split = app->pf->max_data_vnics > 1;
+ port->pf_split_id = alink->id;
+ port->vnic = alink->vnic->dp.ctrl_bar;
+ }
+
+ SET_NETDEV_DEV(netdev, &alink->vnic->pdev->dev);
+ eth_hw_addr_random(netdev);
+
+ err = nfp_repr_init(app, netdev, nfp_abm_portid(rtype, alink->id),
+ port, alink->vnic->dp.netdev);
+ if (err)
+ goto err_free_port;
+
+ reprs = nfp_reprs_get_locked(app, rtype);
+ WARN(nfp_repr_get_locked(app, reprs, alink->id), "duplicate repr");
+ rcu_assign_pointer(reprs->reprs[alink->id], netdev);
+
+ nfp_info(app->cpp, "%s Port %d Representor(%s) created\n",
+ ptype == NFP_PORT_PF_PORT ? "PCIe" : "Phys",
+ alink->id, netdev->name);
+
+ return 0;
+
+err_free_port:
+ nfp_port_free(port);
+err_free_repr:
+ nfp_repr_free(netdev);
+ return err;
+}
+
+static void
+nfp_abm_kill_repr(struct nfp_app *app, struct nfp_abm_link *alink,
+ enum nfp_repr_type rtype)
+{
+ struct net_device *netdev;
+ struct nfp_reprs *reprs;
+
+ reprs = nfp_reprs_get_locked(app, rtype);
+ netdev = nfp_repr_get_locked(app, reprs, alink->id);
+ if (!netdev)
+ return;
+ rcu_assign_pointer(reprs->reprs[alink->id], NULL);
+ synchronize_rcu();
+ /* Cast to make sure nfp_repr_clean_and_free() takes a nfp_repr */
+ nfp_repr_clean_and_free((struct nfp_repr *)netdev_priv(netdev));
+}
+
+static void
+nfp_abm_kill_reprs(struct nfp_abm *abm, struct nfp_abm_link *alink)
+{
+ nfp_abm_kill_repr(abm->app, alink, NFP_REPR_TYPE_PF);
+ nfp_abm_kill_repr(abm->app, alink, NFP_REPR_TYPE_PHYS_PORT);
+}
+
+static void nfp_abm_kill_reprs_all(struct nfp_abm *abm)
+{
+ struct nfp_pf *pf = abm->app->pf;
+ struct nfp_net *nn;
+
+ list_for_each_entry(nn, &pf->vnics, vnic_list)
+ nfp_abm_kill_reprs(abm, (struct nfp_abm_link *)nn->app_priv);
+}
+
+static enum devlink_eswitch_mode nfp_abm_eswitch_mode_get(struct nfp_app *app)
+{
+ struct nfp_abm *abm = app->priv;
+
+ return abm->eswitch_mode;
+}
+
+static int nfp_abm_eswitch_set_legacy(struct nfp_abm *abm)
+{
+ nfp_abm_kill_reprs_all(abm);
+
+ abm->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ return 0;
+}
+
+static void nfp_abm_eswitch_clean_up(struct nfp_abm *abm)
+{
+ if (abm->eswitch_mode != DEVLINK_ESWITCH_MODE_LEGACY)
+ WARN_ON(nfp_abm_eswitch_set_legacy(abm));
+}
+
+static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm)
+{
+ struct nfp_app *app = abm->app;
+ struct nfp_pf *pf = app->pf;
+ struct nfp_net *nn;
+ int err;
+
+ list_for_each_entry(nn, &pf->vnics, vnic_list) {
+ struct nfp_abm_link *alink = nn->app_priv;
+
+ err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PHYS_PORT);
+ if (err)
+ goto err_kill_all_reprs;
+
+ err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PF_PORT);
+ if (err)
+ goto err_kill_all_reprs;
+ }
+
+ abm->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ return 0;
+
+err_kill_all_reprs:
+ nfp_abm_kill_reprs_all(abm);
+ return err;
+}
+
+static int nfp_abm_eswitch_mode_set(struct nfp_app *app, u16 mode)
+{
+ struct nfp_abm *abm = app->priv;
+
+ if (abm->eswitch_mode == mode)
+ return 0;
+
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ return nfp_abm_eswitch_set_legacy(abm);
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ return nfp_abm_eswitch_set_switchdev(abm);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void
+nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn,
+ unsigned int id)
+{
+ struct nfp_eth_table_port *eth_port = &pf->eth_tbl->ports[id];
+ u8 mac_addr[ETH_ALEN];
+ const char *mac_str;
+ char name[32];
+
+ if (id > pf->eth_tbl->count) {
+ nfp_warn(pf->cpp, "No entry for persistent MAC address\n");
+ eth_hw_addr_random(nn->dp.netdev);
+ return;
+ }
+
+ snprintf(name, sizeof(name), "eth%u.mac.pf%u",
+ eth_port->eth_index, abm->pf_id);
+
+ mac_str = nfp_hwinfo_lookup(pf->hwinfo, name);
+ if (!mac_str) {
+ nfp_warn(pf->cpp, "Can't lookup persistent MAC address (%s)\n",
+ name);
+ eth_hw_addr_random(nn->dp.netdev);
+ return;
+ }
+
+ if (sscanf(mac_str, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) {
+ nfp_warn(pf->cpp, "Can't parse persistent MAC address (%s)\n",
+ mac_str);
+ eth_hw_addr_random(nn->dp.netdev);
+ return;
+ }
+
+ ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
+ ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
+}
+
+static int
+nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+ struct nfp_eth_table_port *eth_port = &app->pf->eth_tbl->ports[id];
+ struct nfp_abm *abm = app->priv;
+ struct nfp_abm_link *alink;
+ int err;
+
+ alink = kzalloc(sizeof(*alink), GFP_KERNEL);
+ if (!alink)
+ return -ENOMEM;
+ nn->app_priv = alink;
+ alink->abm = abm;
+ alink->vnic = nn;
+ alink->id = id;
+
+ /* This is a multi-host app, make sure MAC/PHY is up, but don't
+ * make the MAC/PHY state follow the state of any of the ports.
+ */
+ err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
+ if (err < 0)
+ goto err_free_alink;
+
+ netif_keep_dst(nn->dp.netdev);
+
+ nfp_abm_vnic_set_mac(app->pf, abm, nn, id);
+ nfp_abm_ctrl_read_params(alink);
+
+ return 0;
+
+err_free_alink:
+ kfree(alink);
+ return err;
+}
+
+static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
+{
+ struct nfp_abm_link *alink = nn->app_priv;
+
+ nfp_abm_kill_reprs(alink->abm, alink);
+ kfree(alink);
+}
+
+static int nfp_abm_init(struct nfp_app *app)
+{
+ struct nfp_pf *pf = app->pf;
+ struct nfp_reprs *reprs;
+ struct nfp_abm *abm;
+ int err;
+
+ if (!pf->eth_tbl) {
+ nfp_err(pf->cpp, "ABM NIC requires ETH table\n");
+ return -EINVAL;
+ }
+ if (pf->max_data_vnics != pf->eth_tbl->count) {
+ nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n",
+ pf->max_data_vnics, pf->eth_tbl->count);
+ return -EINVAL;
+ }
+ if (!pf->mac_stats_bar) {
+ nfp_warn(app->cpp, "ABM NIC requires mac_stats symbol\n");
+ return -EINVAL;
+ }
+
+ abm = kzalloc(sizeof(*abm), GFP_KERNEL);
+ if (!abm)
+ return -ENOMEM;
+ app->priv = abm;
+ abm->app = app;
+
+ err = nfp_abm_ctrl_find_addrs(abm);
+ if (err)
+ goto err_free_abm;
+
+ err = -ENOMEM;
+ reprs = nfp_reprs_alloc(pf->max_data_vnics);
+ if (!reprs)
+ goto err_free_abm;
+ RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs);
+
+ reprs = nfp_reprs_alloc(pf->max_data_vnics);
+ if (!reprs)
+ goto err_free_phys;
+ RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PF], reprs);
+
+ return 0;
+
+err_free_phys:
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+err_free_abm:
+ kfree(abm);
+ app->priv = NULL;
+ return err;
+}
+
+static void nfp_abm_clean(struct nfp_app *app)
+{
+ struct nfp_abm *abm = app->priv;
+
+ nfp_abm_eswitch_clean_up(abm);
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+ kfree(abm);
+ app->priv = NULL;
+}
+
+const struct nfp_app_type app_abm = {
+ .id = NFP_APP_ACTIVE_BUFFER_MGMT_NIC,
+ .name = "abm",
+
+ .init = nfp_abm_init,
+ .clean = nfp_abm_clean,
+
+ .vnic_alloc = nfp_abm_vnic_alloc,
+ .vnic_free = nfp_abm_vnic_free,
+
+ .eswitch_mode_get = nfp_abm_eswitch_mode_get,
+ .eswitch_mode_set = nfp_abm_eswitch_mode_set,
+
+ .repr_get = nfp_abm_repr_get,
+};
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ABM_H__
+#define __NFP_ABM_H__ 1
+
+#include <net/devlink.h>
+
+struct nfp_app;
+struct nfp_net;
+
+#define NFP_ABM_PORTID_TYPE GENMASK(23, 16)
+#define NFP_ABM_PORTID_ID GENMASK(7, 0)
+
+/**
+ * struct nfp_abm - ABM NIC app structure
+ * @app: back pointer to nfp_app
+ * @pf_id: ID of our PF link
+ * @eswitch_mode: devlink eswitch mode, advanced functions only visible
+ * in switchdev mode
+ */
+struct nfp_abm {
+ struct nfp_app *app;
+ unsigned int pf_id;
+ enum devlink_eswitch_mode eswitch_mode;
+};
+
+/**
+ * struct nfp_abm_link - port tuple of a ABM NIC
+ * @abm: back pointer to nfp_abm
+ * @vnic: data vNIC
+ * @id: id of the data vNIC
+ * @queue_base: id of base to host queue within PCIe (not QC idx)
+ */
+struct nfp_abm_link {
+ struct nfp_abm *abm;
+ struct nfp_net *vnic;
+ unsigned int id;
+ unsigned int queue_base;
+};
+
+void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
+int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
+#endif
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ABI__
+#define __NFP_ABI__ 1
+
+#include <linux/types.h>
+
+#define NFP_MBOX_SYM_NAME "_abi_nfd_pf%u_mbox"
+#define NFP_MBOX_SYM_MIN_SIZE 16 /* When no data needed */
+
+#define NFP_MBOX_CMD 0x00
+#define NFP_MBOX_RET 0x04
+#define NFP_MBOX_DATA_LEN 0x08
+#define NFP_MBOX_RESERVED 0x0c
+#define NFP_MBOX_DATA 0x10
+
+/**
+ * enum nfp_mbox_cmd - PF mailbox commands
+ *
+ * @NFP_MBOX_NO_CMD: null command
+ * Used to indicate previous command has finished.
+ *
+ * @NFP_MBOX_POOL_GET: get shared buffer pool info/config
+ * Input - struct nfp_shared_buf_pool_id
+ * Output - struct nfp_shared_buf_pool_info_get
+ *
+ * @NFP_MBOX_POOL_SET: set shared buffer pool info/config
+ * Input - struct nfp_shared_buf_pool_info_set
+ * Output - None
+ */
+enum nfp_mbox_cmd {
+ NFP_MBOX_NO_CMD = 0x00,
+
+ NFP_MBOX_POOL_GET = 0x01,
+ NFP_MBOX_POOL_SET = 0x02,
+};
+
+#define NFP_SHARED_BUF_COUNT_SYM_NAME "_abi_nfd_pf%u_sb_cnt"
+#define NFP_SHARED_BUF_TABLE_SYM_NAME "_abi_nfd_pf%u_sb_tbl"
+
+/**
+ * struct nfp_shared_buf - NFP shared buffer description
+ * @id: numerical user-visible id of the shared buffer
+ * @size: size in bytes of the buffer
+ * @ingress_pools_count: number of ingress pools
+ * @egress_pools_count: number of egress pools
+ * @ingress_tc_count: number of ingress trafic classes
+ * @egress_tc_count: number of egress trafic classes
+ * @pool_size_unit: pool size may be in credits, each credit is
+ * @pool_size_unit bytes
+ */
+struct nfp_shared_buf {
+ __le32 id;
+ __le32 size;
+ __le16 ingress_pools_count;
+ __le16 egress_pools_count;
+ __le16 ingress_tc_count;
+ __le16 egress_tc_count;
+
+ __le32 pool_size_unit;
+};
+
+/**
+ * struct nfp_shared_buf_pool_id - shared buffer pool identification
+ * @shared_buf: shared buffer id
+ * @pool: pool index
+ */
+struct nfp_shared_buf_pool_id {
+ __le32 shared_buf;
+ __le32 pool;
+};
+
+/**
+ * struct nfp_shared_buf_pool_info_get - struct devlink_sb_pool_info mirror
+ * @pool_type: one of enum devlink_sb_pool_type
+ * @size: pool size in units of SB's @pool_size_unit
+ * @threshold_type: one of enum devlink_sb_threshold_type
+ */
+struct nfp_shared_buf_pool_info_get {
+ __le32 pool_type;
+ __le32 size;
+ __le32 threshold_type;
+};
+
+/**
+ * struct nfp_shared_buf_pool_info_set - packed args of sb_pool_set
+ * @id: pool identification info
+ * @size: pool size in units of SB's @pool_size_unit
+ * @threshold_type: one of enum devlink_sb_threshold_type
+ */
+struct nfp_shared_buf_pool_info_set {
+ struct nfp_shared_buf_pool_id id;
+ __le32 size;
+ __le32 threshold_type;
+};
+
+#endif
#ifdef CONFIG_NFP_APP_FLOWER
[NFP_APP_FLOWER_NIC] = &app_flower,
#endif
+#ifdef CONFIG_NFP_APP_ABM_NIC
+ [NFP_APP_ACTIVE_BUFFER_MGMT_NIC] = &app_abm,
+#endif
};
struct nfp_app *nfp_app_from_netdev(struct net_device *netdev)
NFP_APP_CORE_NIC = 0x1,
NFP_APP_BPF_NIC = 0x2,
NFP_APP_FLOWER_NIC = 0x3,
+ NFP_APP_ACTIVE_BUFFER_MGMT_NIC = 0x4,
};
extern const struct nfp_app_type app_nic;
extern const struct nfp_app_type app_bpf;
extern const struct nfp_app_type app_flower;
+extern const struct nfp_app_type app_abm;
/**
* struct nfp_app_type - application definition
* @bpf: BPF ndo offload-related calls
* @xdp_offload: offload an XDP program
* @eswitch_mode_get: get SR-IOV eswitch mode
+ * @eswitch_mode_set: set SR-IOV eswitch mode (under pf->lock)
* @sriov_enable: app-specific sriov initialisation
* @sriov_disable: app-specific sriov clean-up
* @repr_get: get representor netdev
void (*sriov_disable)(struct nfp_app *app);
enum devlink_eswitch_mode (*eswitch_mode_get)(struct nfp_app *app);
+ int (*eswitch_mode_set)(struct nfp_app *app, u16 mode);
struct net_device *(*repr_get)(struct nfp_app *app, u32 id);
};
return 0;
}
+static inline int nfp_app_eswitch_mode_set(struct nfp_app *app, u16 mode)
+{
+ if (!app->type->eswitch_mode_set)
+ return -EOPNOTSUPP;
+ return app->type->eswitch_mode_set(app, mode);
+}
+
static inline int nfp_app_sriov_enable(struct nfp_app *app, int num_vfs)
{
if (!app || !app->type->sriov_enable)
int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
unsigned int id);
+int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
+ struct nfp_net *nn, unsigned int id);
#endif
#include "nfp_net.h"
#include "nfp_port.h"
-static int
-nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
- struct nfp_net *nn, unsigned int id)
+int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
+ struct nfp_net *nn, unsigned int id)
{
int err;
return ret;
}
+static int
+nfp_devlink_sb_pool_get(struct devlink *devlink, unsigned int sb_index,
+ u16 pool_index, struct devlink_sb_pool_info *pool_info)
+{
+ struct nfp_pf *pf = devlink_priv(devlink);
+
+ return nfp_shared_buf_pool_get(pf, sb_index, pool_index, pool_info);
+}
+
+static int
+nfp_devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+ u16 pool_index,
+ u32 size, enum devlink_sb_threshold_type threshold_type)
+{
+ struct nfp_pf *pf = devlink_priv(devlink);
+
+ return nfp_shared_buf_pool_set(pf, sb_index, pool_index,
+ size, threshold_type);
+}
+
static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
struct nfp_pf *pf = devlink_priv(devlink);
return nfp_app_eswitch_mode_get(pf->app, mode);
}
+static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+ struct nfp_pf *pf = devlink_priv(devlink);
+ int ret;
+
+ mutex_lock(&pf->lock);
+ ret = nfp_app_eswitch_mode_set(pf->app, mode);
+ mutex_unlock(&pf->lock);
+
+ return ret;
+}
+
const struct devlink_ops nfp_devlink_ops = {
.port_split = nfp_devlink_port_split,
.port_unsplit = nfp_devlink_port_unsplit,
+ .sb_pool_get = nfp_devlink_sb_pool_get,
+ .sb_pool_set = nfp_devlink_sb_pool_set,
.eswitch_mode_get = nfp_devlink_eswitch_mode_get,
+ .eswitch_mode_set = nfp_devlink_eswitch_mode_set,
};
int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
#include "nfpcore/nfp6000_pcie.h"
+#include "nfp_abi.h"
#include "nfp_app.h"
#include "nfp_main.h"
#include "nfp_net.h"
};
MODULE_DEVICE_TABLE(pci, nfp_pci_device_ids);
+int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format,
+ unsigned int default_val)
+{
+ char name[256];
+ int err = 0;
+ u64 val;
+
+ snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp));
+
+ val = nfp_rtsym_read_le(pf->rtbl, name, &err);
+ if (err) {
+ if (err == -ENOENT)
+ return default_val;
+ nfp_err(pf->cpp, "Unable to read symbol %s\n", name);
+ return err;
+ }
+
+ return val;
+}
+
+u8 __iomem *
+nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
+ unsigned int min_size, struct nfp_cpp_area **area)
+{
+ char pf_symbol[256];
+
+ snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt,
+ nfp_cppcore_pcie_unit(pf->cpp));
+
+ return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area);
+}
+
+/* Callers should hold the devlink instance lock */
+int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length,
+ void *out_data, u64 out_length)
+{
+ unsigned long long addr;
+ unsigned long err_at;
+ u64 max_data_sz;
+ u32 val = 0;
+ u32 cpp_id;
+ int n, err;
+
+ if (!pf->mbox)
+ return -EOPNOTSUPP;
+
+ cpp_id = NFP_CPP_ISLAND_ID(pf->mbox->target, NFP_CPP_ACTION_RW, 0,
+ pf->mbox->domain);
+ addr = pf->mbox->addr;
+ max_data_sz = pf->mbox->size - NFP_MBOX_SYM_MIN_SIZE;
+
+ /* Check if cmd field is clear */
+ err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val);
+ if (err || val) {
+ nfp_warn(pf->cpp, "failed to issue command (%u): %u, err: %d\n",
+ cmd, val, err);
+ return err ?: -EBUSY;
+ }
+
+ in_length = min(in_length, max_data_sz);
+ n = nfp_cpp_write(pf->cpp, cpp_id, addr + NFP_MBOX_DATA,
+ in_data, in_length);
+ if (n != in_length)
+ return -EIO;
+ /* Write data_len and wipe reserved */
+ err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN,
+ in_length);
+ if (err)
+ return err;
+
+ /* Read back for ordering */
+ err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val);
+ if (err)
+ return err;
+
+ /* Write cmd and wipe return value */
+ err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, cmd);
+ if (err)
+ return err;
+
+ err_at = jiffies + 5 * HZ;
+ while (true) {
+ /* Wait for command to go to 0 (NFP_MBOX_NO_CMD) */
+ err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val);
+ if (err)
+ return err;
+ if (!val)
+ break;
+
+ if (time_is_before_eq_jiffies(err_at))
+ return -ETIMEDOUT;
+
+ msleep(5);
+ }
+
+ /* Copy output if any (could be error info, do it before reading ret) */
+ err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val);
+ if (err)
+ return err;
+
+ out_length = min_t(u32, val, min(out_length, max_data_sz));
+ n = nfp_cpp_read(pf->cpp, cpp_id, addr + NFP_MBOX_DATA,
+ out_data, out_length);
+ if (n != out_length)
+ return -EIO;
+
+ /* Check if there is an error */
+ err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_RET, &val);
+ if (err)
+ return err;
+ if (val)
+ return -val;
+
+ return out_length;
+}
+
static bool nfp_board_ready(struct nfp_pf *pf)
{
const char *cp;
nfp_nsp_close(nsp);
}
+static int nfp_pf_find_rtsyms(struct nfp_pf *pf)
+{
+ char pf_symbol[256];
+ unsigned int pf_id;
+
+ pf_id = nfp_cppcore_pcie_unit(pf->cpp);
+
+ /* Optional per-PCI PF mailbox */
+ snprintf(pf_symbol, sizeof(pf_symbol), NFP_MBOX_SYM_NAME, pf_id);
+ pf->mbox = nfp_rtsym_lookup(pf->rtbl, pf_symbol);
+ if (pf->mbox && pf->mbox->size < NFP_MBOX_SYM_MIN_SIZE) {
+ nfp_err(pf->cpp, "PF mailbox symbol too small: %llu < %d\n",
+ pf->mbox->size, NFP_MBOX_SYM_MIN_SIZE);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nfp_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *pci_id)
{
pf->mip = nfp_mip_open(pf->cpp);
pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip);
+ err = nfp_pf_find_rtsyms(pf);
+ if (err)
+ goto err_fw_unload;
+
pf->dump_flag = NFP_DUMP_NSP_DIAG;
pf->dumpspec = nfp_net_dump_load_dumpspec(pf->cpp, pf->rtbl);
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/workqueue.h>
+#include <net/devlink.h>
struct dentry;
struct device;
-struct devlink_ops;
struct pci_dev;
struct nfp_cpp;
struct nfp_net;
struct nfp_nsp_identify;
struct nfp_port;
+struct nfp_rtsym;
struct nfp_rtsym_table;
+struct nfp_shared_buf;
/**
* struct nfp_dumpspec - NFP FW dump specification structure
* @vf_cfg_mem: Pointer to mapped VF configuration area
* @vfcfg_tbl2_area: Pointer to the CPP area for the VF config table
* @vfcfg_tbl2: Pointer to mapped VF config table
+ * @mbox: RTSym of per-PCI PF mailbox (under devlink lock)
* @irq_entries: Array of MSI-X entries for all vNICs
* @limit_vfs: Number of VFs supported by firmware (~0 for PCI limit)
* @num_vfs: Number of SR-IOV VFs enabled
* @ports: Linked list of port structures (struct nfp_port)
* @wq: Workqueue for running works which need to grab @lock
* @port_refresh_work: Work entry for taking netdevs out
+ * @shared_bufs: Array of shared buffer structures if FW has any SBs
+ * @num_shared_bufs: Number of elements in @shared_bufs
* @lock: Protects all fields which may change after probe
*/
struct nfp_pf {
struct nfp_cpp_area *vfcfg_tbl2_area;
u8 __iomem *vfcfg_tbl2;
+ const struct nfp_rtsym *mbox;
+
struct msix_entry *irq_entries;
unsigned int limit_vfs;
struct workqueue_struct *wq;
struct work_struct port_refresh_work;
+ struct nfp_shared_buf *shared_bufs;
+ unsigned int num_shared_bufs;
+
struct mutex lock;
};
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
+int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format,
+ unsigned int default_val);
+u8 __iomem *
+nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
+ unsigned int min_size, struct nfp_cpp_area **area);
+int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length,
+ void *out_data, u64 out_length);
+
enum nfp_dump_diag {
NFP_DUMP_NSP_DIAG = 0,
};
int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec,
struct ethtool_dump *dump_param, void *dest);
+int nfp_shared_buf_register(struct nfp_pf *pf);
+void nfp_shared_buf_unregister(struct nfp_pf *pf);
+int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info);
+int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type);
#endif /* NFP_MAIN_H */
/**
* struct nfp_net - NFP network device structure
* @dp: Datapath structure
+ * @id: vNIC id within the PF (0 for VFs)
* @fw_ver: Firmware version
* @cap: Capabilities advertised by the Firmware
* @max_mtu: Maximum support MTU advertised by the Firmware
struct nfp_net_fw_version fw_ver;
+ u32 id;
+
u32 cap;
u32 max_mtu;
void nfp_net_debugfs_create(void);
void nfp_net_debugfs_destroy(void);
struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev);
-void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id);
+void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir);
void nfp_net_debugfs_dir_clean(struct dentry **dir);
#else
static inline void nfp_net_debugfs_create(void)
}
static inline void
-nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
+nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir)
{
}
return features;
}
+static int
+nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+ int n;
+
+ if (nn->port)
+ return nfp_port_get_phys_port_name(netdev, name, len);
+
+ if (!nn->dp.is_vf) {
+ n = snprintf(name, len, "%d", nn->id);
+ if (n >= len)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
* @nn: NFP Net device to reconfigure
.ndo_set_mac_address = nfp_net_set_mac_address,
.ndo_set_features = nfp_net_set_features,
.ndo_features_check = nfp_net_features_check,
- .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
+ .ndo_get_phys_port_name = nfp_net_get_phys_port_name,
.ndo_udp_tunnel_add = nfp_net_add_vxlan_port,
.ndo_udp_tunnel_del = nfp_net_del_vxlan_port,
.ndo_bpf = nfp_net_xdp,
.llseek = seq_lseek
};
-void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
+void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir)
{
struct dentry *queues, *tx, *rx, *xdp;
char name[20];
return;
if (nfp_net_is_data_vnic(nn))
- sprintf(name, "vnic%d", id);
+ sprintf(name, "vnic%d", nn->id);
else
strcpy(name, "ctrl-vnic");
nn->debugfs_dir = debugfs_create_dir(name, ddir);
return NULL;
}
-static int
-nfp_net_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format,
- unsigned int default_val)
-{
- char name[256];
- int err = 0;
- u64 val;
-
- snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp));
-
- val = nfp_rtsym_read_le(pf->rtbl, name, &err);
- if (err) {
- if (err == -ENOENT)
- return default_val;
- nfp_err(pf->cpp, "Unable to read symbol %s\n", name);
- return err;
- }
-
- return val;
-}
-
static int nfp_net_pf_get_num_ports(struct nfp_pf *pf)
{
- return nfp_net_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1);
+ return nfp_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1);
}
static int nfp_net_pf_get_app_id(struct nfp_pf *pf)
{
- return nfp_net_pf_rtsym_read_optional(pf, "_pf%u_net_app_id",
- NFP_APP_CORE_NIC);
-}
-
-static u8 __iomem *
-nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
- unsigned int min_size, struct nfp_cpp_area **area)
-{
- char pf_symbol[256];
-
- snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt,
- nfp_cppcore_pcie_unit(pf->cpp));
-
- return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area);
+ return nfp_pf_rtsym_read_optional(pf, "_pf%u_net_app_id",
+ NFP_APP_CORE_NIC);
}
static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
{
int err;
+ nn->id = id;
+
err = nfp_net_init(nn);
if (err)
return err;
- nfp_net_debugfs_vnic_add(nn, pf->ddir, id);
+ nfp_net_debugfs_vnic_add(nn, pf->ddir);
if (nn->port) {
err = nfp_devlink_port_register(pf->app, nn->port);
if (!nfp_app_needs_ctrl_vnic(pf->app))
return 0;
- ctrl_bar = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar",
- NFP_PF_CSR_SLICE_SIZE,
- &pf->ctrl_vnic_bar);
+ ctrl_bar = nfp_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar",
+ NFP_PF_CSR_SLICE_SIZE, &pf->ctrl_vnic_bar);
if (IS_ERR(ctrl_bar)) {
nfp_err(pf->cpp, "Failed to find ctrl vNIC memory symbol\n");
err = PTR_ERR(ctrl_bar);
int err;
min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
- mem = nfp_net_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0",
- min_size, &pf->data_vnic_bar);
+ mem = nfp_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0",
+ min_size, &pf->data_vnic_bar);
if (IS_ERR(mem)) {
nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
return PTR_ERR(mem);
}
}
- pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg",
- "_pf%d_net_vf_bar",
- NFP_NET_CFG_BAR_SZ *
- pf->limit_vfs, &pf->vf_cfg_bar);
+ pf->vf_cfg_mem = nfp_pf_map_rtsym(pf, "net.vfcfg", "_pf%d_net_vf_bar",
+ NFP_NET_CFG_BAR_SZ * pf->limit_vfs,
+ &pf->vf_cfg_bar);
if (IS_ERR(pf->vf_cfg_mem)) {
if (PTR_ERR(pf->vf_cfg_mem) != -ENOENT) {
err = PTR_ERR(pf->vf_cfg_mem);
}
min_size = NFP_NET_VF_CFG_SZ * pf->limit_vfs + NFP_NET_VF_CFG_MB_SZ;
- pf->vfcfg_tbl2 = nfp_net_pf_map_rtsym(pf, "net.vfcfg_tbl2",
- "_pf%d_net_vf_cfg2",
- min_size, &pf->vfcfg_tbl2_area);
+ pf->vfcfg_tbl2 = nfp_pf_map_rtsym(pf, "net.vfcfg_tbl2",
+ "_pf%d_net_vf_cfg2",
+ min_size, &pf->vfcfg_tbl2_area);
if (IS_ERR(pf->vfcfg_tbl2)) {
if (PTR_ERR(pf->vfcfg_tbl2) != -ENOENT) {
err = PTR_ERR(pf->vfcfg_tbl2);
if (err)
goto err_app_clean;
+ err = nfp_shared_buf_register(pf);
+ if (err)
+ goto err_devlink_unreg;
+
mutex_lock(&pf->lock);
pf->ddir = nfp_net_debugfs_device_add(pf->pdev);
err_clean_ddir:
nfp_net_debugfs_dir_clean(&pf->ddir);
mutex_unlock(&pf->lock);
+ nfp_shared_buf_unregister(pf);
+err_devlink_unreg:
cancel_work_sync(&pf->port_refresh_work);
devlink_unregister(devlink);
err_app_clean:
mutex_unlock(&pf->lock);
+ nfp_shared_buf_unregister(pf);
devlink_unregister(priv_to_devlink(pf));
nfp_net_pf_free_irqs(pf);
return NULL;
}
-static void nfp_repr_clean_and_free(struct nfp_repr *repr)
+void nfp_repr_clean_and_free(struct nfp_repr *repr)
{
nfp_info(repr->app->cpp, "Destroying Representor(%s)\n",
repr->netdev->name);
* @port: Port of representor
* @app: APP handle
* @stats: Statistic of packets hitting CPU
+ * @app_priv: Pointer for APP data
*/
struct nfp_repr {
struct net_device *netdev;
struct nfp_port *port;
struct nfp_app *app;
struct nfp_repr_pcpu_stats __percpu *stats;
+ void *app_priv;
};
/**
struct net_device *pf_netdev);
void nfp_repr_free(struct net_device *netdev);
struct net_device *nfp_repr_alloc(struct nfp_app *app);
+void nfp_repr_clean_and_free(struct nfp_repr *repr);
void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs);
void nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
enum nfp_repr_type type);
nfp_net_info(nn);
vf->ddir = nfp_net_debugfs_device_add(pdev);
- nfp_net_debugfs_vnic_add(nn, vf->ddir, 0);
+ nfp_net_debugfs_vnic_add(nn, vf->ddir);
return 0;
eth_port->label_subport);
break;
case NFP_PORT_PF_PORT:
- n = snprintf(name, len, "pf%d", port->pf_id);
+ if (!port->pf_split)
+ n = snprintf(name, len, "pf%d", port->pf_id);
+ else
+ n = snprintf(name, len, "pf%ds%d", port->pf_id,
+ port->pf_split_id);
break;
case NFP_PORT_VF_PORT:
n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id);
eth_port = __nfp_port_get_eth_port(port);
if (!eth_port)
return 0;
+ if (port->eth_forced)
+ return 0;
err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed);
return err < 0 && err != -EOPNOTSUPP ? err : 0;
* @app: backpointer to the app structure
* @dl_port: devlink port structure
* @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
+ * @eth_forced: for %NFP_PORT_PHYS_PORT port is forced UP or DOWN, don't change
* @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry
* @eth_stats: for %NFP_PORT_PHYS_PORT MAC stats if available
* @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
* @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
+ * @pf_split: for %NFP_PORT_PF_PORT %true if PCI PF has more than one vNIC
+ * @pf_split_id:for %NFP_PORT_PF_PORT ID of PCI PF vNIC (valid if @pf_split)
* @vnic: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT vNIC ctrl memory
* @port_list: entry on pf's list of ports
*/
/* NFP_PORT_PHYS_PORT */
struct {
unsigned int eth_id;
+ bool eth_forced;
struct nfp_eth_table_port *eth_port;
u8 __iomem *eth_stats;
};
struct {
unsigned int pf_id;
unsigned int vf_id;
+ bool pf_split;
+ unsigned int pf_split_id;
u8 __iomem *vnic;
};
};
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <net/devlink.h>
+
+#include "nfpcore/nfp_cpp.h"
+#include "nfpcore/nfp_nffw.h"
+#include "nfp_abi.h"
+#include "nfp_app.h"
+#include "nfp_main.h"
+
+static u32 nfp_shared_buf_pool_unit(struct nfp_pf *pf, unsigned int sb)
+{
+ __le32 sb_id = cpu_to_le32(sb);
+ unsigned int i;
+
+ for (i = 0; i < pf->num_shared_bufs; i++)
+ if (pf->shared_bufs[i].id == sb_id)
+ return le32_to_cpu(pf->shared_bufs[i].pool_size_unit);
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info)
+{
+ struct nfp_shared_buf_pool_info_get get_data;
+ struct nfp_shared_buf_pool_id id = {
+ .shared_buf = cpu_to_le32(sb),
+ .pool = cpu_to_le32(pool_index),
+ };
+ unsigned int unit_size;
+ int n;
+
+ unit_size = nfp_shared_buf_pool_unit(pf, sb);
+ if (!unit_size)
+ return -EINVAL;
+
+ n = nfp_mbox_cmd(pf, NFP_MBOX_POOL_GET, &id, sizeof(id),
+ &get_data, sizeof(get_data));
+ if (n < 0)
+ return n;
+ if (n < sizeof(get_data))
+ return -EIO;
+
+ pool_info->pool_type = le32_to_cpu(get_data.pool_type);
+ pool_info->threshold_type = le32_to_cpu(get_data.threshold_type);
+ pool_info->size = le32_to_cpu(get_data.size) * unit_size;
+
+ return 0;
+}
+
+int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type)
+{
+ struct nfp_shared_buf_pool_info_set set_data = {
+ .id = {
+ .shared_buf = cpu_to_le32(sb),
+ .pool = cpu_to_le32(pool_index),
+ },
+ .threshold_type = cpu_to_le32(threshold_type),
+ };
+ unsigned int unit_size;
+
+ unit_size = nfp_shared_buf_pool_unit(pf, sb);
+ if (!unit_size || size % unit_size)
+ return -EINVAL;
+ set_data.size = cpu_to_le32(size / unit_size);
+
+ return nfp_mbox_cmd(pf, NFP_MBOX_POOL_SET, &set_data, sizeof(set_data),
+ NULL, 0);
+}
+
+int nfp_shared_buf_register(struct nfp_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ unsigned int i, num_entries, entry_sz;
+ struct nfp_cpp_area *sb_desc_area;
+ u8 __iomem *sb_desc;
+ int n, err;
+
+ if (!pf->mbox)
+ return 0;
+
+ n = nfp_pf_rtsym_read_optional(pf, NFP_SHARED_BUF_COUNT_SYM_NAME, 0);
+ if (n <= 0)
+ return n;
+ num_entries = n;
+
+ sb_desc = nfp_pf_map_rtsym(pf, "sb_tbl", NFP_SHARED_BUF_TABLE_SYM_NAME,
+ num_entries * sizeof(pf->shared_bufs[0]),
+ &sb_desc_area);
+ if (IS_ERR(sb_desc))
+ return PTR_ERR(sb_desc);
+
+ entry_sz = nfp_cpp_area_size(sb_desc_area) / num_entries;
+
+ pf->shared_bufs = kmalloc_array(num_entries, sizeof(pf->shared_bufs[0]),
+ GFP_KERNEL);
+ if (!pf->shared_bufs) {
+ err = -ENOMEM;
+ goto err_release_area;
+ }
+
+ for (i = 0; i < num_entries; i++) {
+ struct nfp_shared_buf *sb = &pf->shared_bufs[i];
+
+ /* Entries may be larger in future FW */
+ memcpy_fromio(sb, sb_desc + i * entry_sz, sizeof(*sb));
+
+ err = devlink_sb_register(devlink,
+ le32_to_cpu(sb->id),
+ le32_to_cpu(sb->size),
+ le16_to_cpu(sb->ingress_pools_count),
+ le16_to_cpu(sb->egress_pools_count),
+ le16_to_cpu(sb->ingress_tc_count),
+ le16_to_cpu(sb->egress_tc_count));
+ if (err)
+ goto err_unreg_prev;
+ }
+ pf->num_shared_bufs = num_entries;
+
+ nfp_cpp_area_release_free(sb_desc_area);
+
+ return 0;
+
+err_unreg_prev:
+ while (i--)
+ devlink_sb_unregister(devlink,
+ le32_to_cpu(pf->shared_bufs[i].id));
+ kfree(pf->shared_bufs);
+err_release_area:
+ nfp_cpp_area_release_free(sb_desc_area);
+ return err;
+}
+
+void nfp_shared_buf_unregister(struct nfp_pf *pf)
+{
+ struct devlink *devlink = priv_to_devlink(pf);
+ unsigned int i;
+
+ for (i = 0; i < pf->num_shared_bufs; i++)
+ devlink_sb_unregister(devlink,
+ le32_to_cpu(pf->shared_bufs[i].id));
+ kfree(pf->shared_bufs);
+}
u32 *wrptr32 = kernel_vaddr;
const u32 __iomem *rdptr32;
int n, width;
- bool is_64;
priv = nfp_cpp_area_priv(area);
rdptr64 = priv->iomem + offset;
return -EFAULT;
width = priv->width.read;
-
if (width <= 0)
return -EINVAL;
+ /* MU reads via a PCIe2CPP BAR support 32bit (and other) lengths */
+ if (priv->target == (NFP_CPP_TARGET_MU & NFP_CPP_TARGET_ID_MASK) &&
+ priv->action == NFP_CPP_ACTION_RW &&
+ (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4))
+ width = TARGET_WIDTH_32;
+
/* Unaligned? Translate to an explicit access */
if ((priv->offset + offset) & (width - 1))
return nfp_cpp_explicit_read(nfp_cpp_area_cpp(area),
priv->offset + offset,
kernel_vaddr, length, width);
- is_64 = width == TARGET_WIDTH_64;
-
- /* MU reads via a PCIe2CPP BAR supports 32bit (and other) lengths */
- if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) &&
- priv->action == NFP_CPP_ACTION_RW)
- is_64 = false;
+ if (WARN_ON(!priv->bar))
+ return -EFAULT;
- if (is_64) {
- if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
- return -EINVAL;
- } else {
+ switch (width) {
+ case TARGET_WIDTH_32:
if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0)
return -EINVAL;
- }
- if (WARN_ON(!priv->bar))
- return -EFAULT;
+ for (n = 0; n < length; n += sizeof(u32))
+ *wrptr32++ = __raw_readl(rdptr32++);
+ return n;
+#ifdef __raw_readq
+ case TARGET_WIDTH_64:
+ if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
+ return -EINVAL;
- if (is_64)
-#ifndef __raw_readq
- return -EINVAL;
-#else
for (n = 0; n < length; n += sizeof(u64))
*wrptr64++ = __raw_readq(rdptr64++);
+ return n;
#endif
- else
- for (n = 0; n < length; n += sizeof(u32))
- *wrptr32++ = __raw_readl(rdptr32++);
-
- return n;
+ default:
+ return -EINVAL;
+ }
}
static int
struct nfp6000_area_priv *priv;
u32 __iomem *wrptr32;
int n, width;
- bool is_64;
priv = nfp_cpp_area_priv(area);
wrptr64 = priv->iomem + offset;
return -EFAULT;
width = priv->width.write;
-
if (width <= 0)
return -EINVAL;
+ /* MU writes via a PCIe2CPP BAR support 32bit (and other) lengths */
+ if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) &&
+ priv->action == NFP_CPP_ACTION_RW &&
+ (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4))
+ width = TARGET_WIDTH_32;
+
/* Unaligned? Translate to an explicit access */
if ((priv->offset + offset) & (width - 1))
return nfp_cpp_explicit_write(nfp_cpp_area_cpp(area),
priv->offset + offset,
kernel_vaddr, length, width);
- is_64 = width == TARGET_WIDTH_64;
-
- /* MU writes via a PCIe2CPP BAR supports 32bit (and other) lengths */
- if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) &&
- priv->action == NFP_CPP_ACTION_RW)
- is_64 = false;
+ if (WARN_ON(!priv->bar))
+ return -EFAULT;
- if (is_64) {
- if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
- return -EINVAL;
- } else {
+ switch (width) {
+ case TARGET_WIDTH_32:
if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0)
return -EINVAL;
- }
- if (WARN_ON(!priv->bar))
- return -EFAULT;
+ for (n = 0; n < length; n += sizeof(u32)) {
+ __raw_writel(*rdptr32++, wrptr32++);
+ wmb();
+ }
+ return n;
+#ifdef __raw_writeq
+ case TARGET_WIDTH_64:
+ if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0)
+ return -EINVAL;
- if (is_64)
-#ifndef __raw_writeq
- return -EINVAL;
-#else
for (n = 0; n < length; n += sizeof(u64)) {
__raw_writeq(*rdptr64++, wrptr64++);
wmb();
}
+ return n;
#endif
- else
- for (n = 0; n < length; n += sizeof(u32)) {
- __raw_writel(*rdptr32++, wrptr32++);
- wmb();
- }
-
- return n;
+ default:
+ return -EINVAL;
+ }
}
struct nfp6000_explicit_priv {
return ret;
}
+static u32 netvsc_get_msglevel(struct net_device *ndev)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+
+ return ndev_ctx->msg_enable;
+}
+
+static void netvsc_set_msglevel(struct net_device *ndev, u32 val)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+
+ ndev_ctx->msg_enable = val;
+}
+
static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
+ .get_msglevel = netvsc_get_msglevel,
+ .set_msglevel = netvsc_set_msglevel,
.get_link = ethtool_op_get_link,
.get_ethtool_stats = netvsc_get_ethtool_stats,
.get_sset_count = netvsc_get_sset_count,
if (pdev->dev.of_node) {
struct device_node *node = pdev->dev.of_node;
const struct of_device_id *id;
+ struct i2c_adapter *i2c;
struct device_node *np;
id = of_match_node(sfp_of_match, node);
sff = sfp->type = id->data;
np = of_parse_phandle(node, "i2c-bus", 0);
- if (np) {
- struct i2c_adapter *i2c;
-
- i2c = of_find_i2c_adapter_by_node(np);
- of_node_put(np);
- if (!i2c)
- return -EPROBE_DEFER;
-
- err = sfp_i2c_configure(sfp, i2c);
- if (err < 0) {
- i2c_put_adapter(i2c);
- return err;
- }
+ if (!np) {
+ dev_err(sfp->dev, "missing 'i2c-bus' property\n");
+ return -ENODEV;
+ }
+
+ i2c = of_find_i2c_adapter_by_node(np);
+ of_node_put(np);
+ if (!i2c)
+ return -EPROBE_DEFER;
+
+ err = sfp_i2c_configure(sfp, i2c);
+ if (err < 0) {
+ i2c_put_adapter(i2c);
+ return err;
}
}
if (poll)
mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+ /* We could have an issue in cases no Tx disable pin is available or
+ * wired as modules using a laser as their light source will continue to
+ * be active when the fiber is removed. This could be a safety issue and
+ * we should at least warn the user about that.
+ */
+ if (!sfp->gpio[GPIO_TX_DISABLE])
+ dev_warn(sfp->dev,
+ "No tx_disable pin: SFP modules will always be emitting.\n");
+
return 0;
}
}
team->dev->vlan_features = vlan_features;
- team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
+ team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+ NETIF_F_GSO_UDP_L4;
team->dev->hard_header_len = max_hard_header_len;
team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
- dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+ dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
dev->features |= dev->hw_features;
}
/*
* sys_execve() executes a new program.
*/
-static int do_execveat_common(int fd, struct filename *filename,
- struct user_arg_ptr argv,
- struct user_arg_ptr envp,
- int flags)
+static int __do_execve_file(int fd, struct filename *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp,
+ int flags, struct file *file)
{
char *pathbuf = NULL;
struct linux_binprm *bprm;
- struct file *file;
struct files_struct *displaced;
int retval;
check_unsafe_exec(bprm);
current->in_execve = 1;
- file = do_open_execat(fd, filename, flags);
+ if (!file)
+ file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
sched_exec();
bprm->file = file;
- if (fd == AT_FDCWD || filename->name[0] == '/') {
+ if (!filename) {
+ bprm->filename = "none";
+ } else if (fd == AT_FDCWD || filename->name[0] == '/') {
bprm->filename = filename->name;
} else {
if (filename->name[0] == '\0')
task_numa_free(current);
free_bprm(bprm);
kfree(pathbuf);
- putname(filename);
+ if (filename)
+ putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
if (displaced)
reset_files_struct(displaced);
out_ret:
- putname(filename);
+ if (filename)
+ putname(filename);
return retval;
}
+static int do_execveat_common(int fd, struct filename *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp,
+ int flags)
+{
+ return __do_execve_file(fd, filename, argv, envp, flags, NULL);
+}
+
+int do_execve_file(struct file *file, void *__argv, void *__envp)
+{
+ struct user_arg_ptr argv = { .ptr.native = __argv };
+ struct user_arg_ptr envp = { .ptr.native = __envp };
+
+ return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
+}
+
int do_execve(struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp)
const char __user * const __user *,
const char __user * const __user *,
int);
+int do_execve_file(struct file *file, void *__argv, void *__envp);
#endif /* _LINUX_BINFMTS_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BPFILTER_H
+#define _LINUX_BPFILTER_H
+
+#include <uapi/linux/bpfilter.h>
+
+struct sock;
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
+ unsigned int optlen);
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
+ int *optlen);
+extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set);
+#endif
NETIF_F_GSO_GRE_CSUM | \
NETIF_F_GSO_IPXIP4 | \
NETIF_F_GSO_IPXIP6 | \
- NETIF_F_GSO_UDP_L4 | \
NETIF_F_GSO_UDP_TUNNEL | \
NETIF_F_GSO_UDP_TUNNEL_CSUM)
const char *path;
char **argv;
char **envp;
+ struct file *file;
int wait;
int retval;
+ pid_t pid;
int (*init)(struct subprocess_info *info, struct cred *new);
void (*cleanup)(struct subprocess_info *info);
void *data;
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data);
+struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
+ int (*init)(struct subprocess_info *info, struct cred *new),
+ void (*cleanup)(struct subprocess_info *), void *data);
+struct umh_info {
+ struct file *pipe_to_umh;
+ struct file *pipe_from_umh;
+ pid_t pid;
+};
+int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
+
extern int
call_usermodehelper_exec(struct subprocess_info *info, int wait);
int ip_misc_proc_init(void);
#endif
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+ struct netlink_ext_ack *extack);
+
#endif /* _IP_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_BPFILTER_H
+#define _UAPI_LINUX_BPFILTER_H
+
+#include <linux/if.h>
+
+enum {
+ BPFILTER_IPT_SO_SET_REPLACE = 64,
+ BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65,
+ BPFILTER_IPT_SET_MAX,
+};
+
+enum {
+ BPFILTER_IPT_SO_GET_INFO = 64,
+ BPFILTER_IPT_SO_GET_ENTRIES = 65,
+ BPFILTER_IPT_SO_GET_REVISION_MATCH = 66,
+ BPFILTER_IPT_SO_GET_REVISION_TARGET = 67,
+ BPFILTER_IPT_GET_MAX,
+};
+
+#endif /* _UAPI_LINUX_BPFILTER_H */
RTA_PAD,
RTA_UID,
RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
__RTA_MAX
};
#include <linux/ptrace.h>
#include <linux/async.h>
#include <linux/uaccess.h>
+#include <linux/shmem_fs.h>
+#include <linux/pipe_fs_i.h>
#include <trace/events/module.h>
commit_creds(new);
- retval = do_execve(getname_kernel(sub_info->path),
- (const char __user *const __user *)sub_info->argv,
- (const char __user *const __user *)sub_info->envp);
+ if (sub_info->file)
+ retval = do_execve_file(sub_info->file,
+ sub_info->argv, sub_info->envp);
+ else
+ retval = do_execve(getname_kernel(sub_info->path),
+ (const char __user *const __user *)sub_info->argv,
+ (const char __user *const __user *)sub_info->envp);
out:
sub_info->retval = retval;
/*
if (pid < 0) {
sub_info->retval = pid;
umh_complete(sub_info);
+ } else {
+ sub_info->pid = pid;
}
}
}
}
EXPORT_SYMBOL(call_usermodehelper_setup);
+struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
+ int (*init)(struct subprocess_info *info, struct cred *new),
+ void (*cleanup)(struct subprocess_info *info), void *data)
+{
+ struct subprocess_info *sub_info;
+
+ sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
+ if (!sub_info)
+ return NULL;
+
+ INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
+ sub_info->path = "none";
+ sub_info->file = file;
+ sub_info->init = init;
+ sub_info->cleanup = cleanup;
+ sub_info->data = data;
+ return sub_info;
+}
+
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+ struct umh_info *umh_info = info->data;
+ struct file *from_umh[2];
+ struct file *to_umh[2];
+ int err;
+
+ /* create pipe to send data to umh */
+ err = create_pipe_files(to_umh, 0);
+ if (err)
+ return err;
+ err = replace_fd(0, to_umh[0], 0);
+ fput(to_umh[0]);
+ if (err < 0) {
+ fput(to_umh[1]);
+ return err;
+ }
+
+ /* create pipe to receive data from umh */
+ err = create_pipe_files(from_umh, 0);
+ if (err) {
+ fput(to_umh[1]);
+ replace_fd(0, NULL, 0);
+ return err;
+ }
+ err = replace_fd(1, from_umh[1], 0);
+ fput(from_umh[1]);
+ if (err < 0) {
+ fput(to_umh[1]);
+ replace_fd(0, NULL, 0);
+ fput(from_umh[0]);
+ return err;
+ }
+
+ umh_info->pipe_to_umh = to_umh[1];
+ umh_info->pipe_from_umh = from_umh[0];
+ return 0;
+}
+
+static void umh_save_pid(struct subprocess_info *info)
+{
+ struct umh_info *umh_info = info->data;
+
+ umh_info->pid = info->pid;
+}
+
+/**
+ * fork_usermode_blob - fork a blob of bytes as a usermode process
+ * @data: a blob of bytes that can be do_execv-ed as a file
+ * @len: length of the blob
+ * @info: information about usermode process (shouldn't be NULL)
+ *
+ * Returns either negative error or zero which indicates success
+ * in executing a blob of bytes as a usermode process. In such
+ * case 'struct umh_info *info' is populated with two pipes
+ * and a pid of the process. The caller is responsible for health
+ * check of the user process, killing it via pid, and closing the
+ * pipes when user process is no longer needed.
+ */
+int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
+{
+ struct subprocess_info *sub_info;
+ struct file *file;
+ ssize_t written;
+ loff_t pos = 0;
+ int err;
+
+ file = shmem_kernel_file_setup("", len, 0);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ written = kernel_write(file, data, len, &pos);
+ if (written != len) {
+ err = written;
+ if (err >= 0)
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = -ENOMEM;
+ sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
+ umh_save_pid, info);
+ if (!sub_info)
+ goto out;
+
+ err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+out:
+ fput(file);
+ return err;
+}
+EXPORT_SYMBOL_GPL(fork_usermode_blob);
+
/**
* call_usermodehelper_exec - start a usermode application
* @sub_info: information about the subprocessa
endif
+source "net/bpfilter/Kconfig"
+
source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/rds/Kconfig"
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/
obj-$(CONFIG_NET) += ipv6/
+obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
--- /dev/null
+menuconfig BPFILTER
+ bool "BPF based packet filtering framework (BPFILTER)"
+ default n
+ depends on NET && BPF
+ help
+ This builds experimental bpfilter framework that is aiming to
+ provide netfilter compatible functionality via BPF
+
+if BPFILTER
+config BPFILTER_UMH
+ tristate "bpfilter kernel module with user mode helper"
+ default m
+ help
+ This builds bpfilter kernel module with embedded user mode helper
+endif
+
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux BPFILTER layer.
+#
+
+hostprogs-y := bpfilter_umh
+bpfilter_umh-objs := main.o
+HOSTCFLAGS += -I. -Itools/include/
+ifeq ($(CONFIG_BPFILTER_UMH), y)
+# builtin bpfilter_umh should be compiled with -static
+# since rootfs isn't mounted at the time of __init
+# function is called and do_execv won't find elf interpreter
+HOSTLDFLAGS += -static
+endif
+
+# a bit of elf magic to convert bpfilter_umh binary into a binary blob
+# inside bpfilter_umh.o elf file referenced by
+# _binary_net_bpfilter_bpfilter_umh_start symbol
+# which bpfilter_kern.c passes further into umh blob loader at run-time
+quiet_cmd_copy_umh = GEN $@
+ cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
+ $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
+ -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
+ --rename-section .data=.init.rodata $< $@
+
+$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
+ $(call cmd,copy_umh)
+
+obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
+bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/umh.h>
+#include <linux/bpfilter.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "msgfmt.h"
+
+#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
+#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
+
+extern char UMH_start;
+extern char UMH_end;
+
+static struct umh_info info;
+/* since ip_getsockopt() can run in parallel, serialize access to umh */
+static DEFINE_MUTEX(bpfilter_lock);
+
+static void shutdown_umh(struct umh_info *info)
+{
+ struct task_struct *tsk;
+
+ tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
+ if (tsk)
+ force_sig(SIGKILL, tsk);
+ fput(info->pipe_to_umh);
+ fput(info->pipe_from_umh);
+}
+
+static void __stop_umh(void)
+{
+ if (bpfilter_process_sockopt) {
+ bpfilter_process_sockopt = NULL;
+ shutdown_umh(&info);
+ }
+}
+
+static void stop_umh(void)
+{
+ mutex_lock(&bpfilter_lock);
+ __stop_umh();
+ mutex_unlock(&bpfilter_lock);
+}
+
+static int __bpfilter_process_sockopt(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set)
+{
+ struct mbox_request req;
+ struct mbox_reply reply;
+ loff_t pos;
+ ssize_t n;
+ int ret;
+
+ req.is_set = is_set;
+ req.pid = current->pid;
+ req.cmd = optname;
+ req.addr = (long)optval;
+ req.len = optlen;
+ mutex_lock(&bpfilter_lock);
+ n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
+ if (n != sizeof(req)) {
+ pr_err("write fail %zd\n", n);
+ __stop_umh();
+ ret = -EFAULT;
+ goto out;
+ }
+ pos = 0;
+ n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos);
+ if (n != sizeof(reply)) {
+ pr_err("read fail %zd\n", n);
+ __stop_umh();
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = reply.status;
+out:
+ mutex_unlock(&bpfilter_lock);
+ return ret;
+}
+
+static int __init load_umh(void)
+{
+ int err;
+
+ /* fork usermode process */
+ err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
+ if (err)
+ return err;
+ pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
+
+ /* health check that usermode process started correctly */
+ if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) {
+ stop_umh();
+ return -EFAULT;
+ }
+ bpfilter_process_sockopt = &__bpfilter_process_sockopt;
+ return 0;
+}
+
+static void __exit fini_umh(void)
+{
+ stop_umh();
+}
+module_init(load_umh);
+module_exit(fini_umh);
+MODULE_LICENSE("GPL");
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "include/uapi/linux/bpf.h"
+#include <asm/unistd.h>
+#include "msgfmt.h"
+
+int debug_fd;
+
+static int handle_get_cmd(struct mbox_request *cmd)
+{
+ switch (cmd->cmd) {
+ case 0:
+ return 0;
+ default:
+ break;
+ }
+ return -ENOPROTOOPT;
+}
+
+static int handle_set_cmd(struct mbox_request *cmd)
+{
+ return -ENOPROTOOPT;
+}
+
+static void loop(void)
+{
+ while (1) {
+ struct mbox_request req;
+ struct mbox_reply reply;
+ int n;
+
+ n = read(0, &req, sizeof(req));
+ if (n != sizeof(req)) {
+ dprintf(debug_fd, "invalid request %d\n", n);
+ return;
+ }
+
+ reply.status = req.is_set ?
+ handle_set_cmd(&req) :
+ handle_get_cmd(&req);
+
+ n = write(1, &reply, sizeof(reply));
+ if (n != sizeof(reply)) {
+ dprintf(debug_fd, "reply failed %d\n", n);
+ return;
+ }
+ }
+}
+
+int main(void)
+{
+ debug_fd = open("/dev/console", 00000002 | 00000100);
+ dprintf(debug_fd, "Started bpfilter\n");
+ loop();
+ close(debug_fd);
+ return 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_BPFILTER_MSGFMT_H
+#define _NET_BPFILTER_MSGFMT_H
+
+struct mbox_request {
+ __u64 addr;
+ __u32 len;
+ __u32 is_set;
+ __u32 cmd;
+ __u32 pid;
+};
+
+struct mbox_reply {
+ __u32 status;
+};
+
+#endif
.doit = devlink_nl_cmd_eswitch_set_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
- metrics.o
+ metrics.o netlink.o
+
+obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
--- /dev/null
+obj-$(CONFIG_BPFILTER) += sockopt.o
+
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+#include <uapi/linux/bpf.h>
+#include <linux/wait.h>
+#include <linux/kmod.h>
+
+int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set);
+EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+
+int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
+ unsigned int optlen, bool is_set)
+{
+ if (!bpfilter_process_sockopt) {
+ int err = request_module("bpfilter");
+
+ if (err)
+ return err;
+ if (!bpfilter_process_sockopt)
+ return -ECHILD;
+ }
+ return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+}
+
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+ unsigned int optlen)
+{
+ return bpfilter_mbox_request(sk, optname, optval, optlen, true);
+}
+
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ return bpfilter_mbox_request(sk, optname, optval, len, false);
+}
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
+ [RTA_IP_PROTO] = { .type = NLA_U8 },
+ [RTA_SPORT] = { .type = NLA_U16 },
+ [RTA_DPORT] = { .type = NLA_U16 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
#include <linux/errqueue.h>
#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+
/*
* SOL_IP control messages.
*/
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_BPFILTER
+ if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
+ optname < BPFILTER_IPT_SET_MAX)
+ err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
+#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
int err;
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
+#ifdef CONFIG_BPFILTER
+ if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+ optname < BPFILTER_IPT_GET_MAX)
+ err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
+#ifdef CONFIG_BPFILTER
+ if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+ optname < BPFILTER_IPT_GET_MAX)
+ err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
--- /dev/null
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/ip.h>
+
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+ struct netlink_ext_ack *extack)
+{
+ *ip_proto = nla_get_u8(attr);
+
+ switch (*ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_ICMP:
+ return 0;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+ return -EOPNOTSUPP;
+ }
+}
+EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
EXPORT_SYMBOL_GPL(ip_route_output_flow);
/* called with rcu_read_lock held */
-static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
- struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
- u32 seq)
+static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
+ struct rtable *rt, u32 table_id, struct flowi4 *fl4,
+ struct sk_buff *skb, u32 portid, u32 seq)
{
- struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
unsigned long expires = 0;
}
} else
#endif
- if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
+ if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
goto nla_put_failure;
}
return -EMSGSIZE;
}
+static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
+ u8 ip_proto, __be16 sport,
+ __be16 dport)
+{
+ struct sk_buff *skb;
+ struct iphdr *iph;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ /* Reserve room for dummy headers, this skb can pass
+ * through good chunk of routing engine.
+ */
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb->protocol = htons(ETH_P_IP);
+ iph = skb_put(skb, sizeof(struct iphdr));
+ iph->protocol = ip_proto;
+ iph->saddr = src;
+ iph->daddr = dst;
+ iph->version = 0x4;
+ iph->frag_off = 0;
+ iph->ihl = 0x5;
+ skb_set_transport_header(skb, skb->len);
+
+ switch (iph->protocol) {
+ case IPPROTO_UDP: {
+ struct udphdr *udph;
+
+ udph = skb_put_zero(skb, sizeof(struct udphdr));
+ udph->source = sport;
+ udph->dest = dport;
+ udph->len = sizeof(struct udphdr);
+ udph->check = 0;
+ break;
+ }
+ case IPPROTO_TCP: {
+ struct tcphdr *tcph;
+
+ tcph = skb_put_zero(skb, sizeof(struct tcphdr));
+ tcph->source = sport;
+ tcph->dest = dport;
+ tcph->doff = sizeof(struct tcphdr) / 4;
+ tcph->rst = 1;
+ tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
+ src, dst, 0);
+ break;
+ }
+ case IPPROTO_ICMP: {
+ struct icmphdr *icmph;
+
+ icmph = skb_put_zero(skb, sizeof(struct icmphdr));
+ icmph->type = ICMP_ECHO;
+ icmph->code = 0;
+ }
+ }
+
+ return skb;
+}
+
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
- struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
+ u32 table_id = RT_TABLE_MAIN;
+ __be16 sport = 0, dport = 0;
struct fib_result res = {};
+ u8 ip_proto = IPPROTO_UDP;
struct rtable *rt = NULL;
+ struct sk_buff *skb;
+ struct rtmsg *rtm;
struct flowi4 fl4;
__be32 dst = 0;
__be32 src = 0;
+ kuid_t uid;
u32 iif;
int err;
int mark;
- struct sk_buff *skb;
- u32 table_id = RT_TABLE_MAIN;
- kuid_t uid;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
extack);
if (err < 0)
- goto errout;
+ return err;
rtm = nlmsg_data(nlh);
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb) {
- err = -ENOBUFS;
- goto errout;
- }
-
- /* Reserve room for dummy headers, this skb can pass
- through good chunk of routing engine.
- */
- skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
-
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
else
uid = (iif ? INVALID_UID : current_uid());
- /* Bugfix: need to give ip_route_input enough of an IP header to
- * not gag.
- */
- ip_hdr(skb)->protocol = IPPROTO_UDP;
- ip_hdr(skb)->saddr = src;
- ip_hdr(skb)->daddr = dst;
+ if (tb[RTA_IP_PROTO]) {
+ err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
+ &ip_proto, extack);
+ if (err)
+ return err;
+ }
+
+ if (tb[RTA_SPORT])
+ sport = nla_get_be16(tb[RTA_SPORT]);
- skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+ if (tb[RTA_DPORT])
+ dport = nla_get_be16(tb[RTA_DPORT]);
+
+ skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
+ if (!skb)
+ return -ENOBUFS;
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
+ if (sport)
+ fl4.fl4_sport = sport;
+ if (dport)
+ fl4.fl4_dport = dport;
+ fl4.flowi4_proto = ip_proto;
rcu_read_lock();
dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
err = -ENODEV;
- goto errout_free;
+ goto errout_rcu;
}
- skb->protocol = htons(ETH_P_IP);
+ fl4.flowi4_iif = iif; /* for rt_fill_info */
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
}
if (err)
- goto errout_free;
+ goto errout_rcu;
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = res.table ? res.table->tb_id : 0;
+ /* reset skb for netlink reply msg */
+ skb_trim(skb, 0);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_reset_mac_header(skb);
+
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
err = fib_props[res.type].error;
if (!err)
err = -EHOSTUNREACH;
- goto errout_free;
+ goto errout_rcu;
}
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
rt->rt_type, res.prefix, res.prefixlen,
fl4.flowi4_tos, res.fi, 0);
} else {
- err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
+ err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
}
if (err < 0)
- goto errout_free;
+ goto errout_rcu;
rcu_read_unlock();
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-errout:
- return err;
errout_free:
+ return err;
+errout_rcu:
rcu_read_unlock();
kfree_skb(skb);
- goto errout;
+ goto errout_free;
}
void ip_rt_multicast_event(struct in_device *in_dev)
return -EINVAL;
if (sk->sk_no_check_tx)
return -EINVAL;
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+ dst_xfrm(skb_dst(skb)))
return -EIO;
skb_shinfo(skb)->gso_size = cork->gso_size;
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
+#include <net/ip.h>
#include <trace/events/fib6.h>
#include <linux/uaccess.h>
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
[RTA_TABLE] = { .type = NLA_U32 },
+ [RTA_IP_PROTO] = { .type = NLA_U8 },
+ [RTA_SPORT] = { .type = NLA_U16 },
+ [RTA_DPORT] = { .type = NLA_U16 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
else
fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
+ if (tb[RTA_SPORT])
+ fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
+
+ if (tb[RTA_DPORT])
+ fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
+
+ if (tb[RTA_IP_PROTO]) {
+ err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
+ &fl6.flowi6_proto, extack);
+ if (err)
+ goto errout;
+ }
+
if (iif) {
struct net_device *dev;
int flags = 0;
return -EINVAL;
if (udp_sk(sk)->no_check6_tx)
return -EINVAL;
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+ dst_xfrm(skb_dst(skb)))
return -EIO;
skb_shinfo(skb)->gso_size = cork->gso_size;
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
-TEST_PROGS += udpgso_bench.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV=dummy0
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+setup()
+{
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add 198.51.100.1/24 dev dummy0
+ $IP -6 address add 2001:db8:1::1/64 dev dummy0
+
+ set +e
+}
+
+cleanup()
+{
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+ ip rule help 2>&1 | grep -q $1
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing $1 match"
+ return 1
+ fi
+
+ ip route get help 2>&1 | grep -q $2
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 get route too old, missing $2 match"
+ return 1
+ fi
+
+ return 0
+}
+
+fib_rule6_del()
+{
+ $IP -6 rule del $1
+ log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+ pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ $IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+ local match="$1"
+ local getmatch="$2"
+
+ $IP -6 rule add $match table $RTABLE
+ $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+ log_test $? 0 "rule6 check: $1"
+
+ fib_rule6_del_by_pref "$match"
+ log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+ # setup the fib rule redirect route
+ $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP6 iif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto icmp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ fi
+}
+
+fib_rule4_del()
+{
+ $IP rule del $1
+ log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+ pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ $IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+ local match="$1"
+ local getmatch="$2"
+
+ $IP rule add $match table $RTABLE
+ $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+ log_test $? 0 "rule4 check: $1"
+
+ fib_rule4_del_by_pref "$match"
+ log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+ # setup the fib rule redirect route
+ $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP iif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto icmp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ fi
+}
+
+run_fibrule_tests()
+{
+ log_section "IPv4 fib rule"
+ fib_rule4_test
+ log_section "IPv6 fib rule"
+ fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+exit $ret
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := uevent_filtering
+CFLAGS += -Wl,-no-as-needed -Wall
+
+uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
+ $(CC) $(CFLAGS) $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
--- /dev/null
+CONFIG_USER_NS=y
+CONFIG_NET=y
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/netlink.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
+#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
+#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
+#define __UEVENT_LISTEN_ALL -1
+
+ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+again:
+ ret = read(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+again:
+ ret = write(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (ret != pid)
+ goto again;
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int uevent_listener(unsigned long post_flags, bool expect_uevent,
+ int sync_fd)
+{
+ int sk_fd, ret;
+ socklen_t sk_addr_len;
+ int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+ uint64_t sync_add = 1;
+ struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
+ char buf[__UEVENT_BUFFER_SIZE] = { 0 };
+ struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
+ char control[CMSG_SPACE(sizeof(struct ucred))];
+ struct msghdr hdr = {
+ &rcv_addr, sizeof(rcv_addr), &iov, 1,
+ control, sizeof(control), 0,
+ };
+
+ sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+ NETLINK_KOBJECT_UEVENT);
+ if (sk_fd < 0) {
+ fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
+ return -1;
+ }
+
+ ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+ sizeof(rcv_buf_sz));
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
+ goto on_error;
+ }
+
+ sk_addr.nl_family = AF_NETLINK;
+ sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
+
+ sk_addr_len = sizeof(sk_addr);
+ ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
+ goto on_error;
+ }
+
+ ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
+ goto on_error;
+ }
+
+ if ((size_t)sk_addr_len != sizeof(sk_addr)) {
+ fprintf(stderr, "Invalid socket address size\n");
+ goto on_error;
+ }
+
+ if (post_flags & CLONE_NEWUSER) {
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare user namespace\n",
+ strerror(errno));
+ goto on_error;
+ }
+ }
+
+ if (post_flags & CLONE_NEWNET) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare network namespace\n",
+ strerror(errno));
+ goto on_error;
+ }
+ }
+
+ ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
+ close(sync_fd);
+ if (ret != sizeof(sync_add)) {
+ fprintf(stderr, "Failed to synchronize with parent process\n");
+ goto on_error;
+ }
+
+ fret = 0;
+ for (;;) {
+ ssize_t r;
+
+ r = recvmsg(sk_fd, &hdr, 0);
+ if (r <= 0) {
+ fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
+ ret = -1;
+ break;
+ }
+
+ /* ignore libudev messages */
+ if (memcmp(buf, "libudev", 8) == 0)
+ continue;
+
+ /* ignore uevents we didn't trigger */
+ if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
+ continue;
+
+ if (!expect_uevent) {
+ fprintf(stderr, "Received unexpected uevent:\n");
+ ret = -1;
+ }
+
+ if (TH_LOG_ENABLED) {
+ /* If logging is enabled dump the received uevent. */
+ (void)write_nointr(STDERR_FILENO, buf, r);
+ (void)write_nointr(STDERR_FILENO, "\n", 1);
+ }
+
+ break;
+ }
+
+on_error:
+ close(sk_fd);
+
+ return fret;
+}
+
+int trigger_uevent(unsigned int times)
+{
+ int fd, ret;
+ unsigned int i;
+
+ fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -EINVAL;
+
+ return -1;
+ }
+
+ for (i = 0; i < times; i++) {
+ ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to trigger uevent\n");
+ break;
+ }
+ }
+ close(fd);
+
+ return ret;
+}
+
+int set_death_signal(void)
+{
+ int ret;
+ pid_t ppid;
+
+ ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+ /* Check whether we have been orphaned. */
+ ppid = getppid();
+ if (ppid == 1) {
+ pid_t self;
+
+ self = getpid();
+ ret = kill(self, SIGKILL);
+ }
+
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int do_test(unsigned long pre_flags, unsigned long post_flags,
+ bool expect_uevent, int sync_fd)
+{
+ int ret;
+ uint64_t wait_val;
+ pid_t pid;
+ sigset_t mask;
+ sigset_t orig_mask;
+ struct timespec timeout;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
+ if (ret < 0) {
+ fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
+ return -1;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
+ return -1;
+ }
+
+ if (pid == 0) {
+ /* Make sure that we go away when our parent dies. */
+ ret = set_death_signal();
+ if (ret < 0) {
+ fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (pre_flags & CLONE_NEWUSER) {
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare user namespace\n",
+ strerror(errno));
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (pre_flags & CLONE_NEWNET) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare network namespace\n",
+ strerror(errno));
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
+ if (ret != sizeof(wait_val)) {
+ fprintf(stderr, "Failed to synchronize with child process\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Trigger 10 uevents to account for the case where the kernel might
+ * drop some.
+ */
+ ret = trigger_uevent(10);
+ if (ret < 0)
+ fprintf(stderr, "Failed triggering uevents\n");
+
+ /* Wait for 2 seconds before considering this failed. This should be
+ * plenty of time for the kernel to deliver the uevent even under heavy
+ * load.
+ */
+ timeout.tv_sec = 2;
+ timeout.tv_nsec = 0;
+
+again:
+ ret = sigtimedwait(&mask, NULL, &timeout);
+ if (ret < 0) {
+ if (errno == EINTR)
+ goto again;
+
+ if (!expect_uevent)
+ ret = kill(pid, SIGTERM); /* success */
+ else
+ ret = kill(pid, SIGUSR1); /* error */
+ if (ret < 0)
+ return -1;
+ }
+
+ ret = wait_for_pid(pid);
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGTERM)
+ _exit(EXIT_SUCCESS);
+
+ _exit(EXIT_FAILURE);
+}
+
+TEST(uevent_filtering)
+{
+ int ret, sync_fd;
+ struct sigaction act;
+
+ if (geteuid()) {
+ TH_LOG("Uevent filtering tests require root privileges. Skipping test");
+ _exit(KSFT_SKIP);
+ }
+
+ ret = access(__DEV_FULL, F_OK);
+ EXPECT_EQ(0, ret) {
+ if (errno == ENOENT) {
+ TH_LOG(__DEV_FULL " does not exist. Skipping test");
+ _exit(KSFT_SKIP);
+ }
+
+ _exit(KSFT_FAIL);
+ }
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+
+ ret = sigaction(SIGTERM, &act, NULL);
+ ASSERT_EQ(0, ret);
+
+ sync_fd = eventfd(0, EFD_CLOEXEC);
+ ASSERT_GE(sync_fd, 0);
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace owned by
+ * initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in non-initial network namespace
+ * owned by initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - unshare user namespace
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in non-initial network namespace
+ * owned by non-initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives no uevent
+ */
+ ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare network namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWNET, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare user namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare user namespace
+ * - unshare network namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+do_cleanup:
+ close(sync_fd);
+}
+
+TEST_HARNESS_MAIN