HINIC_PORT_CMD_SET_RX_CSUM = 26,
+ HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD = 27,
+
HINIC_PORT_CMD_GET_PORT_STATISTICS = 28,
HINIC_PORT_CMD_CLEAR_PORT_STATISTICS = 29,
#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0
#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U
#define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \
RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \
#define HINIC_GET_RX_PKT_TYPE(offload_type) \
RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE)
+#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \
+ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)
+
+#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU
+#define RQ_CQE_SGE_VLAN_SHIFT 0
+
+#define RQ_CQE_SGE_GET(val, member) (((val) >> \
+ RQ_CQE_SGE_##member##_SHIFT) & \
+ RQ_CQE_SGE_##member##_MASK)
+
+#define HINIC_GET_RX_VLAN_TAG(vlan_len) \
+ RQ_CQE_SGE_GET(vlan_len, VLAN)
+
#define HINIC_RSS_TYPE_VALID_SHIFT 23
#define HINIC_RSS_TYPE_TCP_IPV6_EXT_SHIFT 24
#define HINIC_RSS_TYPE_IPV6_EXT_SHIFT 25
.ndo_get_stats64 = hinic_get_stats64,
.ndo_fix_features = hinic_fix_features,
.ndo_set_features = hinic_set_features,
-
};
static void netdev_features_init(struct net_device *netdev)
{
netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |
- NETIF_F_RXCSUM | NETIF_F_LRO;
+ NETIF_F_RXCSUM | NETIF_F_LRO |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
netdev->vlan_features = netdev->hw_features;
HINIC_LRO_MAX_WQE_NUM_DEFAULT);
}
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+ err = hinic_set_rx_vlan_offload(nic_dev,
+ !!(features &
+ NETIF_F_HW_VLAN_CTAG_RX));
+
return err;
}
return 0;
}
+int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_vlan_cfg vlan_cfg;
+ struct hinic_hwif *hwif;
+ struct pci_dev *pdev;
+ u16 out_size;
+ int err;
+
+ if (!hwdev)
+ return -EINVAL;
+
+ hwif = hwdev->hwif;
+ pdev = hwif->pdev;
+ vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+ vlan_cfg.vlan_rx_offload = en;
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD,
+ &vlan_cfg, sizeof(vlan_cfg),
+ &vlan_cfg, &out_size);
+ if (err || !out_size || vlan_cfg.status) {
+ dev_err(&pdev->dev,
+ "Failed to set rx vlan offload, err: %d, status: 0x%x, out size: 0x%x\n",
+ err, vlan_cfg.status, out_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs)
{
struct hinic_hwdev *hwdev = nic_dev->hwdev;
u32 timer;
};
+struct hinic_vlan_cfg {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_id;
+ u8 vlan_rx_offload;
+ u8 rsvd1[5];
+};
+
struct hinic_rss_template_mgmt {
u8 status;
u8 version;
int hinic_get_vport_stats(struct hinic_dev *nic_dev,
struct hinic_vport_stats *stats);
+
+int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en);
+
#endif
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include <linux/cpumask.h>
+#include <linux/if_vlan.h>
#include <asm/barrier.h>
#include "hinic_common.h"
static int rxq_recv(struct hinic_rxq *rxq, int budget)
{
struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
+ struct net_device *netdev = rxq->netdev;
u64 pkt_len = 0, rx_bytes = 0;
struct hinic_rq *rq = rxq->rq;
struct hinic_rq_wqe *rq_wqe;
struct hinic_sge sge;
unsigned int status;
struct sk_buff *skb;
+ u32 offload_type;
u16 ci, num_lro;
u16 num_wqe = 0;
+ u32 vlan_len;
+ u16 vid;
while (pkts < budget) {
num_wqes = 0;
hinic_rq_put_wqe(rq, ci,
(num_wqes + 1) * HINIC_RQ_WQE_SIZE);
+ offload_type = be32_to_cpu(cqe->offload_type);
+ vlan_len = be32_to_cpu(cqe->len);
+ if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)) {
+ vid = HINIC_GET_RX_VLAN_TAG(vlan_len);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+ }
+
skb_record_rx_queue(skb, qp->q_id);
skb->protocol = eth_type_trans(skb, rxq->netdev);
return 1;
}
+static void offload_vlan(struct hinic_sq_task *task, u32 *queue_info,
+ u16 vlan_tag, u16 vlan_pri)
+{
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
+ HINIC_SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
+
+ *queue_info |= HINIC_SQ_CTRL_SET(vlan_pri, QUEUE_INFO_PRI);
+}
+
static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
u32 *queue_info)
{
enum hinic_offload_type offload = 0;
+ u16 vlan_tag;
int enabled;
enabled = offload_tso(task, queue_info, skb);
return -EPROTONOSUPPORT;
}
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ vlan_tag = skb_vlan_tag_get(skb);
+ offload_vlan(task, queue_info, vlan_tag,
+ vlan_tag >> VLAN_PRIO_SHIFT);
+ offload |= TX_OFFLOAD_VLAN;
+ }
+
if (offload)
hinic_task_set_l2hdr(task, skb_network_offset(skb));
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
- wmb();
+ dma_wmb();
tx_ring->next_to_use = i;
}
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
- wmb();
+ dma_wmb();
writel(i, adapter->hw.hw_addr + rx_ring->rdt);
}
}
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
- wmb();
+ dma_wmb();
writel(i, hw->hw_addr + rx_ring->rdt);
}
}
ew32(TCTL, E1000_TCTL_PSP);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
ctrl = er32(CTRL);
ew32(TCTL, tctl);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Must acquire the MDIO ownership before MAC reset.
* Ownership defaults to firmware after a reset.
#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */
#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master Req status */
+/* PCIm function state */
+#define E1000_STATUS_PCIM_STATE 0x40000000
+
#define HALF_DUPLEX 1
#define FULL_DUPLEX 2
/* board specific private data structure */
struct e1000_adapter {
- struct timer_list watchdog_timer;
struct timer_list phy_info_timer;
struct timer_list blink_timer;
struct work_struct reset_task;
- struct work_struct watchdog_task;
+ struct delayed_work watchdog_task;
+
+ struct workqueue_struct *e1000_workqueue;
const struct e1000_info *ei;
/* Disable all the interrupts */
ew32(IMC, 0xFFFFFFFF);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Test each interrupt */
for (i = 0; i < 10; i++) {
ew32(IMC, mask);
ew32(ICS, mask);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (adapter->test_icr & mask) {
*data = 3;
ew32(IMS, mask);
ew32(ICS, mask);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (!(adapter->test_icr & mask)) {
*data = 4;
ew32(IMC, ~mask & 0x00007FFF);
ew32(ICS, ~mask & 0x00007FFF);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (adapter->test_icr) {
*data = 5;
/* Disable all the interrupts */
ew32(IMC, 0xFFFFFFFF);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Unhook test interrupt handler */
free_irq(irq, netdev);
*/
ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
return 0;
}
hw->phy.media_type == e1000_media_type_internal_serdes) {
ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
break;
}
/* Fall Through */
u16 count = 20;
do {
- usleep_range(5000, 10000);
+ usleep_range(5000, 6000);
} while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--);
msleep(30);
/* Ungate automatic PHY configuration on non-managed 82579 */
if ((hw->mac.type == e1000_pch2lan) &&
!(fwsm & E1000_ICH_FWSM_FW_VALID)) {
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
e1000_gate_hw_phy_config_ich8lan(hw, false);
}
phy->id = 0;
while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) &&
(i++ < 100)) {
- usleep_range(1000, 2000);
+ usleep_range(1000, 1100);
ret_val = e1000e_get_phy_id(hw);
if (ret_val)
return ret_val;
goto out;
}
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
}
e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
return blocked ? E1000_BLK_PHY_RESET : 0;
}
return 0;
/* Allow time for h/w to get to quiescent state after reset */
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Perform any necessary post-reset workarounds */
switch (hw->mac.type) {
if (hw->mac.type == e1000_pch2lan) {
/* Ungate automatic PHY configuration on non-managed 82579 */
if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
e1000_gate_hw_phy_config_ich8lan(hw, false);
}
*/
if (!ret_val) {
nvm->ops.reload(hw);
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
}
out:
*/
if (!ret_val) {
nvm->ops.reload(hw);
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
}
out:
ew32(TCTL, E1000_TCTL_PSP);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Workaround for ICH8 bit corruption issue in FIFO memory */
if (hw->mac.type == e1000_ich8lan) {
* milliseconds even if the other end is doing it in SW).
*/
for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
status = er32(STATUS);
if (status & E1000_STATUS_LU)
break;
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task, 1);
}
/* Reset on uncorrectable ECC error */
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task, 1);
}
/* Reset on uncorrectable ECC error */
hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task, 1);
}
if (!test_bit(__E1000_DOWN, &adapter->state))
if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
ew32(RCTL, rctl & ~E1000_RCTL_EN);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (adapter->flags2 & FLAG2_DMA_BURST) {
/* set the writeback threshold (only takes effect if the RDTR
case e1000_pch_lpt:
case e1000_pch_spt:
case e1000_pch_cnp:
- fc->refresh_time = 0x0400;
+ fc->refresh_time = 0xFFFF;
+ fc->pause_time = 0xFFFF;
if (adapter->netdev->mtu <= ETH_DATA_LEN) {
fc->high_water = 0x05C20;
fc->low_water = 0x05048;
- fc->pause_time = 0x0650;
break;
}
/* flush both disables and wait for them to finish */
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
e1000_irq_disable(adapter);
napi_synchronize(&adapter->napi);
- del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
spin_lock(&adapter->stats64_lock);
{
might_sleep();
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
+ usleep_range(1000, 1100);
e1000e_down(adapter, true);
e1000e_up(adapter);
clear_bit(__E1000_RESETTING, &adapter->state);
int count = E1000_CHECK_RESET_COUNT;
while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
}
}
-/**
- * e1000_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-static void e1000_watchdog(struct timer_list *t)
-{
- struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-
- /* Do the rest outside of interrupt context */
- schedule_work(&adapter->watchdog_task);
-
- /* TODO: make this use queue_delayed_work() */
-}
-
static void e1000_watchdog_task(struct work_struct *work)
{
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter,
- watchdog_task);
+ watchdog_task.work);
struct net_device *netdev = adapter->netdev;
struct e1000_mac_info *mac = &adapter->hw.mac;
struct e1000_phy_info *phy = &adapter->hw.phy;
struct e1000_ring *tx_ring = adapter->tx_ring;
+ u32 dmoff_exit_timeout = 100, tries = 0;
struct e1000_hw *hw = &adapter->hw;
- u32 link, tctl;
+ u32 link, tctl, pcim_state;
if (test_bit(__E1000_DOWN, &adapter->state))
return;
/* Cancel scheduled suspend requests. */
pm_runtime_resume(netdev->dev.parent);
+ /* Checking if MAC is in DMoff state*/
+ pcim_state = er32(STATUS);
+ while (pcim_state & E1000_STATUS_PCIM_STATE) {
+ if (tries++ == dmoff_exit_timeout) {
+ e_dbg("Error in exiting dmoff\n");
+ break;
+ }
+ usleep_range(10000, 20000);
+ pcim_state = er32(STATUS);
+
+ /* Checking if MAC exited DMoff state */
+ if (!(pcim_state & E1000_STATUS_PCIM_STATE))
+ e1000_phy_hw_reset(&adapter->hw);
+ }
+
/* update snapshot of PHY registers on LSC */
e1000_phy_read_status(adapter);
mac->ops.get_link_up_info(&adapter->hw,
/* Reset the timer */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies + 2 * HZ));
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task,
+ round_jiffies(2 * HZ));
}
#define E1000_TX_FLAGS_CSUM 0x00000001
}
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
+ usleep_range(1000, 1100);
/* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */
adapter->max_frame_size = max_frame;
e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
int count = E1000_CHECK_RESET_COUNT;
while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
int count = E1000_CHECK_RESET_COUNT;
while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
goto err_eeprom;
}
- timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+ adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+ e1000e_driver_name);
+
+ if (!adapter->e1000_workqueue) {
+ err = -ENOMEM;
+ goto err_workqueue;
+ }
+
+ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task);
+ queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task,
+ 0);
+
timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
INIT_WORK(&adapter->reset_task, e1000_reset_task);
- INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
return 0;
err_register:
+ flush_workqueue(adapter->e1000_workqueue);
+ destroy_workqueue(adapter->e1000_workqueue);
+err_workqueue:
if (!(adapter->flags & FLAG_HAS_AMT))
e1000e_release_hw_control(adapter);
err_eeprom:
*/
if (!down)
set_bit(__E1000_DOWN, &adapter->state);
- del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
cancel_work_sync(&adapter->reset_task);
- cancel_work_sync(&adapter->watchdog_task);
cancel_work_sync(&adapter->downshift_task);
cancel_work_sync(&adapter->update_phy_task);
cancel_work_sync(&adapter->print_hang_task);
+ cancel_delayed_work(&adapter->watchdog_task);
+ flush_workqueue(adapter->e1000_workqueue);
+ destroy_workqueue(adapter->e1000_workqueue);
+
if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
cancel_work_sync(&adapter->tx_hwtstamp_work);
if (adapter->tx_hwtstamp_skb) {
break;
}
}
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
nvm->ops.release(hw);
}
#include <net/ip6_checksum.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
#include <linux/if_bridge.h>
#include <linux/clocksource.h>
#include <linux/net_tstamp.h>
u8 pit_index;
};
+struct i40e_fwd_adapter {
+ struct net_device *netdev;
+ int bit_no;
+};
+
struct i40e_channel {
struct list_head list;
bool initialized;
struct i40e_aqc_vsi_properties_data info;
u64 max_tx_rate;
+ struct i40e_fwd_adapter *fwd;
/* track this channel belongs to which VSI */
struct i40e_vsi *parent_vsi;
};
+static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
+{
+ return !!ch->fwd;
+}
+
+static inline u8 *i40e_channel_mac(struct i40e_channel *ch)
+{
+ if (i40e_is_channel_macvlan(ch))
+ return ch->fwd->netdev->dev_addr;
+ else
+ return NULL;
+}
+
/* struct that defines the Ethernet device */
struct i40e_pf {
struct pci_dev *pdev;
struct list_head ch_list;
u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS];
+ /* macvlan fields */
+#define I40E_MAX_MACVLANS 128 /* Max HW vectors - 1 on FVL */
+#define I40E_MIN_MACVLAN_VECTORS 2 /* Min vectors to enable macvlans */
+ DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS);
+ struct list_head macvlan_list;
+ int macvlan_cnt;
+
void *priv; /* client driver data reference. */
/* VSI specific handlers */
return -ENOENT;
}
- /* Success, update channel */
- ch->enabled_tc = enabled_tc;
+ /* Success, update channel, set enabled_tc only if the channel
+ * is not a macvlan
+ */
+ ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
ch->seid = ctxt.seid;
ch->vsi_number = ctxt.vsi_number;
ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
}
}
+/**
+ * i40e_del_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function deletes a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
+ const u8 *macaddr, int *aq_err)
+{
+ struct i40e_aqc_remove_macvlan_element_data element;
+ i40e_status status;
+
+ memset(&element, 0, sizeof(element));
+ ether_addr_copy(element.mac_addr, macaddr);
+ element.vlan_tag = 0;
+ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+ status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL);
+ *aq_err = hw->aq.asq_last_status;
+
+ return status;
+}
+
+/**
+ * i40e_add_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function adds a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
+ const u8 *macaddr, int *aq_err)
+{
+ struct i40e_aqc_add_macvlan_element_data element;
+ i40e_status status;
+ u16 cmd_flags = 0;
+
+ ether_addr_copy(element.mac_addr, macaddr);
+ element.vlan_tag = 0;
+ element.queue_number = 0;
+ element.match_method = I40E_AQC_MM_ERR_NO_RES;
+ cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
+ element.flags = cpu_to_le16(cmd_flags);
+ status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL);
+ *aq_err = hw->aq.asq_last_status;
+
+ return status;
+}
+
+/**
+ * i40e_reset_ch_rings - Reset the queue contexts in a channel
+ * @vsi: the VSI we want to access
+ * @ch: the channel we want to access
+ */
+static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
+{
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+ int i;
+
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ pf_q = ch->base_queue + i;
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = NULL;
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = NULL;
+ }
+}
+
+/**
+ * i40e_free_macvlan_channels
+ * @vsi: the VSI we want to access
+ *
+ * This function frees the Qs of the channel VSI from
+ * the stack and also deletes the channel VSIs which
+ * serve as macvlans.
+ */
+static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ int ret;
+
+ if (list_empty(&vsi->macvlan_list))
+ return;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ struct i40e_vsi *parent_vsi;
+
+ if (i40e_is_channel_macvlan(ch)) {
+ i40e_reset_ch_rings(vsi, ch);
+ clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+ netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev);
+ netdev_set_sb_channel(ch->fwd->netdev, 0);
+ kfree(ch->fwd);
+ ch->fwd = NULL;
+ }
+
+ list_del(&ch->list);
+ parent_vsi = ch->parent_vsi;
+ if (!parent_vsi || !ch->initialized) {
+ kfree(ch);
+ continue;
+ }
+
+ /* remove the VSI */
+ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+ NULL);
+ if (ret)
+ dev_err(&vsi->back->pdev->dev,
+ "unable to remove channel (%d) for parent VSI(%d)\n",
+ ch->seid, parent_vsi->seid);
+ kfree(ch);
+ }
+ vsi->macvlan_cnt = 0;
+}
+
+/**
+ * i40e_fwd_ring_up - bring the macvlan device up
+ * @vsi: the VSI we want to access
+ * @vdev: macvlan netdevice
+ * @fwd: the private fwd structure
+ */
+static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
+ struct i40e_fwd_adapter *fwd)
+{
+ int ret = 0, num_tc = 1, i, aq_err;
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+
+ if (list_empty(&vsi->macvlan_list))
+ return -EINVAL;
+
+ /* Go through the list and find an available channel */
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ if (!i40e_is_channel_macvlan(ch)) {
+ ch->fwd = fwd;
+ /* record configuration for macvlan interface in vdev */
+ for (i = 0; i < num_tc; i++)
+ netdev_bind_sb_channel_queue(vsi->netdev, vdev,
+ i,
+ ch->num_queue_pairs,
+ ch->base_queue);
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+
+ /* Get to TX ring ptr */
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = ch;
+
+ /* Get the RX ring ptr */
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = ch;
+ }
+ break;
+ }
+ }
+
+ /* Guarantee all rings are updated before we update the
+ * MAC address filter.
+ */
+ wmb();
+
+ /* Add a mac filter */
+ ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
+ if (ret) {
+ /* if we cannot add the MAC rule then disable the offload */
+ macvlan_release_l2fw_offload(vdev);
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->netdev = NULL;
+ }
+ dev_info(&pf->pdev->dev,
+ "Error adding mac filter on macvlan err %s, aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, aq_err));
+ netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n");
+ }
+
+ return ret;
+}
+
+/**
+ * i40e_setup_macvlans - create the channels which will be macvlans
+ * @vsi: the VSI we want to access
+ * @macvlan_cnt: no. of macvlans to be setup
+ * @qcnt: no. of Qs per macvlan
+ * @vdev: macvlan netdevice
+ */
+static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
+ struct net_device *vdev)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_vsi_context ctxt;
+ u16 sections, qmap, num_qps;
+ struct i40e_channel *ch;
+ int i, pow, ret = 0;
+ u8 offset = 0;
+
+ if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
+ return -EINVAL;
+
+ num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
+
+ /* find the next higher power-of-2 of num queue pairs */
+ pow = fls(roundup_pow_of_two(num_qps) - 1);
+
+ qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+ /* Setup context bits for the main VSI */
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.seid = vsi->seid;
+ ctxt.pf_num = vsi->back->hw.pf_id;
+ ctxt.vf_num = 0;
+ ctxt.uplink_seid = vsi->uplink_seid;
+ ctxt.info = vsi->info;
+ ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+ ctxt.info.valid_sections |= cpu_to_le16(sections);
+
+ /* Reconfigure RSS for main VSI with new max queue count */
+ vsi->rss_size = max_t(u16, num_qps, qcnt);
+ ret = i40e_vsi_config_rss(vsi);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed to reconfig RSS for num_queues (%u)\n",
+ vsi->rss_size);
+ return ret;
+ }
+ vsi->reconfig_rss = true;
+ dev_dbg(&vsi->back->pdev->dev,
+ "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
+ vsi->next_base_queue = num_qps;
+ vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
+
+ /* Update the VSI after updating the VSI queue-mapping
+ * information
+ */
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Update vsi tc config failed, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+ /* update the local VSI info with updated queue map */
+ i40e_vsi_update_queue_map(vsi, &ctxt);
+ vsi->info.valid_sections = 0;
+
+ /* Create channels for macvlans */
+ INIT_LIST_HEAD(&vsi->macvlan_list);
+ for (i = 0; i < macvlan_cnt; i++) {
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+ if (!ch) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+ INIT_LIST_HEAD(&ch->list);
+ ch->num_queue_pairs = qcnt;
+ if (!i40e_setup_channel(pf, vsi, ch)) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+ ch->parent_vsi = vsi;
+ vsi->cnt_q_avail -= ch->num_queue_pairs;
+ vsi->macvlan_cnt++;
+ list_add_tail(&ch->list, &vsi->macvlan_list);
+ }
+
+ return ret;
+
+err_free:
+ dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
+ i40e_free_macvlan_channels(vsi);
+
+ return ret;
+}
+
+/**
+ * i40e_fwd_add - configure macvlans
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ **/
+static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors;
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_fwd_adapter *fwd;
+ int avail_macvlan, ret;
+
+ if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if ((pf->flags & I40E_FLAG_TC_MQPRIO)) {
+ netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
+ netdev_info(netdev, "Not enough vectors available to support macvlans\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* The macvlan device has to be a single Q device so that the
+ * tc_to_txq field can be reused to pick the tx queue.
+ */
+ if (netif_is_multiqueue(vdev))
+ return ERR_PTR(-ERANGE);
+
+ if (!vsi->macvlan_cnt) {
+ /* reserve bit 0 for the pf device */
+ set_bit(0, vsi->fwd_bitmask);
+
+ /* Try to reserve as many queues as possible for macvlans. First
+ * reserve 3/4th of max vectors, then half, then quarter and
+ * calculate Qs per macvlan as you go
+ */
+ vectors = pf->num_lan_msix;
+ if (vectors <= I40E_MAX_MACVLANS && vectors > 64) {
+ /* allocate 4 Qs per macvlan and 32 Qs to the PF*/
+ q_per_macvlan = 4;
+ macvlan_cnt = (vectors - 32) / 4;
+ } else if (vectors <= 64 && vectors > 32) {
+ /* allocate 2 Qs per macvlan and 16 Qs to the PF*/
+ q_per_macvlan = 2;
+ macvlan_cnt = (vectors - 16) / 2;
+ } else if (vectors <= 32 && vectors > 16) {
+ /* allocate 1 Q per macvlan and 16 Qs to the PF*/
+ q_per_macvlan = 1;
+ macvlan_cnt = vectors - 16;
+ } else if (vectors <= 16 && vectors > 8) {
+ /* allocate 1 Q per macvlan and 8 Qs to the PF */
+ q_per_macvlan = 1;
+ macvlan_cnt = vectors - 8;
+ } else {
+ /* allocate 1 Q per macvlan and 1 Q to the PF */
+ q_per_macvlan = 1;
+ macvlan_cnt = vectors - 1;
+ }
+
+ if (macvlan_cnt == 0)
+ return ERR_PTR(-EBUSY);
+
+ /* Quiesce VSI queues */
+ i40e_quiesce_vsi(vsi);
+
+ /* sets up the macvlans but does not "enable" them */
+ ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
+ vdev);
+ if (ret)
+ return ERR_PTR(ret);
+
+ /* Unquiesce VSI */
+ i40e_unquiesce_vsi(vsi);
+ }
+ avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
+ vsi->macvlan_cnt);
+ if (avail_macvlan >= I40E_MAX_MACVLANS)
+ return ERR_PTR(-EBUSY);
+
+ /* create the fwd struct */
+ fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);
+ if (!fwd)
+ return ERR_PTR(-ENOMEM);
+
+ set_bit(avail_macvlan, vsi->fwd_bitmask);
+ fwd->bit_no = avail_macvlan;
+ netdev_set_sb_channel(vdev, avail_macvlan);
+ fwd->netdev = vdev;
+
+ if (!netif_running(netdev))
+ return fwd;
+
+ /* Set fwd ring up */
+ ret = i40e_fwd_ring_up(vsi, vdev, fwd);
+ if (ret) {
+ /* unbind the queues and drop the subordinate channel config */
+ netdev_unbind_sb_channel(netdev, vdev);
+ netdev_set_sb_channel(vdev, 0);
+
+ kfree(fwd);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return fwd;
+}
+
+/**
+ * i40e_del_all_macvlans - Delete all the mac filters on the channels
+ * @vsi: the VSI we want to access
+ */
+static void i40e_del_all_macvlans(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int aq_err, ret = 0;
+
+ if (list_empty(&vsi->macvlan_list))
+ return;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ if (i40e_is_channel_macvlan(ch)) {
+ ret = i40e_del_macvlan_filter(hw, ch->seid,
+ i40e_channel_mac(ch),
+ &aq_err);
+ if (!ret) {
+ /* Reset queue contexts */
+ i40e_reset_ch_rings(vsi, ch);
+ clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+ netdev_unbind_sb_channel(vsi->netdev,
+ ch->fwd->netdev);
+ netdev_set_sb_channel(ch->fwd->netdev, 0);
+ kfree(ch->fwd);
+ ch->fwd = NULL;
+ }
+ }
+ }
+}
+
+/**
+ * i40e_fwd_del - delete macvlan interfaces
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ */
+static void i40e_fwd_del(struct net_device *netdev, void *vdev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_fwd_adapter *fwd = vdev;
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int aq_err, ret = 0;
+
+ /* Find the channel associated with the macvlan and del mac filter */
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ if (i40e_is_channel_macvlan(ch) &&
+ ether_addr_equal(i40e_channel_mac(ch),
+ fwd->netdev->dev_addr)) {
+ ret = i40e_del_macvlan_filter(hw, ch->seid,
+ i40e_channel_mac(ch),
+ &aq_err);
+ if (!ret) {
+ /* Reset queue contexts */
+ i40e_reset_ch_rings(vsi, ch);
+ clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+ netdev_unbind_sb_channel(netdev, fwd->netdev);
+ netdev_set_sb_channel(fwd->netdev, 0);
+ kfree(ch->fwd);
+ ch->fwd = NULL;
+ } else {
+ dev_info(&pf->pdev->dev,
+ "Error deleting mac filter on macvlan err %s, aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, aq_err));
+ }
+ break;
+ }
+ }
+}
+
/**
* i40e_setup_tc - configure multiple traffic classes
* @netdev: net device to configure
return -EINVAL;
}
+ if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt)
+ i40e_del_all_macvlans(vsi);
+
need_reset = i40e_set_ntuple(pf, features);
if (need_reset)
.ndo_bpf = i40e_xdp,
.ndo_xdp_xmit = i40e_xdp_xmit,
.ndo_xsk_async_xmit = i40e_xsk_async_xmit,
+ .ndo_dfwd_add_station = i40e_fwd_add,
+ .ndo_dfwd_del_station = i40e_fwd_del,
};
/**
/* record features VLANs can make use of */
netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+ /* enable macvlan offloads */
+ netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+
hw_features = hw_enc_features |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX;
#define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s)
#define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m)
-#define iavf_debug(h, m, s, ...) iavf_debug_d(h, m, s, ##__VA_ARGS__)
-extern void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...)
- __printf(3, 4);
+#define iavf_debug(h, m, s, ...) \
+do { \
+ if (((m) & (h)->debug_mask)) \
+ pr_info("iavf %02x:%02x.%x " s, \
+ (h)->bus.bus_id, (h)->bus.device, \
+ (h)->bus.func, ##__VA_ARGS__); \
+} while (0)
#endif /* _IAVF_OSDEP_H_ */
struct iavf_rx_buffer *rx_buffer,
unsigned int size)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ void *va;
#if (PAGE_SIZE < 8192)
unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
#else
if (!rx_buffer)
return NULL;
/* prefetch first cache line of first page */
+ va = page_address(rx_buffer->page) + rx_buffer->page_offset;
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
struct iavf_rx_buffer *rx_buffer,
unsigned int size)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ void *va;
#if (PAGE_SIZE < 8192)
unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
#else
if (!rx_buffer)
return NULL;
/* prefetch first cache line of first page */
+ va = page_address(rx_buffer->page) + rx_buffer->page_offset;
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
struct virtchnl_vsi_queue_config_info *vqci;
struct virtchnl_queue_pair_info *vqpi;
int pairs = adapter->num_active_queues;
- int i, len, max_frame = IAVF_MAX_RXBUFFER;
+ int i, max_frame = IAVF_MAX_RXBUFFER;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
return;
}
adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES;
- len = sizeof(struct virtchnl_vsi_queue_config_info) +
- (sizeof(struct virtchnl_queue_pair_info) * pairs);
+ len = struct_size(vqci, qpair, pairs);
vqci = kzalloc(len, GFP_KERNEL);
if (!vqci)
return;
{
struct virtchnl_irq_map_info *vimi;
struct virtchnl_vector_map *vecmap;
- int v_idx, q_vectors, len;
struct iavf_q_vector *q_vector;
+ int v_idx, q_vectors;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
q_vectors = adapter->num_msix_vectors - NONQ_VECS;
- len = sizeof(struct virtchnl_irq_map_info) +
- (adapter->num_msix_vectors *
- sizeof(struct virtchnl_vector_map));
+ len = struct_size(vimi, vecmap, adapter->num_msix_vectors);
vimi = kzalloc(len, GFP_KERNEL);
if (!vimi)
return;
void iavf_add_ether_addrs(struct iavf_adapter *adapter)
{
struct virtchnl_ether_addr_list *veal;
- int len, i = 0, count = 0;
struct iavf_mac_filter *f;
+ int i = 0, count = 0;
bool more = false;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
}
adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR;
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
if (len > IAVF_MAX_AQ_BUF_SIZE) {
dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n");
count = (IAVF_MAX_AQ_BUF_SIZE -
sizeof(struct virtchnl_ether_addr_list)) /
sizeof(struct virtchnl_ether_addr);
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
more = true;
}
{
struct virtchnl_ether_addr_list *veal;
struct iavf_mac_filter *f, *ftmp;
- int len, i = 0, count = 0;
+ int i = 0, count = 0;
bool more = false;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
}
adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR;
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
if (len > IAVF_MAX_AQ_BUF_SIZE) {
dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n");
count = (IAVF_MAX_AQ_BUF_SIZE -
sizeof(struct virtchnl_ether_addr_list)) /
sizeof(struct virtchnl_ether_addr);
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
more = true;
}
veal = kzalloc(len, GFP_ATOMIC);
void iavf_enable_channels(struct iavf_adapter *adapter)
{
struct virtchnl_tc_info *vti = NULL;
- u16 len;
+ size_t len;
int i;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
return;
}
- len = ((adapter->num_tc - 1) * sizeof(struct virtchnl_channel_info)) +
- sizeof(struct virtchnl_tc_info);
-
+ len = struct_size(vti, list, adapter->num_tc - 1);
vti = kzalloc(len, GFP_KERNEL);
if (!vti)
return;
u16 i, num_groups_added = 0;
enum ice_status status = 0;
struct ice_hw *hw = pi->hw;
- u16 buf_size;
+ size_t buf_size;
u32 teid;
- buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
+ buf_size = struct_size(buf, generic, num_nodes - 1);
buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
if (!buf)
return ICE_ERR_NO_MEMORY;
#define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40))
#define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40))
+#define E1000_I210_RR2DCDELAY 0x5BF4
+
#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n))
#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */
static int igb_get_regs_len(struct net_device *netdev)
{
-#define IGB_REGS_LEN 739
+#define IGB_REGS_LEN 740
return IGB_REGS_LEN * sizeof(u32);
}
regs_buff[554] = adapter->stats.b2ogprc;
}
- if (hw->mac.type != e1000_82576)
- return;
- for (i = 0; i < 12; i++)
- regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
- for (i = 0; i < 4; i++)
- regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
-
- for (i = 0; i < 12; i++)
- regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+ if (hw->mac.type == e1000_82576) {
+ for (i = 0; i < 12; i++)
+ regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
+ for (i = 0; i < 4; i++)
+ regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
+
+ for (i = 0; i < 12; i++)
+ regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+ }
+
+ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211)
+ regs_buff[739] = rd32(E1000_I210_RR2DCDELAY);
}
static int igb_get_eeprom_len(struct net_device *netdev)
*/
if (tx_ring->launchtime_enable) {
ts = ns_to_timespec64(first->skb->tstamp);
+ first->skb->tstamp = 0;
context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
} else {
context_desc->seqnum_seed = 0;
struct ixgbe_ipsec *ipsec = adapter->ipsec;
int i;
+ if (!ipsec)
+ return;
+
/* search rx sa table */
for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) {
if (!ipsec->rx_tbl[i].used)
*/
rem = (NS_PER_SEC - rem);
- /* Adjust the clock edge to align with the next full second. This
- * assumes that the cycle counter shift is small enough to avoid
- * overflowing when shifting the remainder.
- */
- clock_edge += div_u64((rem << cc->shift), cc->mult);
+ /* Adjust the clock edge to align with the next full second. */
+ clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
trgttiml = (u32)clock_edge;
trgttimh = (u32)(clock_edge >> 32);
*/
rem = (NS_PER_SEC - rem);
- /* Adjust the clock edge to align with the next full second. This
- * assumes that the cycle counter shift is small enough to avoid
- * overflowing when shifting the remainder.
- */
- clock_edge += div_u64((rem << cc->shift), cc->mult);
+ /* Adjust the clock edge to align with the next full second. */
+ clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
/* X550 hardware stores the time in 32bits of 'billions of cycles' and
* 32bits of 'cycles'. There's no guarantee that cycles represents
struct ethtool_link_ksettings *cmd)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 link_speed = 0;
- bool link_up;
ethtool_link_ksettings_zero_link_mode(cmd, supported);
ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full);
cmd->base.autoneg = AUTONEG_DISABLE;
cmd->base.port = -1;
- hw->mac.get_link_status = 1;
- hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
-
- if (link_up) {
+ if (adapter->link_up) {
__u32 speed = SPEED_10000;
- switch (link_speed) {
+ switch (adapter->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
speed = SPEED_10000;
break;
static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb)
{
return (skb_vlan_tag_present(skb)) ?
- TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00;
+ TxVlanTag | htons(skb_vlan_tag_get(skb)) : 0x00;
}
static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
u32 opts2 = le32_to_cpu(desc->opts2);
if (opts2 & RxVlanTag)
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ ntohs(opts2 & 0xffff));
}
static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs,
skb = napi_alloc_skb(&tp->napi, pkt_size);
if (skb)
skb_copy_to_linear_data(skb, data, pkt_size);
- dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
return skb;
}
tp->cp_cmd = RTL_R16(tp, CPlusCmd);
if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 &&
- !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+ !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
dev->features |= NETIF_F_HIGHDMA;
- } else {
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (rc < 0) {
- dev_err(&pdev->dev, "DMA configuration failed\n");
- return rc;
- }
- }
rtl_init_rxcfg(tp);
#include <net/net_namespace.h>
/* TC action not accessible from user space */
-#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1)
+#define TC_ACT_CONSUMED (TC_ACT_VALUE_MAX + 1)
/* Basic packet classifier frontend definitions. */
};
const struct tcf_proto *goto_tp;
- /* used by the TC_ACT_REINSERT action */
+ /* used in the skb_tc_reinsert function */
struct {
bool ingress;
struct gnet_stats_queue *qstats;
__s32 delta;
__s32 clockid;
__u32 flags;
-#define TC_ETF_DEADLINE_MODE_ON BIT(0)
-#define TC_ETF_OFFLOAD_ON BIT(1)
+#define TC_ETF_DEADLINE_MODE_ON _BITUL(0)
+#define TC_ETF_OFFLOAD_ON _BITUL(1)
+#define TC_ETF_SKIP_SOCK_CHECK _BITUL(2)
};
enum {
* [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
*/
+#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST 0x1
+
enum {
TCA_TAPRIO_ATTR_UNSPEC,
TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
+ TCA_TAPRIO_ATTR_FLAGS, /* u32 */
+ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* s32 */
__TCA_TAPRIO_ATTR_MAX,
};
__skb_push(skb, skb->mac_len);
skb_do_redirect(skb);
return NULL;
- case TC_ACT_REINSERT:
- /* this does not scrub the packet, and updates stats on error */
- skb_tc_reinsert(skb, &cl_res);
+ case TC_ACT_CONSUMED:
return NULL;
default:
break;
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
+#define MIRRED_RECURSION_LIMIT 4
+static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+
static bool tcf_mirred_is_act_redirect(int action)
{
return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
+ unsigned int rec_level;
int retval, err = 0;
bool use_reinsert;
bool want_ingress;
int m_eaction;
int mac_len;
+ rec_level = __this_cpu_inc_return(mirred_rec_level);
+ if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+ net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ __this_cpu_dec(mirred_rec_level);
+ return TC_ACT_SHOT;
+ }
+
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
if (use_reinsert) {
res->ingress = want_ingress;
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- return TC_ACT_REINSERT;
+ skb_tc_reinsert(skb, res);
+ __this_cpu_dec(mirred_rec_level);
+ return TC_ACT_CONSUMED;
}
}
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
+ __this_cpu_dec(mirred_rec_level);
return retval;
}
struct em_ipt_match {
const struct xt_match *match;
u32 hook;
+ u8 nfproto;
u8 match_data[0] __aligned(8);
};
return 0;
}
+static int addrtype_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+ if (mrev != 1) {
+ pr_err("only addrtype match revision 1 supported");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
{
.match_name = "policy",
.validate_match_data = policy_validate_match_data
},
+ {
+ .match_name = "addrtype",
+ .validate_match_data = addrtype_validate_match_data
+ },
{}
};
struct em_ipt_match *im = NULL;
struct xt_match *match;
int mdata_len, ret;
+ u8 nfproto;
ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len,
em_ipt_policy, NULL);
!tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
return -EINVAL;
+ nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ break;
+ default:
+ return -EINVAL;
+ }
+
match = get_xt_match(tb);
if (IS_ERR(match)) {
pr_err("unable to load match\n");
im->match = match;
im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+ im->nfproto = nfproto;
nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
ret = check_match(net, im, mdata_len);
const struct em_ipt_match *im = (const void *)em->data;
struct xt_action_param acpar = {};
struct net_device *indev = NULL;
+ u8 nfproto = im->match->family;
struct nf_hook_state state;
int ret;
+ switch (tc_skb_protocol(skb)) {
+ case htons(ETH_P_IP):
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ return 0;
+ if (nfproto == NFPROTO_UNSPEC)
+ nfproto = NFPROTO_IPV4;
+ break;
+ case htons(ETH_P_IPV6):
+ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ return 0;
+ if (nfproto == NFPROTO_UNSPEC)
+ nfproto = NFPROTO_IPV6;
+ break;
+ default:
+ return 0;
+ }
+
rcu_read_lock();
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
- nf_hook_state_init(&state, im->hook, im->match->family,
+ nf_hook_state_init(&state, im->hook, nfproto,
indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
acpar.match = im->match;
return -EMSGSIZE;
if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
return -EMSGSIZE;
- if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+ if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->nfproto) < 0)
return -EMSGSIZE;
if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
im->match->usersize ?: im->match->matchsize,
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
+#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
struct etf_sched_data {
bool offload;
bool deadline_mode;
+ bool skip_sock_check;
int clockid;
int queue;
s32 delta; /* in ns */
struct sock *sk = nskb->sk;
ktime_t now;
+ if (q->skip_sock_check)
+ goto skip;
+
if (!sk)
return false;
if (sk->sk_txtime_deadline_mode != q->deadline_mode)
return false;
+skip:
now = q->get_time();
if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
return false;
q->clockid = qopt->clockid;
q->offload = OFFLOAD_IS_ON(qopt);
q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+ q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
switch (q->clockid) {
case CLOCK_REALTIME:
if (q->deadline_mode)
opt.flags |= TC_ETF_DEADLINE_MODE_ON;
+ if (q->skip_sock_check)
+ opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
+
if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+#include <net/sock.h>
+#include <net/tcp.h>
static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);
#define TAPRIO_ALL_GATES_OPEN -1
+#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST))
+#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
+
struct sched_entry {
struct list_head list;
* packet leaves after this time.
*/
ktime_t close_time;
+ ktime_t next_txtime;
atomic_t budget;
int index;
u32 gate_mask;
struct taprio_sched {
struct Qdisc **qdiscs;
struct Qdisc *root;
+ u32 flags;
+ enum tk_offsets tk_offset;
int clockid;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
struct sched_entry __rcu *current_entry;
struct sched_gate_list __rcu *oper_sched;
struct sched_gate_list __rcu *admin_sched;
- ktime_t (*get_time)(void);
struct hrtimer advance_timer;
struct list_head taprio_list;
+ int txtime_delay;
};
static ktime_t sched_base_time(const struct sched_gate_list *sched)
return ns_to_ktime(sched->base_time);
}
+static ktime_t taprio_get_time(struct taprio_sched *q)
+{
+ ktime_t mono = ktime_get();
+
+ switch (q->tk_offset) {
+ case TK_OFFS_MAX:
+ return mono;
+ default:
+ return ktime_mono_to_any(mono, q->tk_offset);
+ }
+
+ return KTIME_MAX;
+}
+
static void taprio_free_sched_cb(struct rcu_head *head)
{
struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
*admin = NULL;
}
-static ktime_t get_cycle_time(struct sched_gate_list *sched)
+/* Get how much time has been already elapsed in the current cycle. */
+static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
+{
+ ktime_t time_since_sched_start;
+ s32 time_elapsed;
+
+ time_since_sched_start = ktime_sub(time, sched->base_time);
+ div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
+
+ return time_elapsed;
+}
+
+static ktime_t get_interval_end_time(struct sched_gate_list *sched,
+ struct sched_gate_list *admin,
+ struct sched_entry *entry,
+ ktime_t intv_start)
+{
+ s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
+ ktime_t intv_end, cycle_ext_end, cycle_end;
+
+ cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
+ intv_end = ktime_add_ns(intv_start, entry->interval);
+ cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
+
+ if (ktime_before(intv_end, cycle_end))
+ return intv_end;
+ else if (admin && admin != sched &&
+ ktime_after(admin->base_time, cycle_end) &&
+ ktime_before(admin->base_time, cycle_ext_end))
+ return admin->base_time;
+ else
+ return cycle_end;
+}
+
+static int length_to_duration(struct taprio_sched *q, int len)
+{
+ return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
+}
+
+/* Returns the entry corresponding to next available interval. If
+ * validate_interval is set, it only validates whether the timestamp occurs
+ * when the gate corresponding to the skb's traffic class is open.
+ */
+static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
+ struct Qdisc *sch,
+ struct sched_gate_list *sched,
+ struct sched_gate_list *admin,
+ ktime_t time,
+ ktime_t *interval_start,
+ ktime_t *interval_end,
+ bool validate_interval)
+{
+ ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
+ ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
+ struct sched_entry *entry = NULL, *entry_found = NULL;
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ bool entry_available = false;
+ s32 cycle_elapsed;
+ int tc, n;
+
+ tc = netdev_get_prio_tc_map(dev, skb->priority);
+ packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
+
+ *interval_start = 0;
+ *interval_end = 0;
+
+ if (!sched)
+ return NULL;
+
+ cycle = sched->cycle_time;
+ cycle_elapsed = get_cycle_time_elapsed(sched, time);
+ curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
+ cycle_end = ktime_add_ns(curr_intv_end, cycle);
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ curr_intv_start = curr_intv_end;
+ curr_intv_end = get_interval_end_time(sched, admin, entry,
+ curr_intv_start);
+
+ if (ktime_after(curr_intv_start, cycle_end))
+ break;
+
+ if (!(entry->gate_mask & BIT(tc)) ||
+ packet_transmit_time > entry->interval)
+ continue;
+
+ txtime = entry->next_txtime;
+
+ if (ktime_before(txtime, time) || validate_interval) {
+ transmit_end_time = ktime_add_ns(time, packet_transmit_time);
+ if ((ktime_before(curr_intv_start, time) &&
+ ktime_before(transmit_end_time, curr_intv_end)) ||
+ (ktime_after(curr_intv_start, time) && !validate_interval)) {
+ entry_found = entry;
+ *interval_start = curr_intv_start;
+ *interval_end = curr_intv_end;
+ break;
+ } else if (!entry_available && !validate_interval) {
+ /* Here, we are just trying to find out the
+ * first available interval in the next cycle.
+ */
+ entry_available = 1;
+ entry_found = entry;
+ *interval_start = ktime_add_ns(curr_intv_start, cycle);
+ *interval_end = ktime_add_ns(curr_intv_end, cycle);
+ }
+ } else if (ktime_before(txtime, earliest_txtime) &&
+ !entry_available) {
+ earliest_txtime = txtime;
+ entry_found = entry;
+ n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
+ *interval_start = ktime_add(curr_intv_start, n * cycle);
+ *interval_end = ktime_add(curr_intv_end, n * cycle);
+ }
+ }
+
+ return entry_found;
+}
+
+static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_gate_list *sched, *admin;
+ ktime_t interval_start, interval_end;
struct sched_entry *entry;
- ktime_t cycle = 0;
- if (sched->cycle_time != 0)
- return sched->cycle_time;
+ rcu_read_lock();
+ sched = rcu_dereference(q->oper_sched);
+ admin = rcu_dereference(q->admin_sched);
+
+ entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
+ &interval_start, &interval_end, true);
+ rcu_read_unlock();
- list_for_each_entry(entry, &sched->entries, list)
- cycle = ktime_add_ns(cycle, entry->interval);
+ return entry;
+}
- sched->cycle_time = cycle;
+/* This returns the tstamp value set by TCP in terms of the set clock. */
+static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
+{
+ unsigned int offset = skb_network_offset(skb);
+ const struct ipv6hdr *ipv6h;
+ const struct iphdr *iph;
+ struct ipv6hdr _ipv6h;
- return cycle;
+ ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+ if (!ipv6h)
+ return 0;
+
+ if (ipv6h->version == 4) {
+ iph = (struct iphdr *)ipv6h;
+ offset += iph->ihl * 4;
+
+ /* special-case 6in4 tunnelling, as that is a common way to get
+ * v6 connectivity in the home
+ */
+ if (iph->protocol == IPPROTO_IPV6) {
+ ipv6h = skb_header_pointer(skb, offset,
+ sizeof(_ipv6h), &_ipv6h);
+
+ if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+ return 0;
+ } else if (iph->protocol != IPPROTO_TCP) {
+ return 0;
+ }
+ } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
+ return 0;
+ }
+
+ return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset);
+}
+
+/* There are a few scenarios where we will have to modify the txtime from
+ * what is read from next_txtime in sched_entry. They are:
+ * 1. If txtime is in the past,
+ * a. The gate for the traffic class is currently open and packet can be
+ * transmitted before it closes, schedule the packet right away.
+ * b. If the gate corresponding to the traffic class is going to open later
+ * in the cycle, set the txtime of packet to the interval start.
+ * 2. If txtime is in the future, there are packets corresponding to the
+ * current traffic class waiting to be transmitted. So, the following
+ * possibilities exist:
+ * a. We can transmit the packet before the window containing the txtime
+ * closes.
+ * b. The window might close before the transmission can be completed
+ * successfully. So, schedule the packet in the next open window.
+ */
+static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
+{
+ ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_gate_list *sched, *admin;
+ ktime_t minimum_time, now, txtime;
+ int len, packet_transmit_time;
+ struct sched_entry *entry;
+ bool sched_changed;
+
+ now = taprio_get_time(q);
+ minimum_time = ktime_add_ns(now, q->txtime_delay);
+
+ tcp_tstamp = get_tcp_tstamp(q, skb);
+ minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
+
+ rcu_read_lock();
+ admin = rcu_dereference(q->admin_sched);
+ sched = rcu_dereference(q->oper_sched);
+ if (admin && ktime_after(minimum_time, admin->base_time))
+ switch_schedules(q, &admin, &sched);
+
+ /* Until the schedule starts, all the queues are open */
+ if (!sched || ktime_before(minimum_time, sched->base_time)) {
+ txtime = minimum_time;
+ goto done;
+ }
+
+ len = qdisc_pkt_len(skb);
+ packet_transmit_time = length_to_duration(q, len);
+
+ do {
+ sched_changed = 0;
+
+ entry = find_entry_to_transmit(skb, sch, sched, admin,
+ minimum_time,
+ &interval_start, &interval_end,
+ false);
+ if (!entry) {
+ txtime = 0;
+ goto done;
+ }
+
+ txtime = entry->next_txtime;
+ txtime = max_t(ktime_t, txtime, minimum_time);
+ txtime = max_t(ktime_t, txtime, interval_start);
+
+ if (admin && admin != sched &&
+ ktime_after(txtime, admin->base_time)) {
+ sched = admin;
+ sched_changed = 1;
+ continue;
+ }
+
+ transmit_end_time = ktime_add(txtime, packet_transmit_time);
+ minimum_time = transmit_end_time;
+
+ /* Update the txtime of current entry to the next time it's
+ * interval starts.
+ */
+ if (ktime_after(transmit_end_time, interval_end))
+ entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
+ } while (sched_changed || ktime_after(transmit_end_time, interval_end));
+
+ entry->next_txtime = transmit_end_time;
+
+done:
+ rcu_read_unlock();
+ return txtime;
}
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(!child))
return qdisc_drop(skb, sch, to_free);
+ if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+ if (!is_valid_interval(skb, sch))
+ return qdisc_drop(skb, sch, to_free);
+ } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+ skb->tstamp = get_packet_txtime(skb, sch);
+ if (!skb->tstamp)
+ return qdisc_drop(skb, sch, to_free);
+ }
+
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
if (!skb)
continue;
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags))
+ return skb;
+
prio = skb->priority;
tc = netdev_get_prio_tc_map(dev, prio);
return NULL;
}
-static inline int length_to_duration(struct taprio_sched *q, int len)
-{
- return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
-}
-
static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
atomic_set(&entry->budget,
if (unlikely(!child))
continue;
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+ skb = child->ops->dequeue(child);
+ if (!skb)
+ continue;
+ goto skb_found;
+ }
+
skb = child->ops->peek(child);
if (!skb)
continue;
continue;
len = qdisc_pkt_len(skb);
- guard = ktime_add_ns(q->get_time(),
+ guard = ktime_add_ns(taprio_get_time(q),
length_to_duration(q, len));
/* In the case that there's no gate entry, there's no
if (unlikely(!skb))
goto done;
+skb_found:
qdisc_bstats_update(sch, skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
if (err < 0)
return err;
+ if (!new->cycle_time) {
+ struct sched_entry *entry;
+ ktime_t cycle = 0;
+
+ list_for_each_entry(entry, &new->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+ new->cycle_time = cycle;
+ }
+
return 0;
}
static int taprio_parse_mqprio_opt(struct net_device *dev,
struct tc_mqprio_qopt *qopt,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ u32 taprio_flags)
{
int i, j;
return -EINVAL;
}
+ if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
+ continue;
+
/* Verify that the offset and counts do not overlap */
for (j = i + 1; j < qopt->num_tc; j++) {
if (last > qopt->offset[j]) {
s64 n;
base = sched_base_time(sched);
- now = q->get_time();
+ now = taprio_get_time(q);
if (ktime_after(base, now)) {
*start = base;
return 0;
}
- cycle = get_cycle_time(sched);
+ cycle = sched->cycle_time;
/* The qdisc is expected to have at least one sched_entry. Moreover,
* any entry must have 'interval' > 0. Thus if the cycle time is zero,
first = list_first_entry(&sched->entries,
struct sched_entry, list);
- cycle = get_cycle_time(sched);
+ cycle = sched->cycle_time;
/* FIXME: find a better place to do this */
sched->cycle_close_time = ktime_add_ns(base, cycle);
return NOTIFY_DONE;
}
+static void setup_txtime(struct taprio_sched *q,
+ struct sched_gate_list *sched, ktime_t base)
+{
+ struct sched_entry *entry;
+ u32 interval = 0;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ entry->next_txtime = ktime_add_ns(base, interval);
+ interval += entry->interval;
+ }
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
+ u32 taprio_flags = 0;
int i, err, clockid;
unsigned long flags;
ktime_t start;
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- err = taprio_parse_mqprio_opt(dev, mqprio, extack);
+ if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
+ taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
+
+ if (q->flags != 0 && q->flags != taprio_flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ } else if (!FLAGS_VALID(taprio_flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+ return -EINVAL;
+ }
+
+ q->flags = taprio_flags;
+ }
+
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
if (err < 0)
return err;
/* Protects against enqueue()/dequeue() */
spin_lock_bh(qdisc_lock(sch));
- if (!hrtimer_active(&q->advance_timer)) {
+ if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
+ if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ q->txtime_delay = nla_get_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
+ }
+
+ if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
+ !hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
}
switch (q->clockid) {
case CLOCK_REALTIME:
- q->get_time = ktime_get_real;
+ q->tk_offset = TK_OFFS_REAL;
break;
case CLOCK_MONOTONIC:
- q->get_time = ktime_get;
+ q->tk_offset = TK_OFFS_MAX;
break;
case CLOCK_BOOTTIME:
- q->get_time = ktime_get_boottime;
+ q->tk_offset = TK_OFFS_BOOT;
break;
case CLOCK_TAI:
- q->get_time = ktime_get_clocktai;
+ q->tk_offset = TK_OFFS_TAI;
break;
default:
NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
goto unlock;
}
- setup_first_close_time(q, new_admin, start);
+ if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
+ setup_txtime(q, new_admin, start);
- /* Protects against advance_sched() */
- spin_lock_irqsave(&q->current_entry_lock, flags);
+ if (!oper) {
+ rcu_assign_pointer(q->oper_sched, new_admin);
+ err = 0;
+ new_admin = NULL;
+ goto unlock;
+ }
- taprio_start_sched(sch, start, new_admin);
+ rcu_assign_pointer(q->admin_sched, new_admin);
+ if (admin)
+ call_rcu(&admin->rcu, taprio_free_sched_cb);
+ } else {
+ setup_first_close_time(q, new_admin, start);
- rcu_assign_pointer(q->admin_sched, new_admin);
- if (admin)
- call_rcu(&admin->rcu, taprio_free_sched_cb);
- new_admin = NULL;
+ /* Protects against advance_sched() */
+ spin_lock_irqsave(&q->current_entry_lock, flags);
- spin_unlock_irqrestore(&q->current_entry_lock, flags);
+ taprio_start_sched(sch, start, new_admin);
+ rcu_assign_pointer(q->admin_sched, new_admin);
+ if (admin)
+ call_rcu(&admin->rcu, taprio_free_sched_cb);
+
+ spin_unlock_irqrestore(&q->current_entry_lock, flags);
+ }
+
+ new_admin = NULL;
err = 0;
unlock:
if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
goto options_error;
+ if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
+ goto options_error;
+
+ if (q->txtime_delay &&
+ nla_put_s32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
+ goto options_error;
+
if (oper && dump_schedule(skb, oper))
goto options_error;
sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+ probed=false
# setup netdevsim since dummydev doesn't have offload support
- modprobe netdevsim
- check_err $?
- if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec_offload can't load netdevsim"
- return 1
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ modprobe -q netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "SKIP: ipsec_offload can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
fi
echo "0" > /sys/bus/netdevsim/new_device
fi
# clean up any leftovers
- rmmod netdevsim
+ $probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
echo "FAIL: ipsec_offload"