asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author David S. Miller <davem@davemloft.net>
Sat, 2 Sep 2017 00:42:05 +0000 (17:42 -0700)
committer David S. Miller <davem@davemloft.net>
Sat, 2 Sep 2017 00:42:05 +0000 (17:42 -0700)
Three cases of simple overlapping changes.

Signed-off-by: David S. Miller <davem@davemloft.net>
78 files changed:
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/bcm_sf2.h
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/aq_vec.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/usb/cdc_ncm.c
drivers/net/virtio_net.c
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/rx.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
include/linux/mlx5/driver.h
include/linux/netdevice.h
include/linux/skbuff.h
include/net/ip6_fib.h
include/net/sch_generic.h
include/net/tcp.h
include/net/udp.h
kernel/events/core.c
net/core/datagram.c
net/core/dev.c
net/core/filter.c
net/core/skbuff.c
net/dsa/dsa2.c
net/dsa/tag_ksz.c
net/dsa/tag_trailer.c
net/ipv4/esp4.c
net/ipv4/esp4_offload.c
net/ipv4/tcp.c
net/ipv4/tcp_cong.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/esp6.c
net/ipv6/esp6_offload.c
net/ipv6/ip6_fib.c
net/ipv6/route.c
net/ipv6/udp.c
net/kcm/kcmsock.c
net/packet/af_packet.c
net/sched/cls_api.c
net/sched/sch_api.c
net/sched/sch_cbq.c
net/sched/sch_fq_codel.c
net/sched/sch_generic.c
net/sched/sch_hfsc.c
net/sched/sch_htb.c
net/sched/sch_multiq.c
net/sched/sch_netem.c
net/sched/sch_sfq.c
net/sched/sch_tbf.c
net/sctp/socket.c
net/tipc/bearer.c
net/tipc/bearer.h
net/tipc/node.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c

index 8492c9d64004d1a76cc0a63fdc572b2e001d1d2d,9b6ce7c3f6c3228c88fced286a31c3be0c19900f..554fe2df9365c1dcac7da70c17da37a8dcb0fa74
@@@ -327,8 -327,12 +327,8 @@@ static void bcm_sf2_port_disable(struc
  static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
                            struct phy_device *phy)
  {
 -      struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 -      struct ethtool_eee *p = &priv->port_sts[port].eee;
        int ret;
  
 -      p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
 -
        ret = phy_init_eee(phy, 0);
        if (ret)
                return 0;
        return 1;
  }
  
 -static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
 -                            struct ethtool_eee *e)
 +static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port,
 +                                struct ethtool_eee *e)
  {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_eee *p = &priv->port_sts[port].eee;
        return 0;
  }
  
 -static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
 -                            struct phy_device *phydev,
 -                            struct ethtool_eee *e)
 +static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port,
 +                                struct ethtool_eee *e)
  {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_eee *p = &priv->port_sts[port].eee;
  
        p->eee_enabled = e->eee_enabled;
 -
 -      if (!p->eee_enabled) {
 -              bcm_sf2_eee_enable_set(ds, port, false);
 -      } else {
 -              p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
 -              if (!p->eee_enabled)
 -                      return -EOPNOTSUPP;
 -      }
 +      bcm_sf2_eee_enable_set(ds, port, e->eee_enabled);
  
        return 0;
  }
@@@ -788,7 -800,7 +788,7 @@@ static int bcm_sf2_sw_resume(struct dsa
  static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
                               struct ethtool_wolinfo *wol)
  {
 -      struct net_device *p = ds->dst[ds->index].cpu_dp->netdev;
 +      struct net_device *p = ds->dst->cpu_dp->netdev;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_wolinfo pwol;
  
  static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
                              struct ethtool_wolinfo *wol)
  {
 -      struct net_device *p = ds->dst[ds->index].cpu_dp->netdev;
 +      struct net_device *p = ds->dst->cpu_dp->netdev;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        s8 cpu_port = ds->dst->cpu_dp->index;
        struct ethtool_wolinfo pwol;
@@@ -983,7 -995,7 +983,7 @@@ static int bcm_sf2_core_write64(struct 
        return 0;
  }
  
 -static struct b53_io_ops bcm_sf2_io_ops = {
 +static const struct b53_io_ops bcm_sf2_io_ops = {
        .read8  = bcm_sf2_core_read8,
        .read16 = bcm_sf2_core_read16,
        .read32 = bcm_sf2_core_read32,
@@@ -1011,8 -1023,8 +1011,8 @@@ static const struct dsa_switch_ops bcm_
        .set_wol                = bcm_sf2_sw_set_wol,
        .port_enable            = bcm_sf2_port_setup,
        .port_disable           = bcm_sf2_port_disable,
 -      .get_eee                = bcm_sf2_sw_get_eee,
 -      .set_eee                = bcm_sf2_sw_set_eee,
 +      .get_mac_eee            = bcm_sf2_sw_get_mac_eee,
 +      .set_mac_eee            = bcm_sf2_sw_set_mac_eee,
        .port_bridge_join       = b53_br_join,
        .port_bridge_leave      = b53_br_leave,
        .port_stp_state_set     = b53_br_set_stp_state,
        .port_vlan_prepare      = b53_vlan_prepare,
        .port_vlan_add          = b53_vlan_add,
        .port_vlan_del          = b53_vlan_del,
 -      .port_vlan_dump         = b53_vlan_dump,
 -      .port_fdb_prepare       = b53_fdb_prepare,
        .port_fdb_dump          = b53_fdb_dump,
        .port_fdb_add           = b53_fdb_add,
        .port_fdb_del           = b53_fdb_del,
@@@ -1034,6 -1048,7 +1034,7 @@@ struct bcm_sf2_of_data 
        u32 type;
        const u16 *reg_offsets;
        unsigned int core_reg_align;
+       unsigned int num_cfp_rules;
  };
  
  /* Register offsets for the SWITCH_REG_* block */
@@@ -1057,6 -1072,7 +1058,7 @@@ static const struct bcm_sf2_of_data bcm
        .type           = BCM7445_DEVICE_ID,
        .core_reg_align = 0,
        .reg_offsets    = bcm_sf2_7445_reg_offsets,
+       .num_cfp_rules  = 256,
  };
  
  static const u16 bcm_sf2_7278_reg_offsets[] = {
@@@ -1079,6 -1095,7 +1081,7 @@@ static const struct bcm_sf2_of_data bcm
        .type           = BCM7278_DEVICE_ID,
        .core_reg_align = 1,
        .reg_offsets    = bcm_sf2_7278_reg_offsets,
+       .num_cfp_rules  = 128,
  };
  
  static const struct of_device_id bcm_sf2_of_match[] = {
@@@ -1135,6 -1152,7 +1138,7 @@@ static int bcm_sf2_sw_probe(struct plat
        priv->type = data->type;
        priv->reg_offsets = data->reg_offsets;
        priv->core_reg_align = data->core_reg_align;
+       priv->num_cfp_rules = data->num_cfp_rules;
  
        /* Auto-detection using standard registers will not work, so
         * provide an indication of what kind of device we are for
index d9c96b281fc0af229d505aa4fa58a7568b34bbda,7f9125eef3df42038fdce368d4a5fe2690d3276c..02c499f9c56b3bdd0cccb277ce0be9e13d318055
@@@ -72,6 -72,7 +72,7 @@@ struct bcm_sf2_priv 
        u32                             type;
        const u16                       *reg_offsets;
        unsigned int                    core_reg_align;
+       unsigned int                    num_cfp_rules;
  
        /* spinlock protecting access to the indirect registers */
        spinlock_t                      indir_lock;
@@@ -130,12 -131,12 +131,12 @@@ static inline u32 bcm_sf2_mangle_addr(s
  #define SF2_IO_MACRO(name) \
  static inline u32 name##_readl(struct bcm_sf2_priv *priv, u32 off)    \
  {                                                                     \
 -      return __raw_readl(priv->name + off);                           \
 +      return readl_relaxed(priv->name + off);                         \
  }                                                                     \
  static inline void name##_writel(struct bcm_sf2_priv *priv,           \
                                  u32 val, u32 off)                     \
  {                                                                     \
 -      __raw_writel(val, priv->name + off);                            \
 +      writel_relaxed(val, priv->name + off);                          \
  }                                                                     \
  
  /* Accesses to 64-bits register requires us to latch the hi/lo pairs
@@@ -179,23 -180,23 +180,23 @@@ static inline void intrl2_##which##_mas
  static inline u32 core_readl(struct bcm_sf2_priv *priv, u32 off)
  {
        u32 tmp = bcm_sf2_mangle_addr(priv, off);
 -      return __raw_readl(priv->core + tmp);
 +      return readl_relaxed(priv->core + tmp);
  }
  
  static inline void core_writel(struct bcm_sf2_priv *priv, u32 val, u32 off)
  {
        u32 tmp = bcm_sf2_mangle_addr(priv, off);
 -      __raw_writel(val, priv->core + tmp);
 +      writel_relaxed(val, priv->core + tmp);
  }
  
  static inline u32 reg_readl(struct bcm_sf2_priv *priv, u16 off)
  {
 -      return __raw_readl(priv->reg + priv->reg_offsets[off]);
 +      return readl_relaxed(priv->reg + priv->reg_offsets[off]);
  }
  
  static inline void reg_writel(struct bcm_sf2_priv *priv, u32 val, u16 off)
  {
 -      __raw_writel(val, priv->reg + priv->reg_offsets[off]);
 +      writel_relaxed(val, priv->reg + priv->reg_offsets[off]);
  }
  
  SF2_IO64_MACRO(core);
index 4b445750b93e15733ff63b67945f88d7a2ca84df,ec5579fb8268b29c6040f524fe20495b43fa617d..4eee1996a8259e561c15a17b9a3792ef79f280e2
@@@ -101,7 -101,6 +101,6 @@@ int aq_ring_init(struct aq_ring_s *self
        self->hw_head = 0;
        self->sw_head = 0;
        self->sw_tail = 0;
-       spin_lock_init(&self->header.lock);
        return 0;
  }
  
@@@ -134,10 -133,7 +133,10 @@@ static inline unsigned int aq_ring_dx_i
  }
  
  #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
 -int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
 +int aq_ring_rx_clean(struct aq_ring_s *self,
 +                   struct napi_struct *napi,
 +                   int *work_done,
 +                   int budget)
  {
        struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
        int err = 0;
  
                skb_record_rx_queue(skb, self->idx);
  
 -              netif_receive_skb(skb);
 +              napi_gro_receive(napi, skb);
  
                ++self->stats.rx.packets;
                self->stats.rx.bytes += skb->len;
index ec390c5eed35efb54d838a94f466833cdd3806eb,fee446af748ff1a64a984cf4b5ec12dce46471d1..ebf588004c4677140934b152eeab13d4d3aecbf7
@@@ -34,8 -34,6 +34,6 @@@ struct aq_vec_s 
  #define AQ_VEC_RX_ID 1
  
  static int aq_vec_poll(struct napi_struct *napi, int budget)
- __releases(&self->lock)
- __acquires(&self->lock)
  {
        struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi);
        struct aq_ring_s *ring = NULL;
@@@ -47,7 -45,7 +45,7 @@@
  
        if (!self) {
                err = -EINVAL;
-       } else if (spin_trylock(&self->header.lock)) {
+       } else {
                for (i = 0U, ring = self->ring[0];
                        self->tx_rings > i; ++i, ring = self->ring[i]) {
                        if (self->aq_hw_ops->hw_ring_tx_head_update) {
@@@ -78,7 -76,6 +76,7 @@@
                        if (ring[AQ_VEC_RX_ID].sw_head !=
                                ring[AQ_VEC_RX_ID].hw_head) {
                                err = aq_ring_rx_clean(&ring[AQ_VEC_RX_ID],
 +                                                     napi,
                                                       &work_done,
                                                       budget - work_done);
                                if (err < 0)
                        self->aq_hw_ops->hw_irq_enable(self->aq_hw,
                                        1U << self->aq_ring_param.vec_idx);
                }
- err_exit:
-               spin_unlock(&self->header.lock);
        }
+ err_exit:
        return work_done;
  }
  
@@@ -186,8 -180,6 +181,6 @@@ int aq_vec_init(struct aq_vec_s *self, 
        self->aq_hw_ops = aq_hw_ops;
        self->aq_hw = aq_hw;
  
-       spin_lock_init(&self->header.lock);
        for (i = 0U, ring = self->ring[0];
                self->tx_rings > i; ++i, ring = self->ring[i]) {
                err = aq_ring_init(&ring[AQ_VEC_TX_ID]);
index 931751e4f3692ffa00dfa385a68aad34675117a0,c28fa5a8734cbc769adc16dfc5e36a8cd13b35cb..eec77fae12a14c5c1c1f1136dd3f286968f57069
  #define BCM_SYSPORT_IO_MACRO(name, offset) \
  static inline u32 name##_readl(struct bcm_sysport_priv *priv, u32 off)        \
  {                                                                     \
 -      u32 reg = __raw_readl(priv->base + offset + off);               \
 +      u32 reg = readl_relaxed(priv->base + offset + off);             \
        return reg;                                                     \
  }                                                                     \
  static inline void name##_writel(struct bcm_sysport_priv *priv,               \
                                  u32 val, u32 off)                     \
  {                                                                     \
 -      __raw_writel(val, priv->base + offset + off);                   \
 +      writel_relaxed(val, priv->base + offset + off);                 \
  }                                                                     \
  
  BCM_SYSPORT_IO_MACRO(intrl2_0, SYS_PORT_INTRL2_0_OFFSET);
@@@ -59,14 -59,14 +59,14 @@@ static inline u32 rdma_readl(struct bcm
  {
        if (priv->is_lite && off >= RDMA_STATUS)
                off += 4;
 -      return __raw_readl(priv->base + SYS_PORT_RDMA_OFFSET + off);
 +      return readl_relaxed(priv->base + SYS_PORT_RDMA_OFFSET + off);
  }
  
  static inline void rdma_writel(struct bcm_sysport_priv *priv, u32 val, u32 off)
  {
        if (priv->is_lite && off >= RDMA_STATUS)
                off += 4;
 -      __raw_writel(val, priv->base + SYS_PORT_RDMA_OFFSET + off);
 +      writel_relaxed(val, priv->base + SYS_PORT_RDMA_OFFSET + off);
  }
  
  static inline u32 tdma_control_bit(struct bcm_sysport_priv *priv, u32 bit)
@@@ -110,10 -110,10 +110,10 @@@ static inline void dma_desc_set_addr(st
                                     dma_addr_t addr)
  {
  #ifdef CONFIG_PHYS_ADDR_T_64BIT
 -      __raw_writel(upper_32_bits(addr) & DESC_ADDR_HI_MASK,
 +      writel_relaxed(upper_32_bits(addr) & DESC_ADDR_HI_MASK,
                     d + DESC_ADDR_HI_STATUS_LEN);
  #endif
 -      __raw_writel(lower_32_bits(addr), d + DESC_ADDR_LO);
 +      writel_relaxed(lower_32_bits(addr), d + DESC_ADDR_LO);
  }
  
  static inline void tdma_port_write_desc_addr(struct bcm_sysport_priv *priv,
@@@ -201,10 -201,10 +201,10 @@@ static int bcm_sysport_set_features(str
   */
  static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = {
        /* general stats */
 -      STAT_NETDEV(rx_packets),
 -      STAT_NETDEV(tx_packets),
 -      STAT_NETDEV(rx_bytes),
 -      STAT_NETDEV(tx_bytes),
 +      STAT_NETDEV64(rx_packets),
 +      STAT_NETDEV64(tx_packets),
 +      STAT_NETDEV64(rx_bytes),
 +      STAT_NETDEV64(tx_bytes),
        STAT_NETDEV(rx_errors),
        STAT_NETDEV(tx_errors),
        STAT_NETDEV(rx_dropped),
@@@ -316,7 -316,6 +316,7 @@@ static inline bool bcm_sysport_lite_sta
  {
        switch (type) {
        case BCM_SYSPORT_STAT_NETDEV:
 +      case BCM_SYSPORT_STAT_NETDEV64:
        case BCM_SYSPORT_STAT_RXCHK:
        case BCM_SYSPORT_STAT_RBUF:
        case BCM_SYSPORT_STAT_SOFT:
@@@ -399,7 -398,6 +399,7 @@@ static void bcm_sysport_update_mib_coun
                s = &bcm_sysport_gstrings_stats[i];
                switch (s->type) {
                case BCM_SYSPORT_STAT_NETDEV:
 +              case BCM_SYSPORT_STAT_NETDEV64:
                case BCM_SYSPORT_STAT_SOFT:
                        continue;
                case BCM_SYSPORT_STAT_MIB_RX:
@@@ -436,10 -434,7 +436,10 @@@ static void bcm_sysport_get_stats(struc
                                  struct ethtool_stats *stats, u64 *data)
  {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
 +      struct bcm_sysport_stats64 *stats64 = &priv->stats64;
 +      struct u64_stats_sync *syncp = &priv->syncp;
        struct bcm_sysport_tx_ring *ring;
 +      unsigned int start;
        int i, j;
  
        if (netif_running(dev))
                s = &bcm_sysport_gstrings_stats[i];
                if (s->type == BCM_SYSPORT_STAT_NETDEV)
                        p = (char *)&dev->stats;
 +              else if (s->type == BCM_SYSPORT_STAT_NETDEV64)
 +                      p = (char *)stats64;
                else
                        p = (char *)priv;
  
                if (priv->is_lite && !bcm_sysport_lite_stat_valid(s->type))
                        continue;
 -
                p += s->stat_offset;
 -              data[j] = *(unsigned long *)p;
 +
 +              if (s->stat_sizeof == sizeof(u64))
 +                      do {
 +                              start = u64_stats_fetch_begin_irq(syncp);
 +                              data[i] = *(u64 *)p;
 +                      } while (u64_stats_fetch_retry_irq(syncp, start));
 +              else
 +                      data[i] = *(u32 *)p;
                j++;
        }
  
@@@ -610,7 -597,7 +610,7 @@@ static int bcm_sysport_set_coalesce(str
  
  static void bcm_sysport_free_cb(struct bcm_sysport_cb *cb)
  {
-       dev_kfree_skb_any(cb->skb);
+       dev_consume_skb_any(cb->skb);
        cb->skb = NULL;
        dma_unmap_addr_set(cb, dma_addr, 0);
  }
@@@ -679,7 -666,6 +679,7 @@@ static int bcm_sysport_alloc_rx_bufs(st
  static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
                                        unsigned int budget)
  {
 +      struct bcm_sysport_stats64 *stats64 = &priv->stats64;
        struct net_device *ndev = priv->netdev;
        unsigned int processed = 0, to_process;
        struct bcm_sysport_cb *cb;
                skb->protocol = eth_type_trans(skb, ndev);
                ndev->stats.rx_packets++;
                ndev->stats.rx_bytes += len;
 +              u64_stats_update_begin(&priv->syncp);
 +              stats64->rx_packets++;
 +              stats64->rx_bytes += len;
 +              u64_stats_update_end(&priv->syncp);
  
                napi_gro_receive(&priv->napi, skb);
  next:
@@@ -809,15 -791,17 +809,15 @@@ static void bcm_sysport_tx_reclaim_one(
        struct device *kdev = &priv->pdev->dev;
  
        if (cb->skb) {
 -              ring->bytes += cb->skb->len;
                *bytes_compl += cb->skb->len;
                dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
                                 dma_unmap_len(cb, dma_len),
                                 DMA_TO_DEVICE);
 -              ring->packets++;
                (*pkts_compl)++;
                bcm_sysport_free_cb(cb);
        /* SKB fragment */
        } else if (dma_unmap_addr(cb, dma_addr)) {
 -              ring->bytes += dma_unmap_len(cb, dma_len);
 +              *bytes_compl += dma_unmap_len(cb, dma_len);
                dma_unmap_page(kdev, dma_unmap_addr(cb, dma_addr),
                               dma_unmap_len(cb, dma_len), DMA_TO_DEVICE);
                dma_unmap_addr_set(cb, dma_addr, 0);
  static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
                                             struct bcm_sysport_tx_ring *ring)
  {
 -      struct net_device *ndev = priv->netdev;
        unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs;
        unsigned int pkts_compl = 0, bytes_compl = 0;
 +      struct net_device *ndev = priv->netdev;
        struct bcm_sysport_cb *cb;
        u32 hw_ind;
  
                last_c_index &= (num_tx_cbs - 1);
        }
  
 +      u64_stats_update_begin(&priv->syncp);
 +      ring->packets += pkts_compl;
 +      ring->bytes += bytes_compl;
 +      u64_stats_update_end(&priv->syncp);
 +
        ring->c_index = c_index;
  
        netif_dbg(priv, tx_done, ndev,
@@@ -1367,6 -1346,8 +1367,8 @@@ static int bcm_sysport_init_tx_ring(str
  
        ring->cbs = kcalloc(size, sizeof(struct bcm_sysport_cb), GFP_KERNEL);
        if (!ring->cbs) {
+               dma_free_coherent(kdev, sizeof(struct dma_desc),
+                                 ring->desc_cpu, ring->desc_dma);
                netif_err(priv, hw, priv->netdev, "CB allocation failed\n");
                return -ENOMEM;
        }
@@@ -1696,41 -1677,22 +1698,41 @@@ static int bcm_sysport_change_mac(struc
        return 0;
  }
  
 -static struct net_device_stats *bcm_sysport_get_nstats(struct net_device *dev)
 +static void bcm_sysport_get_stats64(struct net_device *dev,
 +                                  struct rtnl_link_stats64 *stats)
  {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
 -      unsigned long tx_bytes = 0, tx_packets = 0;
 +      struct bcm_sysport_stats64 *stats64 = &priv->stats64;
        struct bcm_sysport_tx_ring *ring;
 +      u64 tx_packets = 0, tx_bytes = 0;
 +      unsigned int start;
        unsigned int q;
  
 +      netdev_stats_to_stats64(stats, &dev->stats);
 +
        for (q = 0; q < dev->num_tx_queues; q++) {
                ring = &priv->tx_rings[q];
 -              tx_bytes += ring->bytes;
 -              tx_packets += ring->packets;
 +              do {
 +                      start = u64_stats_fetch_begin_irq(&priv->syncp);
 +                      tx_bytes = ring->bytes;
 +                      tx_packets = ring->packets;
 +              } while (u64_stats_fetch_retry_irq(&priv->syncp, start));
 +
 +              stats->tx_bytes += tx_bytes;
 +              stats->tx_packets += tx_packets;
        }
  
 -      dev->stats.tx_bytes = tx_bytes;
 -      dev->stats.tx_packets = tx_packets;
 -      return &dev->stats;
 +      /* lockless update tx_bytes and tx_packets */
 +      u64_stats_update_begin(&priv->syncp);
 +      stats64->tx_bytes = stats->tx_bytes;
 +      stats64->tx_packets = stats->tx_packets;
 +      u64_stats_update_end(&priv->syncp);
 +
 +      do {
 +              start = u64_stats_fetch_begin_irq(&priv->syncp);
 +              stats->rx_packets = stats64->rx_packets;
 +              stats->rx_bytes = stats64->rx_bytes;
 +      } while (u64_stats_fetch_retry_irq(&priv->syncp, start));
  }
  
  static void bcm_sysport_netif_start(struct net_device *dev)
@@@ -1762,14 -1724,10 +1764,14 @@@ static void rbuf_init(struct bcm_syspor
        reg = rbuf_readl(priv, RBUF_CONTROL);
        reg |= RBUF_4B_ALGN | RBUF_RSB_EN;
        /* Set a correct RSB format on SYSTEMPORT Lite */
 -      if (priv->is_lite) {
 +      if (priv->is_lite)
                reg &= ~RBUF_RSB_SWAP1;
 +
 +      /* Set a correct RSB format based on host endian */
 +      if (!IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
                reg |= RBUF_RSB_SWAP0;
 -      }
 +      else
 +              reg &= ~RBUF_RSB_SWAP0;
        rbuf_writel(priv, reg, RBUF_CONTROL);
  }
  
@@@ -1998,7 -1956,7 +2000,7 @@@ static const struct net_device_ops bcm_
  #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = bcm_sysport_poll_controller,
  #endif
 -      .ndo_get_stats          = bcm_sysport_get_nstats,
 +      .ndo_get_stats64        = bcm_sysport_get_stats64,
  };
  
  #define REV_FMT       "v%2x.%02x"
@@@ -2146,8 -2104,6 +2148,8 @@@ static int bcm_sysport_probe(struct pla
        /* libphy will adjust the link state accordingly */
        netif_carrier_off(dev);
  
 +      u64_stats_init(&priv->syncp);
 +
        ret = register_netdev(dev);
        if (ret) {
                dev_err(&pdev->dev, "failed to register net_device\n");
index d6367c10afb56f3daa9d938b896f49e3d7593575,f20b3d2a4c2330543f64eee1334ae4e543317f9c..aacec8bc19d5fbf6fe0f007d8a6a59fe2df23c8d
@@@ -33,7 -33,6 +33,7 @@@
  #include <linux/mii.h>
  #include <linux/if.h>
  #include <linux/if_vlan.h>
 +#include <linux/if_bridge.h>
  #include <linux/rtc.h>
  #include <linux/bpf.h>
  #include <net/ip.h>
@@@ -49,8 -48,6 +49,8 @@@
  #include <linux/aer.h>
  #include <linux/bitmap.h>
  #include <linux/cpu_rmap.h>
 +#include <linux/cpumask.h>
 +#include <net/pkt_cls.h>
  
  #include "bnxt_hsi.h"
  #include "bnxt.h"
@@@ -59,8 -56,6 +59,8 @@@
  #include "bnxt_ethtool.h"
  #include "bnxt_dcb.h"
  #include "bnxt_xdp.h"
 +#include "bnxt_vfr.h"
 +#include "bnxt_tc.h"
  
  #define BNXT_TX_TIMEOUT               (5 * HZ)
  
@@@ -106,8 -101,6 +106,8 @@@ enum board_idx 
        BCM57416_NPAR,
        BCM57452,
        BCM57454,
 +      BCM58802,
 +      BCM58808,
        NETXTREME_E_VF,
        NETXTREME_C_VF,
  };
  static const struct {
        char *name;
  } board_info[] = {
 -      { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" },
 -      { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" },
 -      { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
 -      { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" },
 -      { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" },
 -      { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" },
 -      { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" },
 -      { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" },
 -      { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" },
 -      { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" },
 -      { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" },
 -      { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" },
 -      { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" },
 -      { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" },
 -      { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
 -      { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" },
 -      { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" },
 -      { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" },
 -      { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" },
 -      { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" },
 -      { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 -      { "Broadcom NetXtreme-E Ethernet Virtual Function" },
 -      { "Broadcom NetXtreme-C Ethernet Virtual Function" },
 +      [BCM57301] = { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" },
 +      [BCM57302] = { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" },
 +      [BCM57304] = { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
 +      [BCM57417_NPAR] = { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" },
 +      [BCM58700] = { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" },
 +      [BCM57311] = { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" },
 +      [BCM57312] = { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" },
 +      [BCM57402] = { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" },
 +      [BCM57404] = { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" },
 +      [BCM57406] = { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" },
 +      [BCM57402_NPAR] = { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" },
 +      [BCM57407] = { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" },
 +      [BCM57412] = { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" },
 +      [BCM57414] = { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" },
 +      [BCM57416] = { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" },
 +      [BCM57417] = { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" },
 +      [BCM57412_NPAR] = { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" },
 +      [BCM57314] = { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
 +      [BCM57417_SFP] = { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" },
 +      [BCM57416_SFP] = { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" },
 +      [BCM57404_NPAR] = { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" },
 +      [BCM57406_NPAR] = { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" },
 +      [BCM57407_SFP] = { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" },
 +      [BCM57407_NPAR] = { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" },
 +      [BCM57414_NPAR] = { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" },
 +      [BCM57416_NPAR] = { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" },
 +      [BCM57452] = { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" },
 +      [BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 +      [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
 +      [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 +      [NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
 +      [NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
  };
  
  static const struct pci_device_id bnxt_pci_tbl[] = {
 +      { PCI_VDEVICE(BROADCOM, 0x1614), .driver_data = BCM57454 },
        { PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR },
        { PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 },
        { PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 },
        { PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR },
        { PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR },
        { PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR },
 +      { PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 },
        { PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 },
 -      { PCI_VDEVICE(BROADCOM, 0x1614), .driver_data = BCM57454 },
 +      { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
  #ifdef CONFIG_BNXT_SRIOV
        { PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF },
        { PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF },
@@@ -254,16 -243,6 +254,16 @@@ const u16 bnxt_lhint_arr[] = 
        TX_BD_FLAGS_LHINT_2048_AND_LARGER,
  };
  
 +static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
 +{
 +      struct metadata_dst *md_dst = skb_metadata_dst(skb);
 +
 +      if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)
 +              return 0;
 +
 +      return md_dst->u.port_info.port_id;
 +}
 +
  static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
  {
        struct bnxt *bp = netdev_priv(dev);
        tx_buf->nr_frags = last_frag;
  
        vlan_tag_flags = 0;
 -      cfa_action = 0;
 +      cfa_action = bnxt_xmit_get_cfa_action(skb);
        if (skb_vlan_tag_present(skb)) {
                vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
                                 skb_vlan_tag_get(skb);
                        tx_push1->tx_bd_hsize_lflags = 0;
  
                tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
 -              tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
 +              tx_push1->tx_bd_cfa_action =
 +                      cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
  
                end = pdata + length;
                end = PTR_ALIGN(end, 8) - 1;
@@@ -449,8 -427,7 +449,8 @@@ normal_tx
        txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
  
        txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
 -      txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
 +      txbd1->tx_bd_cfa_action =
 +                      cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
        for (i = 0; i < last_frag; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  
@@@ -1055,10 -1032,7 +1055,10 @@@ static void bnxt_tpa_start(struct bnxt 
                bnxt_sched_reset(bp, rxr);
                return;
        }
 -
 +      /* Store cfa_code in tpa_info to use in tpa_end
 +       * completion processing.
 +       */
 +      tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1);
        prod_rx_buf->data = tpa_info->data;
        prod_rx_buf->data_ptr = tpa_info->data_ptr;
  
@@@ -1293,17 -1267,6 +1293,17 @@@ static inline struct sk_buff *bnxt_gro_
        return skb;
  }
  
 +/* Given the cfa_code of a received packet determine which
 + * netdev (vf-rep or PF) the packet is destined to.
 + */
 +static struct net_device *bnxt_get_pkt_dev(struct bnxt *bp, u16 cfa_code)
 +{
 +      struct net_device *dev = bnxt_get_vf_rep(bp, cfa_code);
 +
 +      /* if vf-rep dev is NULL, the packet must belong to the PF */
 +      return dev ? dev : bp->dev;
 +}
 +
  static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                                           struct bnxt_napi *bnapi,
                                           u32 *raw_cons,
                        return NULL;
                }
        }
 -      skb->protocol = eth_type_trans(skb, bp->dev);
 +
 +      skb->protocol =
 +              eth_type_trans(skb, bnxt_get_pkt_dev(bp, tpa_info->cfa_code));
  
        if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
                skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);
        return skb;
  }
  
 +static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
 +                           struct sk_buff *skb)
 +{
 +      if (skb->dev != bp->dev) {
 +              /* this packet belongs to a vf-rep */
 +              bnxt_vf_rep_rx(bp, skb);
 +              return;
 +      }
 +      skb_record_rx_queue(skb, bnapi->index);
 +      napi_gro_receive(&bnapi->napi, skb);
 +}
 +
  /* returns the following:
   * 1       - 1 packet successfully received
   * 0       - successful TPA_START, packet not completed yet
@@@ -1454,7 -1403,7 +1454,7 @@@ static int bnxt_rx_pkt(struct bnxt *bp
        struct rx_cmp *rxcmp;
        struct rx_cmp_ext *rxcmp1;
        u32 tmp_raw_cons = *raw_cons;
 -      u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
 +      u16 cfa_code, cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
        struct bnxt_sw_rx_bd *rx_buf;
        unsigned int len;
        u8 *data_ptr, agg_bufs, cmp_type;
  
                rc = -ENOMEM;
                if (likely(skb)) {
 -                      skb_record_rx_queue(skb, bnapi->index);
 -                      napi_gro_receive(&bnapi->napi, skb);
 +                      bnxt_deliver_skb(bp, bnapi, skb);
                        rc = 1;
                }
                *event |= BNXT_RX_EVENT;
                skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type);
        }
  
 -      skb->protocol = eth_type_trans(skb, dev);
 +      cfa_code = RX_CMP_CFA_CODE(rxcmp1);
 +      skb->protocol = eth_type_trans(skb, bnxt_get_pkt_dev(bp, cfa_code));
  
        if ((rxcmp1->rx_cmp_flags2 &
             cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
                }
        }
  
 -      skb_record_rx_queue(skb, bnapi->index);
 -      napi_gro_receive(&bnapi->napi, skb);
 +      bnxt_deliver_skb(bp, bnapi, skb);
        rc = 1;
  
  next_rx:
@@@ -1852,13 -1802,6 +1852,13 @@@ static int bnxt_poll_work(struct bnxt *
                                                           &event);
                        if (likely(rc >= 0))
                                rx_pkts += rc;
 +                      /* Increment rx_pkts when rc is -ENOMEM to count towards
 +                       * the NAPI budget.  Otherwise, we may potentially loop
 +                       * here forever if we consistently cannot allocate
 +                       * buffers.
 +                       */
 +                      else if (rc == -ENOMEM)
 +                              rx_pkts++;
                        else if (rc == -EBUSY)  /* partial completion */
                                break;
                } else if (unlikely((TX_CMP_TYPE(txcmp) ==
@@@ -4477,33 -4420,9 +4477,33 @@@ static int bnxt_hwrm_reserve_tx_rings(s
        mutex_lock(&bp->hwrm_cmd_lock);
        rc = __bnxt_hwrm_get_tx_rings(bp, 0xffff, tx_rings);
        mutex_unlock(&bp->hwrm_cmd_lock);
 +      if (!rc)
 +              bp->tx_reserved_rings = *tx_rings;
        return rc;
  }
  
 +static int bnxt_hwrm_check_tx_rings(struct bnxt *bp, int tx_rings)
 +{
 +      struct hwrm_func_cfg_input req = {0};
 +      int rc;
 +
 +      if (bp->hwrm_spec_code < 0x10801)
 +              return 0;
 +
 +      if (BNXT_VF(bp))
 +              return 0;
 +
 +      bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
 +      req.fid = cpu_to_le16(0xffff);
 +      req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST);
 +      req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS);
 +      req.num_tx_rings = cpu_to_le16(tx_rings);
 +      rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 +      if (rc)
 +              return -ENOMEM;
 +      return 0;
 +}
 +
  static void bnxt_hwrm_set_coal_params(struct bnxt *bp, u32 max_bufs,
        u32 buf_tmrs, u16 flags,
        struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req)
@@@ -4658,7 -4577,6 +4658,7 @@@ static int bnxt_hwrm_func_qcfg(struct b
  {
        struct hwrm_func_qcfg_input req = {0};
        struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
 +      u16 flags;
        int rc;
  
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
                vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
        }
  #endif
 -      if (BNXT_PF(bp)) {
 -              u16 flags = le16_to_cpu(resp->flags);
 -
 -              if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED |
 -                           FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED))
 -                      bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
 -              if (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)
 -                      bp->flags |= BNXT_FLAG_MULTI_HOST;
 +      flags = le16_to_cpu(resp->flags);
 +      if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED |
 +                   FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED)) {
 +              bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
 +              if (flags & FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)
 +                      bp->flags |= BNXT_FLAG_FW_DCBX_AGENT;
        }
 +      if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST))
 +              bp->flags |= BNXT_FLAG_MULTI_HOST;
  
        switch (resp->port_partition_type) {
        case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
                bp->port_partition_type = resp->port_partition_type;
                break;
        }
 +      if (bp->hwrm_spec_code < 0x10707 ||
 +          resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEB)
 +              bp->br_mode = BRIDGE_MODE_VEB;
 +      else if (resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEPA)
 +              bp->br_mode = BRIDGE_MODE_VEPA;
 +      else
 +              bp->br_mode = BRIDGE_MODE_UNDEF;
  
  func_qcfg_exit:
        mutex_unlock(&bp->hwrm_cmd_lock);
@@@ -4736,7 -4647,6 +4736,6 @@@ static int bnxt_hwrm_func_qcaps(struct 
                pf->port_id = le16_to_cpu(resp->port_id);
                bp->dev->dev_port = pf->port_id;
                memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN);
-               memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN);
                pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
                pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
                pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings);
                vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx);
  
                memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN);
-               mutex_unlock(&bp->hwrm_cmd_lock);
-               if (is_valid_ether_addr(vf->mac_addr)) {
-                       /* overwrite netdev dev_adr with admin VF MAC */
-                       memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
-               } else {
-                       eth_hw_addr_random(bp->dev);
-                       rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
-               }
-               return rc;
  #endif
        }
  
@@@ -5000,26 -4900,6 +4989,26 @@@ static void bnxt_hwrm_resource_free(str
        }
  }
  
 +static int bnxt_hwrm_set_br_mode(struct bnxt *bp, u16 br_mode)
 +{
 +      struct hwrm_func_cfg_input req = {0};
 +      int rc;
 +
 +      bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
 +      req.fid = cpu_to_le16(0xffff);
 +      req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
 +      if (br_mode == BRIDGE_MODE_VEB)
 +              req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
 +      else if (br_mode == BRIDGE_MODE_VEPA)
 +              req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
 +      else
 +              return -EINVAL;
 +      rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 +      if (rc)
 +              rc = -EIO;
 +      return rc;
 +}
 +
  static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
  {
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
@@@ -5155,15 -5035,6 +5144,15 @@@ static int bnxt_init_chip(struct bnxt *
                                   rc);
                        goto err_out;
                }
 +              if (bp->tx_reserved_rings != bp->tx_nr_rings) {
 +                      int tx = bp->tx_nr_rings;
 +
 +                      if (bnxt_hwrm_reserve_tx_rings(bp, &tx) ||
 +                          tx < bp->tx_nr_rings) {
 +                              rc = -ENOMEM;
 +                              goto err_out;
 +                      }
 +              }
        }
  
        rc = bnxt_hwrm_ring_alloc(bp);
@@@ -5570,15 -5441,8 +5559,15 @@@ static void bnxt_free_irq(struct bnxt *
  
        for (i = 0; i < bp->cp_nr_rings; i++) {
                irq = &bp->irq_tbl[i];
 -              if (irq->requested)
 +              if (irq->requested) {
 +                      if (irq->have_cpumask) {
 +                              irq_set_affinity_hint(irq->vector, NULL);
 +                              free_cpumask_var(irq->cpu_mask);
 +                              irq->have_cpumask = 0;
 +                      }
                        free_irq(irq->vector, bp->bnapi[i]);
 +              }
 +
                irq->requested = 0;
        }
  }
@@@ -5611,21 -5475,6 +5600,21 @@@ static int bnxt_request_irq(struct bnx
                        break;
  
                irq->requested = 1;
 +
 +              if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
 +                      int numa_node = dev_to_node(&bp->pdev->dev);
 +
 +                      irq->have_cpumask = 1;
 +                      cpumask_set_cpu(cpumask_local_spread(i, numa_node),
 +                                      irq->cpu_mask);
 +                      rc = irq_set_affinity_hint(irq->vector, irq->cpu_mask);
 +                      if (rc) {
 +                              netdev_warn(bp->dev,
 +                                          "Set affinity failed, IRQ = %d\n",
 +                                          irq->vector);
 +                              break;
 +                      }
 +              }
        }
        return rc;
  }
@@@ -5699,10 -5548,12 +5688,10 @@@ void bnxt_tx_disable(struct bnxt *bp
  {
        int i;
        struct bnxt_tx_ring_info *txr;
 -      struct netdev_queue *txq;
  
        if (bp->tx_ring) {
                for (i = 0; i < bp->tx_nr_rings; i++) {
                        txr = &bp->tx_ring[i];
 -                      txq = netdev_get_tx_queue(bp->dev, i);
                        txr->dev_state = BNXT_DEV_STATE_CLOSING;
                }
        }
@@@ -5715,9 -5566,11 +5704,9 @@@ void bnxt_tx_enable(struct bnxt *bp
  {
        int i;
        struct bnxt_tx_ring_info *txr;
 -      struct netdev_queue *txq;
  
        for (i = 0; i < bp->tx_nr_rings; i++) {
                txr = &bp->tx_ring[i];
 -              txq = netdev_get_tx_queue(bp->dev, i);
                txr->dev_state = 0;
        }
        netif_tx_wake_all_queues(bp->dev);
@@@ -5782,7 -5635,7 +5771,7 @@@ static int bnxt_hwrm_phy_qcaps(struct b
        if (rc)
                goto hwrm_phy_qcaps_exit;
  
 -      if (resp->eee_supported & PORT_PHY_QCAPS_RESP_EEE_SUPPORTED) {
 +      if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
                struct ethtool_eee *eee = &bp->eee;
                u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
  
                link_info->support_auto_speeds =
                        le16_to_cpu(resp->supported_speeds_auto_mode);
  
 +      bp->port_count = resp->port_cnt;
 +
  hwrm_phy_qcaps_exit:
        mutex_unlock(&bp->hwrm_cmd_lock);
        return rc;
@@@ -5824,15 -5675,13 +5813,15 @@@ static int bnxt_update_link(struct bnx
  
        memcpy(&link_info->phy_qcfg_resp, resp, sizeof(*resp));
        link_info->phy_link_status = resp->link;
 -      link_info->duplex =  resp->duplex;
 +      link_info->duplex = resp->duplex_cfg;
 +      if (bp->hwrm_spec_code >= 0x10800)
 +              link_info->duplex = resp->duplex_state;
        link_info->pause = resp->pause;
        link_info->auto_mode = resp->auto_mode;
        link_info->auto_pause_setting = resp->auto_pause;
        link_info->lp_pause = resp->link_partner_adv_pause;
        link_info->force_pause_setting = resp->force_pause;
 -      link_info->duplex_setting = resp->duplex;
 +      link_info->duplex_setting = resp->duplex_cfg;
        if (link_info->phy_link_status == BNXT_LINK_LINK)
                link_info->link_speed = le16_to_cpu(resp->link_speed);
        else
@@@ -6354,9 -6203,6 +6343,9 @@@ static int __bnxt_open_nic(struct bnxt 
        /* Poll link status and check for SFP+ module status */
        bnxt_get_port_module_status(bp);
  
 +      /* VF-reps may need to be re-opened after the PF is re-opened */
 +      if (BNXT_PF(bp))
 +              bnxt_vf_reps_open(bp);
        return 0;
  
  open_err:
@@@ -6445,10 -6291,6 +6434,10 @@@ int bnxt_close_nic(struct bnxt *bp, boo
                if (rc)
                        netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n");
        }
 +
 +      /* Close the VF-reps before closing PF */
 +      if (BNXT_PF(bp))
 +              bnxt_vf_reps_close(bp);
  #endif
        /* Change device state to avoid TX queue wake up's */
        bnxt_tx_disable(bp);
@@@ -6960,8 -6802,7 +6949,8 @@@ static void bnxt_timer(unsigned long da
        if (atomic_read(&bp->intr_sem) != 0)
                goto bnxt_restart_timer;
  
 -      if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS)) {
 +      if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) &&
 +          bp->stats_coal_ticks) {
                set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
                schedule_work(&bp->sp_task);
        }
@@@ -7071,8 -6912,8 +7060,8 @@@ static void bnxt_sp_task(struct work_st
  }
  
  /* Under rtnl_lock */
 -int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
 -                     int tx_xdp)
 +int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
 +                   int tx_xdp)
  {
        int max_rx, max_tx, tx_sets = 1;
        int tx_rings_needed;
        if (max_tx < tx_rings_needed)
                return -ENOMEM;
  
 -      if (bnxt_hwrm_reserve_tx_rings(bp, &tx_rings_needed) ||
 -          tx_rings_needed < (tx * tx_sets + tx_xdp))
 -              return -ENOMEM;
 -      return 0;
 +      return bnxt_hwrm_check_tx_rings(bp, tx_rings_needed);
  }
  
  static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@@ -7281,8 -7125,8 +7270,8 @@@ int bnxt_setup_mq_tc(struct net_device 
        if (bp->flags & BNXT_FLAG_SHARED_RINGS)
                sh = true;
  
 -      rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
 -                              sh, tc, bp->tx_nr_rings_xdp);
 +      rc = bnxt_check_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
 +                            sh, tc, bp->tx_nr_rings_xdp);
        if (rc)
                return rc;
  
                bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
                netdev_reset_tc(dev);
        }
+       bp->tx_nr_rings += bp->tx_nr_rings_xdp;
        bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
                               bp->tx_nr_rings + bp->rx_nr_rings;
        bp->num_stat_ctxs = bp->cp_nr_rings;
        return 0;
  }
  
 -static int bnxt_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
 -                       __be16 proto, struct tc_to_netdev *ntc)
 +static int bnxt_setup_flower(struct net_device *dev,
 +                           struct tc_cls_flower_offload *cls_flower)
  {
 -      if (ntc->type != TC_SETUP_MQPRIO)
 -              return -EINVAL;
 +      struct bnxt *bp = netdev_priv(dev);
  
 -      ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 +      if (BNXT_VF(bp))
 +              return -EOPNOTSUPP;
  
 -      return bnxt_setup_mq_tc(dev, ntc->mqprio->num_tc);
 +      return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower);
 +}
 +
 +static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
 +                       void *type_data)
 +{
 +      switch (type) {
 +      case TC_SETUP_CLSFLOWER:
 +              return bnxt_setup_flower(dev, type_data);
 +      case TC_SETUP_MQPRIO: {
 +              struct tc_mqprio_qopt *mqprio = type_data;
 +
 +              mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 +
 +              return bnxt_setup_mq_tc(dev, mqprio->num_tc);
 +      }
 +      default:
 +              return -EOPNOTSUPP;
 +      }
  }
  
  #ifdef CONFIG_RFS_ACCEL
@@@ -7585,102 -7412,6 +7575,102 @@@ static void bnxt_udp_tunnel_del(struct 
        schedule_work(&bp->sp_task);
  }
  
 +static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 +                             struct net_device *dev, u32 filter_mask,
 +                             int nlflags)
 +{
 +      struct bnxt *bp = netdev_priv(dev);
 +
 +      return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bp->br_mode, 0, 0,
 +                                     nlflags, filter_mask, NULL);
 +}
 +
 +static int bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
 +                             u16 flags)
 +{
 +      struct bnxt *bp = netdev_priv(dev);
 +      struct nlattr *attr, *br_spec;
 +      int rem, rc = 0;
 +
 +      if (bp->hwrm_spec_code < 0x10708 || !BNXT_SINGLE_PF(bp))
 +              return -EOPNOTSUPP;
 +
 +      br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
 +      if (!br_spec)
 +              return -EINVAL;
 +
 +      nla_for_each_nested(attr, br_spec, rem) {
 +              u16 mode;
 +
 +              if (nla_type(attr) != IFLA_BRIDGE_MODE)
 +                      continue;
 +
 +              if (nla_len(attr) < sizeof(mode))
 +                      return -EINVAL;
 +
 +              mode = nla_get_u16(attr);
 +              if (mode == bp->br_mode)
 +                      break;
 +
 +              rc = bnxt_hwrm_set_br_mode(bp, mode);
 +              if (!rc)
 +                      bp->br_mode = mode;
 +              break;
 +      }
 +      return rc;
 +}
 +
 +static int bnxt_get_phys_port_name(struct net_device *dev, char *buf,
 +                                 size_t len)
 +{
 +      struct bnxt *bp = netdev_priv(dev);
 +      int rc;
 +
 +      /* The PF and its VF-reps only support the switchdev framework */
 +      if (!BNXT_PF(bp))
 +              return -EOPNOTSUPP;
 +
 +      rc = snprintf(buf, len, "p%d", bp->pf.port_id);
 +
 +      if (rc >= len)
 +              return -EOPNOTSUPP;
 +      return 0;
 +}
 +
 +int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr)
 +{
 +      if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
 +              return -EOPNOTSUPP;
 +
 +      /* The PF and its VF-reps only support the switchdev framework */
 +      if (!BNXT_PF(bp))
 +              return -EOPNOTSUPP;
 +
 +      switch (attr->id) {
 +      case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
 +              /* In SRIOV each PF-pool (PF + child VFs) serves as a
 +               * switching domain, the PF's perm mac-addr can be used
 +               * as the unique parent-id
 +               */
 +              attr->u.ppid.id_len = ETH_ALEN;
 +              ether_addr_copy(attr->u.ppid.id, bp->pf.mac_addr);
 +              break;
 +      default:
 +              return -EOPNOTSUPP;
 +      }
 +      return 0;
 +}
 +
 +static int bnxt_swdev_port_attr_get(struct net_device *dev,
 +                                  struct switchdev_attr *attr)
 +{
 +      return bnxt_port_attr_get(netdev_priv(dev), attr);
 +}
 +
 +static const struct switchdev_ops bnxt_switchdev_ops = {
 +      .switchdev_port_attr_get        = bnxt_swdev_port_attr_get
 +};
 +
  static const struct net_device_ops bnxt_netdev_ops = {
        .ndo_open               = bnxt_open,
        .ndo_start_xmit         = bnxt_start_xmit,
        .ndo_udp_tunnel_add     = bnxt_udp_tunnel_add,
        .ndo_udp_tunnel_del     = bnxt_udp_tunnel_del,
        .ndo_xdp                = bnxt_xdp,
 +      .ndo_bridge_getlink     = bnxt_bridge_getlink,
 +      .ndo_bridge_setlink     = bnxt_bridge_setlink,
 +      .ndo_get_phys_port_name = bnxt_get_phys_port_name
  };
  
  static void bnxt_remove_one(struct pci_dev *pdev)
        struct net_device *dev = pci_get_drvdata(pdev);
        struct bnxt *bp = netdev_priv(dev);
  
 -      if (BNXT_PF(bp))
 +      if (BNXT_PF(bp)) {
                bnxt_sriov_disable(bp);
 +              bnxt_dl_unregister(bp);
 +      }
  
        pci_disable_pcie_error_reporting(pdev);
        unregister_netdev(dev);
 +      bnxt_shutdown_tc(bp);
        cancel_work_sync(&bp->sp_task);
        bp->sp_event = 0;
  
@@@ -7898,9 -7623,6 +7888,9 @@@ static int bnxt_set_dflt_rings(struct b
        if (sh)
                bp->flags |= BNXT_FLAG_SHARED_RINGS;
        dflt_rings = netif_get_num_default_rss_queues();
 +      /* Reduce default rings to reduce memory usage on multi-port cards */
 +      if (bp->port_count > 1)
 +              dflt_rings = min_t(int, dflt_rings, 4);
        rc = bnxt_get_dflt_rings(bp, &max_rx_rings, &max_tx_rings, sh);
        if (rc)
                return rc;
@@@ -7929,6 -7651,28 +7919,28 @@@ void bnxt_restore_pf_fw_resources(struc
        bnxt_subtract_ulp_resources(bp, BNXT_ROCE_ULP);
  }
  
+ static int bnxt_init_mac_addr(struct bnxt *bp)
+ {
+       int rc = 0;
+       if (BNXT_PF(bp)) {
+               memcpy(bp->dev->dev_addr, bp->pf.mac_addr, ETH_ALEN);
+       } else {
+ #ifdef CONFIG_BNXT_SRIOV
+               struct bnxt_vf_info *vf = &bp->vf;
+               if (is_valid_ether_addr(vf->mac_addr)) {
+                       /* overwrite netdev dev_addr with admin VF MAC */
+                       memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
+               } else {
+                       eth_hw_addr_random(bp->dev);
+                       rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
+               }
+ #endif
+       }
+       return rc;
+ }
  static void bnxt_parse_log_pcie_link(struct bnxt *bp)
  {
        enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN;
@@@ -7978,7 -7722,6 +7990,7 @@@ static int bnxt_init_one(struct pci_de
        dev->netdev_ops = &bnxt_netdev_ops;
        dev->watchdog_timeo = BNXT_TX_TIMEOUT;
        dev->ethtool_ops = &bnxt_ethtool_ops;
 +      SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops);
        pci_set_drvdata(pdev, dev);
  
        rc = bnxt_alloc_hwrm_resources(bp);
  
  #ifdef CONFIG_BNXT_SRIOV
        init_waitqueue_head(&bp->sriov_cfg_wait);
 +      mutex_init(&bp->sriov_lock);
  #endif
        bp->gro_func = bnxt_gro_func_5730x;
        if (BNXT_CHIP_P4_PLUS(bp))
                rc = -1;
                goto init_err_pci_clean;
        }
+       rc = bnxt_init_mac_addr(bp);
+       if (rc) {
+               dev_err(&pdev->dev, "Unable to initialize mac address.\n");
+               rc = -EADDRNOTAVAIL;
+               goto init_err_pci_clean;
+       }
        rc = bnxt_hwrm_queue_qportcfg(bp);
        if (rc) {
                netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n",
        bnxt_ethtool_init(bp);
        bnxt_dcb_init(bp);
  
 +      rc = bnxt_probe_phy(bp);
 +      if (rc)
 +              goto init_err_pci_clean;
 +
        bnxt_set_rx_skb_mode(bp, false);
        bnxt_set_tpa_flags(bp);
        bnxt_set_ring_params(bp);
        if (dev->hw_features & NETIF_F_HW_VLAN_CTAG_RX)
                bp->flags |= BNXT_FLAG_STRIP_VLAN;
  
 -      rc = bnxt_probe_phy(bp);
 -      if (rc)
 -              goto init_err_pci_clean;
 -
        rc = bnxt_init_int_mode(bp);
        if (rc)
                goto init_err_pci_clean;
        else
                device_set_wakeup_capable(&pdev->dev, false);
  
 +      if (BNXT_PF(bp))
 +              bnxt_init_tc(bp);
 +
        rc = register_netdev(dev);
        if (rc)
 -              goto init_err_clr_int;
 +              goto init_err_cleanup_tc;
 +
 +      if (BNXT_PF(bp))
 +              bnxt_dl_register(bp);
  
        netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
                    board_info[ent->driver_data].name,
  
        return 0;
  
 -init_err_clr_int:
 +init_err_cleanup_tc:
 +      bnxt_shutdown_tc(bp);
        bnxt_clear_int_mode(bp);
  
  init_err_pci_clean:
index 612d1ef3b5f5ebe622ae1b74b74697d8d8a2c001,fea3f9a5fb2d37221cbf9abed77cdcdcf906a00c..9cebca89691380fd34aacb9c7dbb341c80b68229
  #define GENET_RDMA_REG_OFF    (priv->hw_params->rdma_offset + \
                                TOTAL_DESC * DMA_DESC_SIZE)
  
 +static inline void bcmgenet_writel(u32 value, void __iomem *offset)
 +{
 +      /* MIPS chips strapped for BE will automagically configure the
 +       * peripheral registers for CPU-native byte order.
 +       */
 +      if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 +              __raw_writel(value, offset);
 +      else
 +              writel_relaxed(value, offset);
 +}
 +
 +static inline u32 bcmgenet_readl(void __iomem *offset)
 +{
 +      if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 +              return __raw_readl(offset);
 +      else
 +              return readl_relaxed(offset);
 +}
 +
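For illustration only, a minimal userspace sketch of the same compile-time byte-order dispatch used by bcmgenet_writel()/bcmgenet_readl(); raw_write32() and le_write32() are hypothetical stand-ins for __raw_writel()/writel_relaxed(), not kernel helpers:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel MMIO accessors; illustration only. */
static void raw_write32(uint32_t val) { printf("CPU-native write %#x\n", (unsigned)val); }
static void le_write32(uint32_t val)  { printf("little-endian write %#x\n", (unsigned)val); }

/* Models the IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
 * test: big-endian MIPS presents these registers in CPU byte order, so no
 * byte swap is wanted there.
 */
#if defined(__mips__) && defined(__MIPSEB__)
#define NATIVE_ORDER_REGS 1
#else
#define NATIVE_ORDER_REGS 0
#endif

static void genet_write32(uint32_t val)
{
	if (NATIVE_ORDER_REGS)
		raw_write32(val);
	else
		le_write32(val);
}

int main(void)
{
	genet_write32(0x12345678);
	return 0;
}
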
  static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
                                             void __iomem *d, u32 value)
  {
 -      __raw_writel(value, d + DMA_DESC_LENGTH_STATUS);
 +      bcmgenet_writel(value, d + DMA_DESC_LENGTH_STATUS);
  }
  
  static inline u32 dmadesc_get_length_status(struct bcmgenet_priv *priv,
                                            void __iomem *d)
  {
 -      return __raw_readl(d + DMA_DESC_LENGTH_STATUS);
 +      return bcmgenet_readl(d + DMA_DESC_LENGTH_STATUS);
  }
  
  static inline void dmadesc_set_addr(struct bcmgenet_priv *priv,
                                    void __iomem *d,
                                    dma_addr_t addr)
  {
 -      __raw_writel(lower_32_bits(addr), d + DMA_DESC_ADDRESS_LO);
 +      bcmgenet_writel(lower_32_bits(addr), d + DMA_DESC_ADDRESS_LO);
  
        /* Register writes to GISB bus can take a couple hundred nanoseconds
         * and are done for each packet, save these expensive writes unless
         */
  #ifdef CONFIG_PHYS_ADDR_T_64BIT
        if (priv->hw_params->flags & GENET_HAS_40BITS)
 -              __raw_writel(upper_32_bits(addr), d + DMA_DESC_ADDRESS_HI);
 +              bcmgenet_writel(upper_32_bits(addr), d + DMA_DESC_ADDRESS_HI);
  #endif
  }
  
@@@ -132,7 -113,7 +132,7 @@@ static inline dma_addr_t dmadesc_get_ad
  {
        dma_addr_t addr;
  
 -      addr = __raw_readl(d + DMA_DESC_ADDRESS_LO);
 +      addr = bcmgenet_readl(d + DMA_DESC_ADDRESS_LO);
  
        /* Register writes to GISB bus can take a couple hundred nanoseconds
         * and are done for each packet, save these expensive writes unless
         */
  #ifdef CONFIG_PHYS_ADDR_T_64BIT
        if (priv->hw_params->flags & GENET_HAS_40BITS)
 -              addr |= (u64)__raw_readl(d + DMA_DESC_ADDRESS_HI) << 32;
 +              addr |= (u64)bcmgenet_readl(d + DMA_DESC_ADDRESS_HI) << 32;
  #endif
        return addr;
  }
@@@ -175,8 -156,8 +175,8 @@@ static inline u32 bcmgenet_tbuf_ctrl_ge
        if (GENET_IS_V1(priv))
                return bcmgenet_rbuf_readl(priv, TBUF_CTRL_V1);
        else
 -              return __raw_readl(priv->base +
 -                              priv->hw_params->tbuf_offset + TBUF_CTRL);
 +              return bcmgenet_readl(priv->base +
 +                                    priv->hw_params->tbuf_offset + TBUF_CTRL);
  }
  
  static inline void bcmgenet_tbuf_ctrl_set(struct bcmgenet_priv *priv, u32 val)
        if (GENET_IS_V1(priv))
                bcmgenet_rbuf_writel(priv, val, TBUF_CTRL_V1);
        else
 -              __raw_writel(val, priv->base +
 +              bcmgenet_writel(val, priv->base +
                                priv->hw_params->tbuf_offset + TBUF_CTRL);
  }
  
@@@ -193,8 -174,8 +193,8 @@@ static inline u32 bcmgenet_bp_mc_get(st
        if (GENET_IS_V1(priv))
                return bcmgenet_rbuf_readl(priv, TBUF_BP_MC_V1);
        else
 -              return __raw_readl(priv->base +
 -                              priv->hw_params->tbuf_offset + TBUF_BP_MC);
 +              return bcmgenet_readl(priv->base +
 +                                    priv->hw_params->tbuf_offset + TBUF_BP_MC);
  }
  
  static inline void bcmgenet_bp_mc_set(struct bcmgenet_priv *priv, u32 val)
        if (GENET_IS_V1(priv))
                bcmgenet_rbuf_writel(priv, val, TBUF_BP_MC_V1);
        else
 -              __raw_writel(val, priv->base +
 +              bcmgenet_writel(val, priv->base +
                                priv->hw_params->tbuf_offset + TBUF_BP_MC);
  }
  
@@@ -345,28 -326,28 +345,28 @@@ static inline struct bcmgenet_priv *dev
  static inline u32 bcmgenet_tdma_readl(struct bcmgenet_priv *priv,
                                      enum dma_reg r)
  {
 -      return __raw_readl(priv->base + GENET_TDMA_REG_OFF +
 -                      DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
 +      return bcmgenet_readl(priv->base + GENET_TDMA_REG_OFF +
 +                            DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
  }
  
  static inline void bcmgenet_tdma_writel(struct bcmgenet_priv *priv,
                                        u32 val, enum dma_reg r)
  {
 -      __raw_writel(val, priv->base + GENET_TDMA_REG_OFF +
 +      bcmgenet_writel(val, priv->base + GENET_TDMA_REG_OFF +
                        DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
  }
  
  static inline u32 bcmgenet_rdma_readl(struct bcmgenet_priv *priv,
                                      enum dma_reg r)
  {
 -      return __raw_readl(priv->base + GENET_RDMA_REG_OFF +
 -                      DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
 +      return bcmgenet_readl(priv->base + GENET_RDMA_REG_OFF +
 +                            DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
  }
  
  static inline void bcmgenet_rdma_writel(struct bcmgenet_priv *priv,
                                        u32 val, enum dma_reg r)
  {
 -      __raw_writel(val, priv->base + GENET_RDMA_REG_OFF +
 +      bcmgenet_writel(val, priv->base + GENET_RDMA_REG_OFF +
                        DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
  }
  
@@@ -437,16 -418,16 +437,16 @@@ static inline u32 bcmgenet_tdma_ring_re
                                           unsigned int ring,
                                           enum dma_ring_reg r)
  {
 -      return __raw_readl(priv->base + GENET_TDMA_REG_OFF +
 -                      (DMA_RING_SIZE * ring) +
 -                      genet_dma_ring_regs[r]);
 +      return bcmgenet_readl(priv->base + GENET_TDMA_REG_OFF +
 +                            (DMA_RING_SIZE * ring) +
 +                            genet_dma_ring_regs[r]);
  }
  
  static inline void bcmgenet_tdma_ring_writel(struct bcmgenet_priv *priv,
                                             unsigned int ring, u32 val,
                                             enum dma_ring_reg r)
  {
 -      __raw_writel(val, priv->base + GENET_TDMA_REG_OFF +
 +      bcmgenet_writel(val, priv->base + GENET_TDMA_REG_OFF +
                        (DMA_RING_SIZE * ring) +
                        genet_dma_ring_regs[r]);
  }
@@@ -455,16 -436,16 +455,16 @@@ static inline u32 bcmgenet_rdma_ring_re
                                           unsigned int ring,
                                           enum dma_ring_reg r)
  {
 -      return __raw_readl(priv->base + GENET_RDMA_REG_OFF +
 -                      (DMA_RING_SIZE * ring) +
 -                      genet_dma_ring_regs[r]);
 +      return bcmgenet_readl(priv->base + GENET_RDMA_REG_OFF +
 +                            (DMA_RING_SIZE * ring) +
 +                            genet_dma_ring_regs[r]);
  }
  
  static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
                                             unsigned int ring, u32 val,
                                             enum dma_ring_reg r)
  {
 -      __raw_writel(val, priv->base + GENET_RDMA_REG_OFF +
 +      bcmgenet_writel(val, priv->base + GENET_RDMA_REG_OFF +
                        (DMA_RING_SIZE * ring) +
                        genet_dma_ring_regs[r]);
  }
@@@ -1010,12 -991,12 +1010,12 @@@ static void bcmgenet_eee_enable_set(str
        bcmgenet_umac_writel(priv, reg, UMAC_EEE_CTRL);
  
        /* Enable EEE and switch to a 27Mhz clock automatically */
 -      reg = __raw_readl(priv->base + off);
 +      reg = bcmgenet_readl(priv->base + off);
        if (enable)
                reg |= TBUF_EEE_EN | TBUF_PM_EN;
        else
                reg &= ~(TBUF_EEE_EN | TBUF_PM_EN);
 -      __raw_writel(reg, priv->base + off);
 +      bcmgenet_writel(reg, priv->base + off);
  
        /* Do the same thing for RBUF */
        reg = bcmgenet_rbuf_readl(priv, RBUF_ENERGY_CTRL);
@@@ -1379,7 -1360,7 +1379,7 @@@ static unsigned int __bcmgenet_tx_recla
                if (skb) {
                        pkts_compl++;
                        bytes_compl += GENET_CB(skb)->bytes_sent;
-                       dev_kfree_skb_any(skb);
+                       dev_consume_skb_any(skb);
                }
  
                txbds_processed++;
@@@ -1894,7 -1875,7 +1894,7 @@@ static int bcmgenet_alloc_rx_buffers(st
                cb = ring->cbs + i;
                skb = bcmgenet_rx_refill(priv, cb);
                if (skb)
-                       dev_kfree_skb_any(skb);
+                       dev_consume_skb_any(skb);
                if (!cb->skb)
                        return -ENOMEM;
        }
@@@ -1913,7 -1894,7 +1913,7 @@@ static void bcmgenet_free_rx_buffers(st
  
                skb = bcmgenet_free_rx_cb(&priv->pdev->dev, cb);
                if (skb)
-                       dev_kfree_skb_any(skb);
+                       dev_consume_skb_any(skb);
        }
  }
  
index a4a33ebd0b98e24cab187921938ab133f4719f6e,0293b41171a5d90070c2ff9a954e1dd0e42aea4d..08624db8a6e9b4f0b19f8977cd7fbec4f9cf3a10
@@@ -369,12 -369,12 +369,12 @@@ int t4_wr_mbox_meat_timeout(struct adap
                list_del(&entry.list);
                spin_unlock(&adap->mbox_lock);
                ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT;
-               t4_record_mbox(adap, cmd, MBOX_LEN, access, ret);
+               t4_record_mbox(adap, cmd, size, access, ret);
                return ret;
        }
  
        /* Copy in the new mailbox command and send it on its way ... */
-       t4_record_mbox(adap, cmd, MBOX_LEN, access, 0);
+       t4_record_mbox(adap, cmd, size, access, 0);
        for (i = 0; i < size; i += 8)
                t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p++));
  
        }
  
        ret = (pcie_fw & PCIE_FW_ERR_F) ? -ENXIO : -ETIMEDOUT;
-       t4_record_mbox(adap, cmd, MBOX_LEN, access, ret);
+       t4_record_mbox(adap, cmd, size, access, ret);
        dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n",
                *(const u8 *)cmd, mbox);
        t4_report_fw_error(adap);
@@@ -913,8 -913,7 +913,8 @@@ void t4_get_regs(struct adapter *adap, 
                0xd010, 0xd03c,
                0xdfc0, 0xdfe0,
                0xe000, 0xea7c,
 -              0xf000, 0x11190,
 +              0xf000, 0x11110,
 +              0x11118, 0x11190,
                0x19040, 0x1906c,
                0x19078, 0x19080,
                0x1908c, 0x190e4,
                0x1ff00, 0x1ff84,
                0x1ffc0, 0x1ffc8,
                0x30000, 0x30030,
 -              0x30038, 0x30038,
 -              0x30040, 0x30040,
                0x30100, 0x30144,
                0x30190, 0x301a0,
                0x301a8, 0x301b8,
                0x33c3c, 0x33c50,
                0x33cf0, 0x33cfc,
                0x34000, 0x34030,
 -              0x34038, 0x34038,
 -              0x34040, 0x34040,
                0x34100, 0x34144,
                0x34190, 0x341a0,
                0x341a8, 0x341b8,
                0x37c3c, 0x37c50,
                0x37cf0, 0x37cfc,
                0x38000, 0x38030,
 -              0x38038, 0x38038,
 -              0x38040, 0x38040,
                0x38100, 0x38144,
                0x38190, 0x381a0,
                0x381a8, 0x381b8,
                0x3bc3c, 0x3bc50,
                0x3bcf0, 0x3bcfc,
                0x3c000, 0x3c030,
 -              0x3c038, 0x3c038,
 -              0x3c040, 0x3c040,
                0x3c100, 0x3c144,
                0x3c190, 0x3c1a0,
                0x3c1a8, 0x3c1b8,
                0x1190, 0x1194,
                0x11a0, 0x11a4,
                0x11b0, 0x11b4,
 -              0x11fc, 0x1258,
 -              0x1280, 0x12d4,
 -              0x12d9, 0x12d9,
 -              0x12de, 0x12de,
 -              0x12e3, 0x12e3,
 -              0x12e8, 0x133c,
 +              0x11fc, 0x1274,
 +              0x1280, 0x133c,
                0x1800, 0x18fc,
                0x3000, 0x302c,
                0x3060, 0x30b0,
                0x5ea0, 0x5eb0,
                0x5ec0, 0x5ec0,
                0x5ec8, 0x5ed0,
 +              0x5ee0, 0x5ee0,
 +              0x5ef0, 0x5ef0,
 +              0x5f00, 0x5f00,
                0x6000, 0x6020,
                0x6028, 0x6040,
                0x6058, 0x609c,
                0xd300, 0xd31c,
                0xdfc0, 0xdfe0,
                0xe000, 0xf008,
 +              0xf010, 0xf018,
 +              0xf020, 0xf028,
                0x11000, 0x11014,
                0x11048, 0x1106c,
                0x11074, 0x11088,
                0x1ff00, 0x1ff84,
                0x1ffc0, 0x1ffc8,
                0x30000, 0x30030,
 -              0x30038, 0x30038,
 -              0x30040, 0x30040,
 -              0x30048, 0x30048,
 -              0x30050, 0x30050,
 -              0x3005c, 0x30060,
 -              0x30068, 0x30068,
 -              0x30070, 0x30070,
                0x30100, 0x30168,
                0x30190, 0x301a0,
                0x301a8, 0x301b8,
                0x326a8, 0x326a8,
                0x326ec, 0x326ec,
                0x32a00, 0x32abc,
 -              0x32b00, 0x32b38,
 +              0x32b00, 0x32b18,
 +              0x32b20, 0x32b38,
                0x32b40, 0x32b58,
                0x32b60, 0x32b78,
                0x32c00, 0x32c00,
                0x32c08, 0x32c3c,
 -              0x32e00, 0x32e2c,
 -              0x32f00, 0x32f2c,
                0x33000, 0x3302c,
                0x33034, 0x33050,
                0x33058, 0x33058,
                0x33c38, 0x33c50,
                0x33cf0, 0x33cfc,
                0x34000, 0x34030,
 -              0x34038, 0x34038,
 -              0x34040, 0x34040,
 -              0x34048, 0x34048,
 -              0x34050, 0x34050,
 -              0x3405c, 0x34060,
 -              0x34068, 0x34068,
 -              0x34070, 0x34070,
                0x34100, 0x34168,
                0x34190, 0x341a0,
                0x341a8, 0x341b8,
                0x366a8, 0x366a8,
                0x366ec, 0x366ec,
                0x36a00, 0x36abc,
 -              0x36b00, 0x36b38,
 +              0x36b00, 0x36b18,
 +              0x36b20, 0x36b38,
                0x36b40, 0x36b58,
                0x36b60, 0x36b78,
                0x36c00, 0x36c00,
                0x36c08, 0x36c3c,
 -              0x36e00, 0x36e2c,
 -              0x36f00, 0x36f2c,
                0x37000, 0x3702c,
                0x37034, 0x37050,
                0x37058, 0x37058,
                0x40280, 0x40280,
                0x40304, 0x40304,
                0x40330, 0x4033c,
 -              0x41304, 0x413b8,
 -              0x413c0, 0x413c8,
 +              0x41304, 0x413c8,
                0x413d0, 0x413dc,
                0x413f0, 0x413f0,
                0x41400, 0x4140c,
@@@ -3076,179 -3099,6 +3076,179 @@@ int t4_get_exprom_version(struct adapte
        return 0;
  }
  
 +/**
 + *      t4_get_vpd_version - return the VPD version
 + *      @adapter: the adapter
 + *      @vers: where to place the version
 + *
 + *      Reads the VPD via the Firmware interface (thus this can only be called
 + *      once we're ready to issue Firmware commands).  The format of the
 + *      VPD version is adapter specific.  Returns 0 on success, an error on
 + *      failure.
 + *
 + *      Note that early versions of the Firmware didn't include the ability
 + *      to retrieve the VPD version, so we zero-out the return-value parameter
 + *      in that case to avoid leaving it with garbage in it.
 + *
 + *      Also note that the Firmware will return its cached copy of the VPD
 + *      Revision ID, not the actual Revision ID as written in the Serial
 + *      EEPROM.  This is only an issue if a new VPD has been written and the
 + *      Firmware/Chip haven't yet gone through a RESET sequence.  So it's best
 + *      to defer calling this routine till after a FW_RESET_CMD has been issued
 + *      if the Host Driver will be performing a full adapter initialization.
 + */
 +int t4_get_vpd_version(struct adapter *adapter, u32 *vers)
 +{
 +      u32 vpdrev_param;
 +      int ret;
 +
 +      vpdrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
 +                      FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_VPDREV));
 +      ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
 +                            1, &vpdrev_param, vers);
 +      if (ret)
 +              *vers = 0;
 +      return ret;
 +}
 +
 +/**
 + *      t4_get_scfg_version - return the Serial Configuration version
 + *      @adapter: the adapter
 + *      @vers: where to place the version
 + *
 + *      Reads the Serial Configuration Version via the Firmware interface
 + *      (thus this can only be called once we're ready to issue Firmware
 + *      commands).  The format of the Serial Configuration version is
 + *      adapter specific.  Returns 0 on success, an error on failure.
 + *
 + *      Note that early versions of the Firmware didn't include the ability
 + *      to retrieve the Serial Configuration version, so we zero-out the
 + *      return-value parameter in that case to avoid leaving it with
 + *      garbage in it.
 + *
 + *      Also note that the Firmware will return its cached copy of the Serial
 + *      Initialization Revision ID, not the actual Revision ID as written in
 + *      the Serial EEPROM.  This is only an issue if a new VPD has been written
 + *      and the Firmware/Chip haven't yet gone through a RESET sequence.  So
 + *      it's best to defer calling this routine till after a FW_RESET_CMD has
 + *      been issued if the Host Driver will be performing a full adapter
 + *      initialization.
 + */
 +int t4_get_scfg_version(struct adapter *adapter, u32 *vers)
 +{
 +      u32 scfgrev_param;
 +      int ret;
 +
 +      scfgrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
 +                       FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_SCFGREV));
 +      ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
 +                            1, &scfgrev_param, vers);
 +      if (ret)
 +              *vers = 0;
 +      return ret;
 +}
 +
 +/**
 + *      t4_get_version_info - extract various chip/firmware version information
 + *      @adapter: the adapter
 + *
 + *      Reads various chip/firmware version numbers and stores them into the
 + *      adapter's Adapter Parameters structure.  If any of the efforts fails,
 + *      the first failure will be returned, but all of the version numbers
 + *      will be read.
 + */
 +int t4_get_version_info(struct adapter *adapter)
 +{
 +      int ret = 0;
 +
 +      #define FIRST_RET(__getvinfo) \
 +      do { \
 +              int __ret = __getvinfo; \
 +              if (__ret && !ret) \
 +                      ret = __ret; \
 +      } while (0)
 +
 +      FIRST_RET(t4_get_fw_version(adapter, &adapter->params.fw_vers));
 +      FIRST_RET(t4_get_bs_version(adapter, &adapter->params.bs_vers));
 +      FIRST_RET(t4_get_tp_version(adapter, &adapter->params.tp_vers));
 +      FIRST_RET(t4_get_exprom_version(adapter, &adapter->params.er_vers));
 +      FIRST_RET(t4_get_scfg_version(adapter, &adapter->params.scfg_vers));
 +      FIRST_RET(t4_get_vpd_version(adapter, &adapter->params.vpd_vers));
 +
 +      #undef FIRST_RET
 +      return ret;
 +}
 +
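A standalone sketch of the FIRST_RET pattern above (remember the first failure, but still attempt every read); read_a()/read_b()/read_c() are hypothetical stand-ins for the t4_get_*_version() calls:

#include <stdio.h>

/* Hypothetical version readers; one simulates a failure. */
static int read_a(int *v) { *v = 1; return 0; }
static int read_b(int *v) { *v = 0; return -5; }
static int read_c(int *v) { *v = 3; return 0; }

/* Remember the first failure, but still run every read. */
static int get_all_versions(int *a, int *b, int *c)
{
	int ret = 0;

	#define FIRST_RET(call) \
	do { \
		int __ret = (call); \
		if (__ret && !ret) \
			ret = __ret; \
	} while (0)

	FIRST_RET(read_a(a));
	FIRST_RET(read_b(b));
	FIRST_RET(read_c(c));

	#undef FIRST_RET
	return ret;	/* -5: first error seen, yet read_c() still ran */
}

int main(void)
{
	int a, b, c;

	printf("ret=%d a=%d b=%d c=%d\n", get_all_versions(&a, &b, &c), a, b, c);
	return 0;
}
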
 +/**
 + *      t4_dump_version_info - dump all of the adapter configuration IDs
 + *      @adapter: the adapter
 + *
 + *      Dumps all of the various bits of adapter configuration version/revision
 + *      IDs information.  This is typically called at some point after
 + *      t4_get_version_info() has been called.
 + */
 +void t4_dump_version_info(struct adapter *adapter)
 +{
 +      /* Device information */
 +      dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
 +               adapter->params.vpd.id,
 +               CHELSIO_CHIP_RELEASE(adapter->params.chip));
 +      dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
 +               adapter->params.vpd.sn, adapter->params.vpd.pn);
 +
 +      /* Firmware Version */
 +      if (!adapter->params.fw_vers)
 +              dev_warn(adapter->pdev_dev, "No firmware loaded\n");
 +      else
 +              dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
 +                       FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
 +                       FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
 +                       FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
 +                       FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
 +
 +      /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
 +       * Firmware, so dev_info() is more appropriate here.)
 +       */
 +      if (!adapter->params.bs_vers)
 +              dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
 +      else
 +              dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
 +                       FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
 +                       FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
 +                       FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
 +                       FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
 +
 +      /* TP Microcode Version */
 +      if (!adapter->params.tp_vers)
 +              dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
 +      else
 +              dev_info(adapter->pdev_dev,
 +                       "TP Microcode version: %u.%u.%u.%u\n",
 +                       FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
 +                       FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
 +                       FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
 +                       FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
 +
 +      /* Expansion ROM version */
 +      if (!adapter->params.er_vers)
 +              dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
 +      else
 +              dev_info(adapter->pdev_dev,
 +                       "Expansion ROM version: %u.%u.%u.%u\n",
 +                       FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
 +                       FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
 +                       FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
 +                       FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
 +
 +      /* Serial Configuration version */
 +      dev_info(adapter->pdev_dev, "Serial Configuration version: %#x\n",
 +               adapter->params.scfg_vers);
 +
 +      /* VPD Version */
 +      dev_info(adapter->pdev_dev, "VPD version: %#x\n",
 +               adapter->params.vpd_vers);
 +}
 +
  /**
   *    t4_check_fw_version - check if the FW is supported with this driver
   *    @adap: the adapter
@@@ -3835,143 -3685,16 +3835,143 @@@ void t4_ulprx_read_la(struct adapter *a
        }
  }
  
 -#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
 -                   FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
 -                   FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
 -                   FW_PORT_CAP_ANEG)
 +#define ADVERT_MASK (FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M) | \
 +                   FW_PORT_CAP32_ANEG)
 +
 +/**
 + *    fwcaps16_to_caps32 - convert 16-bit Port Capabilities to 32-bits
 + *    @caps16: a 16-bit Port Capabilities value
 + *
 + *    Returns the equivalent 32-bit Port Capabilities value.
 + */
 +static fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16)
 +{
 +      fw_port_cap32_t caps32 = 0;
 +
 +      #define CAP16_TO_CAP32(__cap) \
 +              do { \
 +                      if (caps16 & FW_PORT_CAP_##__cap) \
 +                              caps32 |= FW_PORT_CAP32_##__cap; \
 +              } while (0)
 +
 +      CAP16_TO_CAP32(SPEED_100M);
 +      CAP16_TO_CAP32(SPEED_1G);
 +      CAP16_TO_CAP32(SPEED_25G);
 +      CAP16_TO_CAP32(SPEED_10G);
 +      CAP16_TO_CAP32(SPEED_40G);
 +      CAP16_TO_CAP32(SPEED_100G);
 +      CAP16_TO_CAP32(FC_RX);
 +      CAP16_TO_CAP32(FC_TX);
 +      CAP16_TO_CAP32(ANEG);
 +      CAP16_TO_CAP32(MDIX);
 +      CAP16_TO_CAP32(MDIAUTO);
 +      CAP16_TO_CAP32(FEC_RS);
 +      CAP16_TO_CAP32(FEC_BASER_RS);
 +      CAP16_TO_CAP32(802_3_PAUSE);
 +      CAP16_TO_CAP32(802_3_ASM_DIR);
 +
 +      #undef CAP16_TO_CAP32
 +
 +      return caps32;
 +}
 +
 +/**
 + *    fwcaps32_to_caps16 - convert 32-bit Port Capabilities to 16-bits
 + *    @caps32: a 32-bit Port Capabilities value
 + *
 + *    Returns the equivalent 16-bit Port Capabilities value.  Note that
 + *    not all 32-bit Port Capabilities can be represented in the 16-bit
 + *    Port Capabilities and some fields/values may not make it.
 + */
 +static fw_port_cap16_t fwcaps32_to_caps16(fw_port_cap32_t caps32)
 +{
 +      fw_port_cap16_t caps16 = 0;
 +
 +      #define CAP32_TO_CAP16(__cap) \
 +              do { \
 +                      if (caps32 & FW_PORT_CAP32_##__cap) \
 +                              caps16 |= FW_PORT_CAP_##__cap; \
 +              } while (0)
 +
 +      CAP32_TO_CAP16(SPEED_100M);
 +      CAP32_TO_CAP16(SPEED_1G);
 +      CAP32_TO_CAP16(SPEED_10G);
 +      CAP32_TO_CAP16(SPEED_25G);
 +      CAP32_TO_CAP16(SPEED_40G);
 +      CAP32_TO_CAP16(SPEED_100G);
 +      CAP32_TO_CAP16(FC_RX);
 +      CAP32_TO_CAP16(FC_TX);
 +      CAP32_TO_CAP16(802_3_PAUSE);
 +      CAP32_TO_CAP16(802_3_ASM_DIR);
 +      CAP32_TO_CAP16(ANEG);
 +      CAP32_TO_CAP16(MDIX);
 +      CAP32_TO_CAP16(MDIAUTO);
 +      CAP32_TO_CAP16(FEC_RS);
 +      CAP32_TO_CAP16(FEC_BASER_RS);
 +
 +      #undef CAP32_TO_CAP16
 +
 +      return caps16;
 +}
 +
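A compilable sketch of the same name-pasting translation macro, using hypothetical capability bits rather than the real FW_PORT_CAP_*/FW_PORT_CAP32_* values; it also shows why the 32-to-16 direction can be lossy (the 50G bit has no 16-bit counterpart here):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical capability bits, illustration only. */
#define CAP16_SPEED_1G	0x01
#define CAP16_ANEG	0x02

#define CAP32_SPEED_1G	0x0001
#define CAP32_ANEG	0x0002
#define CAP32_SPEED_50G	0x0004	/* exists only in the 32-bit namespace */

static uint32_t caps16_to_caps32(uint16_t caps16)
{
	uint32_t caps32 = 0;

	#define C16_TO_C32(cap) \
		do { \
			if (caps16 & CAP16_##cap) \
				caps32 |= CAP32_##cap; \
		} while (0)

	C16_TO_C32(SPEED_1G);
	C16_TO_C32(ANEG);

	#undef C16_TO_C32
	return caps32;
}

int main(void)
{
	/* 1G + ANEG translate exactly; a 50G bit would be dropped going
	 * the other way because no 16-bit equivalent is defined.
	 */
	printf("%#x\n", (unsigned)caps16_to_caps32(CAP16_SPEED_1G | CAP16_ANEG));
	return 0;
}
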
 +/* Translate Firmware Port Capabilities Pause specification to Common Code */
 +static inline enum cc_pause fwcap_to_cc_pause(fw_port_cap32_t fw_pause)
 +{
 +      enum cc_pause cc_pause = 0;
 +
 +      if (fw_pause & FW_PORT_CAP32_FC_RX)
 +              cc_pause |= PAUSE_RX;
 +      if (fw_pause & FW_PORT_CAP32_FC_TX)
 +              cc_pause |= PAUSE_TX;
 +
 +      return cc_pause;
 +}
 +
 +/* Translate Common Code Pause specification into Firmware Port Capabilities */
 +static inline fw_port_cap32_t cc_to_fwcap_pause(enum cc_pause cc_pause)
 +{
 +      fw_port_cap32_t fw_pause = 0;
 +
 +      if (cc_pause & PAUSE_RX)
 +              fw_pause |= FW_PORT_CAP32_FC_RX;
 +      if (cc_pause & PAUSE_TX)
 +              fw_pause |= FW_PORT_CAP32_FC_TX;
 +
 +      return fw_pause;
 +}
 +
 +/* Translate Firmware Forward Error Correction specification to Common Code */
 +static inline enum cc_fec fwcap_to_cc_fec(fw_port_cap32_t fw_fec)
 +{
 +      enum cc_fec cc_fec = 0;
 +
 +      if (fw_fec & FW_PORT_CAP32_FEC_RS)
 +              cc_fec |= FEC_RS;
 +      if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
 +              cc_fec |= FEC_BASER_RS;
 +
 +      return cc_fec;
 +}
 +
 +/* Translate Common Code Forward Error Correction specification to Firmware */
 +static inline fw_port_cap32_t cc_to_fwcap_fec(enum cc_fec cc_fec)
 +{
 +      fw_port_cap32_t fw_fec = 0;
 +
 +      if (cc_fec & FEC_RS)
 +              fw_fec |= FW_PORT_CAP32_FEC_RS;
 +      if (cc_fec & FEC_BASER_RS)
 +              fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
 +
 +      return fw_fec;
 +}
  
  /**
   *    t4_link_l1cfg - apply link configuration to MAC/PHY
 - *    @phy: the PHY to setup
 - *    @mac: the MAC to setup
 - *    @lc: the requested link configuration
 + *    @adapter: the adapter
 + *    @mbox: the Firmware Mailbox to use
 + *    @port: the Port ID
 + *    @lc: the Port's Link Configuration
   *
   *    Set up a port's MAC and PHY according to a desired link configuration.
   *    - If the PHY can auto-negotiate first decide what to advertise, then
   *    - If auto-negotiation is off set the MAC to the proper speed/duplex/FC,
   *      otherwise do it later based on the outcome of auto-negotiation.
   */
 -int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
 -                struct link_config *lc)
 +int t4_link_l1cfg(struct adapter *adapter, unsigned int mbox,
 +                unsigned int port, struct link_config *lc)
  {
 -      struct fw_port_cmd c;
 -      unsigned int mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO);
 -      unsigned int fc = 0, fec = 0, fw_fec = 0;
 +      unsigned int fw_caps = adapter->params.fw_caps_support;
 +      struct fw_port_cmd cmd;
 +      unsigned int fw_mdi = FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO);
 +      fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
  
        lc->link_ok = 0;
 -      if (lc->requested_fc & PAUSE_RX)
 -              fc |= FW_PORT_CAP_FC_RX;
 -      if (lc->requested_fc & PAUSE_TX)
 -              fc |= FW_PORT_CAP_FC_TX;
 -
 -      fec = lc->requested_fec & FEC_AUTO ? lc->auto_fec : lc->requested_fec;
  
 -      if (fec & FEC_RS)
 -              fw_fec |= FW_PORT_CAP_FEC_RS;
 -      if (fec & FEC_BASER_RS)
 -              fw_fec |= FW_PORT_CAP_FEC_BASER_RS;
 -
 -      memset(&c, 0, sizeof(c));
 -      c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 -                                   FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
 -                                   FW_PORT_CMD_PORTID_V(port));
 -      c.action_to_len16 =
 -              cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
 -                          FW_LEN16(c));
 +      /* Convert driver coding of Pause Frame Flow Control settings into the
 +       * Firmware's API.
 +       */
 +      fw_fc = cc_to_fwcap_pause(lc->requested_fc);
 +
 +      /* Convert Common Code Forward Error Correction settings into the
 +       * Firmware's API.  If the current Requested FEC has "Automatic"
 +       * (IEEE 802.3) specified, then we use whatever the Firmware
 +       * sent us as part of its IEEE 802.3-based interpretation of
 +       * the Transceiver Module EPROM FEC parameters.  Otherwise we
 +       * use whatever is in the current Requested FEC settings.
 +       */
 +      if (lc->requested_fec & FEC_AUTO)
 +              cc_fec = fwcap_to_cc_fec(lc->def_acaps);
 +      else
 +              cc_fec = lc->requested_fec;
 +      fw_fec = cc_to_fwcap_fec(cc_fec);
  
 -      if (!(lc->supported & FW_PORT_CAP_ANEG)) {
 -              c.u.l1cfg.rcap = cpu_to_be32((lc->supported & ADVERT_MASK) |
 -                                           fc | fw_fec);
 -              lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
 +      /* Figure out what our Requested Port Capabilities are going to be.
 +       */
 +      if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
 +              rcap = (lc->pcaps & ADVERT_MASK) | fw_fc | fw_fec;
 +              lc->fc = lc->requested_fc & ~PAUSE_AUTONEG;
 +              lc->fec = cc_fec;
        } else if (lc->autoneg == AUTONEG_DISABLE) {
 -              c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc |
 -                                           fw_fec | mdi);
 -              lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
 -      } else
 -              c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc |
 -                                           fw_fec | mdi);
 +              rcap = lc->speed_caps | fw_fc | fw_fec | fw_mdi;
 +              lc->fc = lc->requested_fc & ~PAUSE_AUTONEG;
 +              lc->fec = cc_fec;
 +      } else {
 +              rcap = lc->acaps | fw_fc | fw_fec | fw_mdi;
 +      }
  
 -      return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
 +      /* And send that on to the Firmware ...
 +       */
 +      memset(&cmd, 0, sizeof(cmd));
 +      cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 +                                     FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
 +                                     FW_PORT_CMD_PORTID_V(port));
 +      cmd.action_to_len16 =
 +              cpu_to_be32(FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
 +                                               ? FW_PORT_ACTION_L1_CFG
 +                                               : FW_PORT_ACTION_L1_CFG32) |
 +                          FW_LEN16(cmd));
 +      if (fw_caps == FW_CAPS16)
 +              cmd.u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(rcap));
 +      else
 +              cmd.u.l1cfg32.rcap32 = cpu_to_be32(rcap);
 +      return t4_wr_mbox(adapter, mbox, &cmd, sizeof(cmd), NULL);
  }
  
  /**
@@@ -4059,7 -3765,7 +4059,7 @@@ int t4_restart_aneg(struct adapter *ada
        c.action_to_len16 =
                cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
                            FW_LEN16(c));
 -      c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP_ANEG);
 +      c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP32_ANEG);
        return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
  }
  
@@@ -6742,17 -6448,6 +6742,17 @@@ int t4_fw_upgrade(struct adapter *adap
        if (ret < 0)
                goto out;
  
 +      /*
 +       * If there was a Firmware Configuration File stored in FLASH,
 +       * there's a good chance that it won't be compatible with the new
 +       * Firmware.  In order to prevent difficult to diagnose adapter
 +       * initialization issues, we clear out the Firmware Configuration File
 +       * portion of the FLASH.  The user will need to re-FLASH a new
 +       * Firmware Configuration File which is compatible with the new
 +       * Firmware if that's desired.
 +       */
 +      (void)t4_load_cfg(adap, NULL, 0);
 +
        /*
         * Older versions of the firmware don't understand the new
         * PCIE_FW.HALT flag and so won't know to perform a RESET when they
@@@ -7775,98 -7470,6 +7775,98 @@@ static const char *t4_link_down_rc_str(
        return reason[link_down_rc];
  }
  
 +/**
 + * Return the highest speed set in the port capabilities, in Mb/s.
 + */
 +static unsigned int fwcap_to_speed(fw_port_cap32_t caps)
 +{
 +      #define TEST_SPEED_RETURN(__caps_speed, __speed) \
 +              do { \
 +                      if (caps & FW_PORT_CAP32_SPEED_##__caps_speed) \
 +                              return __speed; \
 +              } while (0)
 +
 +      TEST_SPEED_RETURN(400G, 400000);
 +      TEST_SPEED_RETURN(200G, 200000);
 +      TEST_SPEED_RETURN(100G, 100000);
 +      TEST_SPEED_RETURN(50G,   50000);
 +      TEST_SPEED_RETURN(40G,   40000);
 +      TEST_SPEED_RETURN(25G,   25000);
 +      TEST_SPEED_RETURN(10G,   10000);
 +      TEST_SPEED_RETURN(1G,     1000);
 +      TEST_SPEED_RETURN(100M,    100);
 +
 +      #undef TEST_SPEED_RETURN
 +
 +      return 0;
 +}
 +
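A small sketch of the highest-speed-wins selection performed by fwcap_to_speed(): test from the fastest speed downwards and return on the first match. The speed bits below are hypothetical, standing in for FW_PORT_CAP32_SPEED_*:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical speed bits; the real FW_PORT_CAP32_SPEED_* values differ. */
#define SPEED_CAP_1G	0x1
#define SPEED_CAP_10G	0x2
#define SPEED_CAP_100G	0x4

static unsigned int caps_to_mbps(uint32_t caps)
{
	if (caps & SPEED_CAP_100G)
		return 100000;
	if (caps & SPEED_CAP_10G)
		return 10000;
	if (caps & SPEED_CAP_1G)
		return 1000;
	return 0;
}

int main(void)
{
	/* 1G and 100G both advertised -> report 100000 Mb/s. */
	printf("%u\n", caps_to_mbps(SPEED_CAP_1G | SPEED_CAP_100G));
	return 0;
}
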
 +/**
 + *    fwcap_to_fwspeed - return highest speed in Port Capabilities
 + *    @acaps: advertised Port Capabilities
 + *
 + *    Get the highest speed for the port from the advertised Port
 + *    Capabilities.  It will be either the highest speed from the list of
 + *    speeds or whatever the user has set using ethtool.
 + */
 +static fw_port_cap32_t fwcap_to_fwspeed(fw_port_cap32_t acaps)
 +{
 +      #define TEST_SPEED_RETURN(__caps_speed) \
 +              do { \
 +                      if (acaps & FW_PORT_CAP32_SPEED_##__caps_speed) \
 +                              return FW_PORT_CAP32_SPEED_##__caps_speed; \
 +              } while (0)
 +
 +      TEST_SPEED_RETURN(400G);
 +      TEST_SPEED_RETURN(200G);
 +      TEST_SPEED_RETURN(100G);
 +      TEST_SPEED_RETURN(50G);
 +      TEST_SPEED_RETURN(40G);
 +      TEST_SPEED_RETURN(25G);
 +      TEST_SPEED_RETURN(10G);
 +      TEST_SPEED_RETURN(1G);
 +      TEST_SPEED_RETURN(100M);
 +
 +      #undef TEST_SPEED_RETURN
 +
 +      return 0;
 +}
 +
 +/**
 + *    lstatus_to_fwcap - translate old lstatus to 32-bit Port Capabilities
 + *    @lstatus: old FW_PORT_ACTION_GET_PORT_INFO lstatus value
 + *
 + *    Translates old FW_PORT_ACTION_GET_PORT_INFO lstatus field into new
 + *    32-bit Port Capabilities value.
 + */
 +static fw_port_cap32_t lstatus_to_fwcap(u32 lstatus)
 +{
 +      fw_port_cap32_t linkattr = 0;
 +
 +      /* Unfortunately the format of the Link Status in the old
 +       * 16-bit Port Information message isn't the same as the
 +       * 16-bit Port Capabilities bitfield used everywhere else ...
 +       */
 +      if (lstatus & FW_PORT_CMD_RXPAUSE_F)
 +              linkattr |= FW_PORT_CAP32_FC_RX;
 +      if (lstatus & FW_PORT_CMD_TXPAUSE_F)
 +              linkattr |= FW_PORT_CAP32_FC_TX;
 +      if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
 +              linkattr |= FW_PORT_CAP32_SPEED_100M;
 +      if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
 +              linkattr |= FW_PORT_CAP32_SPEED_1G;
 +      if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
 +              linkattr |= FW_PORT_CAP32_SPEED_10G;
 +      if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
 +              linkattr |= FW_PORT_CAP32_SPEED_25G;
 +      if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
 +              linkattr |= FW_PORT_CAP32_SPEED_40G;
 +      if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
 +              linkattr |= FW_PORT_CAP32_SPEED_100G;
 +
 +      return linkattr;
 +}
 +
  /**
   *    t4_handle_get_port_info - process a FW reply message
   *    @pi: the port info
   */
  void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
  {
 -      const struct fw_port_cmd *p = (const void *)rpl;
 -      struct adapter *adap = pi->adapter;
 -
 -      /* link/module state change message */
 -      int speed = 0, fc = 0;
 -      struct link_config *lc;
 -      u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype);
 -      int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0;
 -      u32 mod = FW_PORT_CMD_MODTYPE_G(stat);
 -
 -      if (stat & FW_PORT_CMD_RXPAUSE_F)
 -              fc |= PAUSE_RX;
 -      if (stat & FW_PORT_CMD_TXPAUSE_F)
 -              fc |= PAUSE_TX;
 -      if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
 -              speed = 100;
 -      else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
 -              speed = 1000;
 -      else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
 -              speed = 10000;
 -      else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
 -              speed = 25000;
 -      else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
 -              speed = 40000;
 -      else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
 -              speed = 100000;
 -
 -      lc = &pi->link_cfg;
 -
 -      if (mod != pi->mod_type) {
 -              pi->mod_type = mod;
 -              t4_os_portmod_changed(adap, pi->port_id);
 +      const struct fw_port_cmd *cmd = (const void *)rpl;
 +      int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
 +      struct adapter *adapter = pi->adapter;
 +      struct link_config *lc = &pi->link_cfg;
 +      int link_ok, linkdnrc;
 +      enum fw_port_type port_type;
 +      enum fw_port_module_type mod_type;
 +      unsigned int speed, fc, fec;
 +      fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
 +
 +      /* Extract the various fields from the Port Information message.
 +       */
 +      switch (action) {
 +      case FW_PORT_ACTION_GET_PORT_INFO: {
 +              u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
 +
 +              link_ok = (lstatus & FW_PORT_CMD_LSTATUS_F) != 0;
 +              linkdnrc = FW_PORT_CMD_LINKDNRC_G(lstatus);
 +              port_type = FW_PORT_CMD_PTYPE_G(lstatus);
 +              mod_type = FW_PORT_CMD_MODTYPE_G(lstatus);
 +              pcaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.pcap));
 +              acaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.acap));
 +              lpacaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.lpacap));
 +              linkattr = lstatus_to_fwcap(lstatus);
 +              break;
        }
 +
 +      case FW_PORT_ACTION_GET_PORT_INFO32: {
 +              u32 lstatus32;
 +
 +              lstatus32 = be32_to_cpu(cmd->u.info32.lstatus32_to_cbllen32);
 +              link_ok = (lstatus32 & FW_PORT_CMD_LSTATUS32_F) != 0;
 +              linkdnrc = FW_PORT_CMD_LINKDNRC32_G(lstatus32);
 +              port_type = FW_PORT_CMD_PORTTYPE32_G(lstatus32);
 +              mod_type = FW_PORT_CMD_MODTYPE32_G(lstatus32);
 +              pcaps = be32_to_cpu(cmd->u.info32.pcaps32);
 +              acaps = be32_to_cpu(cmd->u.info32.acaps32);
 +              lpacaps = be32_to_cpu(cmd->u.info32.lpacaps32);
 +              linkattr = be32_to_cpu(cmd->u.info32.linkattr32);
 +              break;
 +      }
 +
 +      default:
 +              dev_err(adapter->pdev_dev, "Handle Port Information: Bad Command/Action %#x\n",
 +                      be32_to_cpu(cmd->action_to_len16));
 +              return;
 +      }
 +
 +      fec = fwcap_to_cc_fec(acaps);
 +      fc = fwcap_to_cc_pause(linkattr);
 +      speed = fwcap_to_speed(linkattr);
 +
 +      if (mod_type != pi->mod_type) {
 +              /* With the newer SFP28 and QSFP28 Transceiver Module Types,
 +               * various fundamental Port Capabilities which used to be
 +               * immutable can now change radically.  We can now have
 +               * Speeds, Auto-Negotiation, Forward Error Correction, etc.
 +               * all change based on what Transceiver Module is inserted.
 +               * So we need to record the Physical "Port" Capabilities on
 +               * every Transceiver Module change.
 +               */
 +              lc->pcaps = pcaps;
 +
 +              /* When a new Transceiver Module is inserted, the Firmware
 +               * will examine its i2c EPROM to determine its type and
 +               * general operating parameters including things like Forward
 +               * Error Control, etc.  Various IEEE 802.3 standards dictate
 +               * how to interpret these i2c values to determine default
 +               * "automatic" settings.  We record these for future use when
 +               * the user explicitly requests these standards-based values.
 +               */
 +              lc->def_acaps = acaps;
 +
 +              /* Some versions of the early T6 Firmware "cheated" when
 +               * handling different Transceiver Modules by changing the
 +               * underlying Port Type reported to the Host Drivers.  As
 +               * such we need to capture whatever Port Type the Firmware
 +               * sends us and record it in case it's different from what we
 +               * were told earlier.  Unfortunately, since Firmware is
 +               * forever, we'll need to keep this code here forever, but in
 +               * later T6 Firmware it should just be an assignment of the
 +               * same value already recorded.
 +               */
 +              pi->port_type = port_type;
 +
 +              pi->mod_type = mod_type;
 +              t4_os_portmod_changed(adapter, pi->port_id);
 +      }
 +
        if (link_ok != lc->link_ok || speed != lc->speed ||
 -          fc != lc->fc) {     /* something changed */
 +          fc != lc->fc || fec != lc->fec) {   /* something changed */
                if (!link_ok && lc->link_ok) {
 -                      unsigned char rc = FW_PORT_CMD_LINKDNRC_G(stat);
 -
 -                      lc->link_down_rc = rc;
 -                      dev_warn(adap->pdev_dev,
 -                               "Port %d link down, reason: %s\n",
 -                               pi->port_id, t4_link_down_rc_str(rc));
 +                      lc->link_down_rc = linkdnrc;
 +                      dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n",
 +                               pi->tx_chan, t4_link_down_rc_str(linkdnrc));
                }
                lc->link_ok = link_ok;
                lc->speed = speed;
                lc->fc = fc;
 -              lc->supported = be16_to_cpu(p->u.info.pcap);
 -              lc->lp_advertising = be16_to_cpu(p->u.info.lpacap);
 +              lc->fec = fec;
 +
 +              lc->lpacaps = lpacaps;
 +              lc->acaps = acaps & ADVERT_MASK;
 +
 +              if (lc->acaps & FW_PORT_CAP32_ANEG) {
 +                      lc->autoneg = AUTONEG_ENABLE;
 +              } else {
 +                      /* When Autoneg is disabled, the user needs to set
 +                       * a single speed.
 +                       * Similar to cxgb4_ethtool.c: set_link_ksettings
 +                       */
 +                      lc->acaps = 0;
 +                      lc->speed_caps = fwcap_to_fwspeed(acaps);
 +                      lc->autoneg = AUTONEG_DISABLE;
 +              }
  
 -              t4_os_link_changed(adap, pi->port_id, link_ok);
 +              t4_os_link_changed(adapter, pi->port_id, link_ok);
        }
  }
  
   */
  int t4_update_port_info(struct port_info *pi)
  {
 +      unsigned int fw_caps = pi->adapter->params.fw_caps_support;
        struct fw_port_cmd port_cmd;
        int ret;
  
        memset(&port_cmd, 0, sizeof(port_cmd));
        port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
                                            FW_CMD_REQUEST_F | FW_CMD_READ_F |
 -                                          FW_PORT_CMD_PORTID_V(pi->port_id));
 +                                          FW_PORT_CMD_PORTID_V(pi->tx_chan));
        port_cmd.action_to_len16 = cpu_to_be32(
 -              FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) |
 +              FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
 +                                   ? FW_PORT_ACTION_GET_PORT_INFO
 +                                   : FW_PORT_ACTION_GET_PORT_INFO32) |
                FW_LEN16(port_cmd));
        ret = t4_wr_mbox(pi->adapter, pi->adapter->mbox,
                         &port_cmd, sizeof(port_cmd), &port_cmd);
        return 0;
  }
  
 +/**
 + *    t4_get_link_params - retrieve basic link parameters for given port
 + *    @pi: the port
 + *    @link_okp: value return pointer for link up/down
 + *    @speedp: value return pointer for speed (Mb/s)
 + *    @mtup: value return pointer for mtu
 + *
 + *    Retrieves basic link parameters for a port: link up/down, speed (Mb/s),
 + *    and MTU for a specified port.  A negative error is returned on
 + *    failure; 0 on success.
 + */
 +int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
 +                     unsigned int *speedp, unsigned int *mtup)
 +{
 +      unsigned int fw_caps = pi->adapter->params.fw_caps_support;
 +      struct fw_port_cmd port_cmd;
 +      unsigned int action, link_ok, speed, mtu;
 +      fw_port_cap32_t linkattr;
 +      int ret;
 +
 +      memset(&port_cmd, 0, sizeof(port_cmd));
 +      port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 +                                          FW_CMD_REQUEST_F | FW_CMD_READ_F |
 +                                          FW_PORT_CMD_PORTID_V(pi->tx_chan));
 +      action = (fw_caps == FW_CAPS16
 +                ? FW_PORT_ACTION_GET_PORT_INFO
 +                : FW_PORT_ACTION_GET_PORT_INFO32);
 +      port_cmd.action_to_len16 = cpu_to_be32(
 +              FW_PORT_CMD_ACTION_V(action) |
 +              FW_LEN16(port_cmd));
 +      ret = t4_wr_mbox(pi->adapter, pi->adapter->mbox,
 +                       &port_cmd, sizeof(port_cmd), &port_cmd);
 +      if (ret)
 +              return ret;
 +
 +      if (action == FW_PORT_ACTION_GET_PORT_INFO) {
 +              u32 lstatus = be32_to_cpu(port_cmd.u.info.lstatus_to_modtype);
 +
 +              link_ok = !!(lstatus & FW_PORT_CMD_LSTATUS_F);
 +              linkattr = lstatus_to_fwcap(lstatus);
 +              mtu = be16_to_cpu(port_cmd.u.info.mtu);
 +      } else {
 +              u32 lstatus32 =
 +                         be32_to_cpu(port_cmd.u.info32.lstatus32_to_cbllen32);
 +
 +              link_ok = !!(lstatus32 & FW_PORT_CMD_LSTATUS32_F);
 +              linkattr = be32_to_cpu(port_cmd.u.info32.linkattr32);
 +              mtu = FW_PORT_CMD_MTU32_G(
 +                      be32_to_cpu(port_cmd.u.info32.auxlinfo32_mtu32));
 +      }
 +      speed = fwcap_to_speed(linkattr);
 +
 +      *link_okp = link_ok;
 +      *speedp = speed;
 +      *mtup = mtu;
 +
 +      return 0;
 +}
 +
  /**
   *      t4_handle_fw_rpl - process a FW reply message
   *      @adap: the adapter
@@@ -8107,9 -7581,7 +8107,9 @@@ int t4_handle_fw_rpl(struct adapter *ad
        unsigned int action =
                FW_PORT_CMD_ACTION_G(be32_to_cpu(p->action_to_len16));
  
 -      if (opcode == FW_PORT_CMD && action == FW_PORT_ACTION_GET_PORT_INFO) {
 +      if (opcode == FW_PORT_CMD &&
 +          (action == FW_PORT_ACTION_GET_PORT_INFO ||
 +           action == FW_PORT_ACTION_GET_PORT_INFO32)) {
                int i;
                int chan = FW_PORT_CMD_PORTID_G(be32_to_cpu(p->op_to_portid));
                struct port_info *pi = NULL;
  
                t4_handle_get_port_info(pi, rpl);
        } else {
 -              dev_warn(adap->pdev_dev, "Unknown firmware reply %d\n", opcode);
 +              dev_warn(adap->pdev_dev, "Unknown firmware reply %d\n",
 +                       opcode);
                return -EINVAL;
        }
        return 0;
@@@ -8142,35 -7613,38 +8142,35 @@@ static void get_pci_mode(struct adapte
  
  /**
   *    init_link_config - initialize a link's SW state
 - *    @lc: structure holding the link state
 - *    @caps: link capabilities
 + *    @lc: pointer to structure holding the link state
 + *    @pcaps: link Port Capabilities
 + *    @acaps: link current Advertised Port Capabilities
   *
   *    Initializes the SW state maintained for each link, including the link's
   *    capabilities and default speed/flow-control/autonegotiation settings.
   */
 -static void init_link_config(struct link_config *lc, unsigned int pcaps,
 -                           unsigned int acaps)
 +static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
 +                           fw_port_cap32_t acaps)
  {
 -      lc->supported = pcaps;
 -      lc->lp_advertising = 0;
 -      lc->requested_speed = 0;
 +      lc->pcaps = pcaps;
 +      lc->def_acaps = acaps;
 +      lc->lpacaps = 0;
 +      lc->speed_caps = 0;
        lc->speed = 0;
        lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
 -      lc->auto_fec = 0;
  
        /* For Forward Error Control, we default to whatever the Firmware
         * tells us the Link is currently advertising.
         */
 -      if (acaps & FW_PORT_CAP_FEC_RS)
 -              lc->auto_fec |= FEC_RS;
 -      if (acaps & FW_PORT_CAP_FEC_BASER_RS)
 -              lc->auto_fec |= FEC_BASER_RS;
        lc->requested_fec = FEC_AUTO;
 -      lc->fec = lc->auto_fec;
 +      lc->fec = fwcap_to_cc_fec(lc->def_acaps);
  
 -      if (lc->supported & FW_PORT_CAP_ANEG) {
 -              lc->advertising = lc->supported & ADVERT_MASK;
 +      if (lc->pcaps & FW_PORT_CAP32_ANEG) {
 +              lc->acaps = lc->pcaps & ADVERT_MASK;
                lc->autoneg = AUTONEG_ENABLE;
                lc->requested_fc |= PAUSE_AUTONEG;
        } else {
 -              lc->advertising = 0;
 +              lc->acaps = 0;
                lc->autoneg = AUTONEG_DISABLE;
        }
  }
@@@ -8695,7 -8169,7 +8695,7 @@@ int t4_init_rss_mode(struct adapter *ad
  }
  
  /**
 - *    t4_init_portinfo - allocate a virtual interface amd initialize port_info
 + *    t4_init_portinfo - allocate a virtual interface and initialize port_info
   *    @pi: the port_info
   *    @mbox: mailbox to use for the FW command
   *    @port: physical port associated with the VI
  int t4_init_portinfo(struct port_info *pi, int mbox,
                     int port, int pf, int vf, u8 mac[])
  {
 -      int ret;
 -      struct fw_port_cmd c;
 +      struct adapter *adapter = pi->adapter;
 +      unsigned int fw_caps = adapter->params.fw_caps_support;
 +      struct fw_port_cmd cmd;
        unsigned int rss_size;
 +      enum fw_port_type port_type;
 +      int mdio_addr;
 +      fw_port_cap32_t pcaps, acaps;
 +      int ret;
  
 -      memset(&c, 0, sizeof(c));
 -      c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 -                                   FW_CMD_REQUEST_F | FW_CMD_READ_F |
 -                                   FW_PORT_CMD_PORTID_V(port));
 -      c.action_to_len16 = cpu_to_be32(
 -              FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) |
 -              FW_LEN16(c));
 -      ret = t4_wr_mbox(pi->adapter, mbox, &c, sizeof(c), &c);
 +      /* If we haven't yet determined whether we're talking to Firmware
 +       * which knows the new 32-bit Port Capabilities, it's time to find
 +       * out now.  This will also tell new Firmware to send us Port Status
 +       * Updates using the new 32-bit Port Capabilities version of the
 +       * Port Information message.
 +       */
 +      if (fw_caps == FW_CAPS_UNKNOWN) {
 +              u32 param, val;
 +
 +              param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
 +                       FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_PORT_CAPS32));
 +              val = 1;
 +              ret = t4_set_params(adapter, mbox, pf, vf, 1, &param, &val);
 +              fw_caps = (ret == 0 ? FW_CAPS32 : FW_CAPS16);
 +              adapter->params.fw_caps_support = fw_caps;
 +      }
 +
 +      memset(&cmd, 0, sizeof(cmd));
 +      cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 +                                     FW_CMD_REQUEST_F | FW_CMD_READ_F |
 +                                     FW_PORT_CMD_PORTID_V(port));
 +      cmd.action_to_len16 = cpu_to_be32(
 +              FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
 +                                   ? FW_PORT_ACTION_GET_PORT_INFO
 +                                   : FW_PORT_ACTION_GET_PORT_INFO32) |
 +              FW_LEN16(cmd));
 +      ret = t4_wr_mbox(pi->adapter, mbox, &cmd, sizeof(cmd), &cmd);
        if (ret)
                return ret;
  
 +      /* Extract the various fields from the Port Information message.
 +       */
 +      if (fw_caps == FW_CAPS16) {
 +              u32 lstatus = be32_to_cpu(cmd.u.info.lstatus_to_modtype);
 +
 +              port_type = FW_PORT_CMD_PTYPE_G(lstatus);
 +              mdio_addr = ((lstatus & FW_PORT_CMD_MDIOCAP_F)
 +                           ? FW_PORT_CMD_MDIOADDR_G(lstatus)
 +                           : -1);
 +              pcaps = fwcaps16_to_caps32(be16_to_cpu(cmd.u.info.pcap));
 +              acaps = fwcaps16_to_caps32(be16_to_cpu(cmd.u.info.acap));
 +      } else {
 +              u32 lstatus32 = be32_to_cpu(cmd.u.info32.lstatus32_to_cbllen32);
 +
 +              port_type = FW_PORT_CMD_PORTTYPE32_G(lstatus32);
 +              mdio_addr = ((lstatus32 & FW_PORT_CMD_MDIOCAP32_F)
 +                           ? FW_PORT_CMD_MDIOADDR32_G(lstatus32)
 +                           : -1);
 +              pcaps = be32_to_cpu(cmd.u.info32.pcaps32);
 +              acaps = be32_to_cpu(cmd.u.info32.acaps32);
 +      }
 +
        ret = t4_alloc_vi(pi->adapter, mbox, port, pf, vf, 1, mac, &rss_size);
        if (ret < 0)
                return ret;
        pi->lport = port;
        pi->rss_size = rss_size;
  
 -      ret = be32_to_cpu(c.u.info.lstatus_to_modtype);
 -      pi->mdio_addr = (ret & FW_PORT_CMD_MDIOCAP_F) ?
 -              FW_PORT_CMD_MDIOADDR_G(ret) : -1;
 -      pi->port_type = FW_PORT_CMD_PTYPE_G(ret);
 +      pi->port_type = port_type;
 +      pi->mdio_addr = mdio_addr;
        pi->mod_type = FW_PORT_MOD_TYPE_NA;
  
 -      init_link_config(&pi->link_cfg, be16_to_cpu(c.u.info.pcap),
 -                       be16_to_cpu(c.u.info.acap));
 +      init_link_config(&pi->link_cfg, pcaps, acaps);
        return 0;
  }
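The hunk above makes the Port Information query capabilities-aware: a one-time write of the FW_PARAMS_PARAM_PFVF_PORT_CAPS32 parameter both detects whether the firmware speaks the new 32-bit Port Capabilities and, when it does, switches it to the 32-bit Port Information format; the result is cached in adapter->params.fw_caps_support so the right FW_PORT_ACTION_GET_PORT_INFO / FW_PORT_ACTION_GET_PORT_INFO32 action is chosen without re-probing. A minimal sketch of that probe-and-fall-back step, factored into a hypothetical helper (the helper name is illustrative; the symbols are the ones used in the hunk):

/* Illustrative helper only -- not part of this commit. */
static unsigned int t4_probe_fw_caps(struct adapter *adapter, unsigned int mbox,
				     unsigned int pf, unsigned int vf)
{
	u32 param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
		    FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_PORT_CAPS32);
	u32 val = 1;

	/* Old firmware rejects the parameter and only speaks the legacy
	 * 16-bit capabilities; new firmware accepts it and switches to
	 * the 32-bit format.
	 */
	return t4_set_params(adapter, mbox, pf, vf, 1, &param, &val) ?
		FW_CAPS16 : FW_CAPS32;
}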
  
@@@ -9232,65 -8663,6 +9232,65 @@@ void t4_idma_monitor(struct adapter *ad
        }
  }
  
 +/**
 + *    t4_load_cfg - download config file
 + *    @adap: the adapter
 + *    @cfg_data: the cfg text file to write
 + *    @size: text file size
 + *
 + *    Write the supplied config text file to the card's serial flash.
 + */
 +int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
 +{
 +      int ret, i, n, cfg_addr;
 +      unsigned int addr;
 +      unsigned int flash_cfg_start_sec;
 +      unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
 +
 +      cfg_addr = t4_flash_cfg_addr(adap);
 +      if (cfg_addr < 0)
 +              return cfg_addr;
 +
 +      addr = cfg_addr;
 +      flash_cfg_start_sec = addr / SF_SEC_SIZE;
 +
 +      if (size > FLASH_CFG_MAX_SIZE) {
 +              dev_err(adap->pdev_dev, "cfg file too large, max is %u bytes\n",
 +                      FLASH_CFG_MAX_SIZE);
 +              return -EFBIG;
 +      }
 +
 +      i = DIV_ROUND_UP(FLASH_CFG_MAX_SIZE,    /* # of sectors spanned */
 +                       sf_sec_size);
 +      ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec,
 +                                   flash_cfg_start_sec + i - 1);
 +      /* If size == 0 then we're simply erasing the FLASH sectors associated
 +       * with the on-adapter Firmware Configuration File.
 +       */
 +      if (ret || size == 0)
 +              goto out;
 +
 +      /* this will write to the flash up to SF_PAGE_SIZE at a time */
 +      for (i = 0; i < size; i += SF_PAGE_SIZE) {
 +              if ((size - i) < SF_PAGE_SIZE)
 +                      n = size - i;
 +              else
 +                      n = SF_PAGE_SIZE;
 +              ret = t4_write_flash(adap, addr, n, cfg_data);
 +              if (ret)
 +                      goto out;
 +
 +              addr += SF_PAGE_SIZE;
 +              cfg_data += SF_PAGE_SIZE;
 +      }
 +
 +out:
 +      if (ret)
 +              dev_err(adap->pdev_dev, "config file %s failed %d\n",
 +                      (size == 0 ? "clear" : "download"), ret);
 +      return ret;
 +}
 +
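t4_load_cfg() first erases the FLASH sectors backing the Firmware Configuration File and then, unless size is zero (which simply clears the stored file), writes the new contents one SF_PAGE_SIZE chunk at a time. A hedged usage sketch of the new export, with a hypothetical caller name and assuming the file was obtained through request_firmware():

/* Illustrative caller only -- not part of this commit. */
static int example_flash_cfg(struct adapter *adap, const struct firmware *fw)
{
	/* A NULL/zero-length file clears the on-adapter config file. */
	if (!fw)
		return t4_load_cfg(adap, NULL, 0);

	return t4_load_cfg(adap, fw->data, fw->size);
}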
  /**
   *    t4_set_vf_mac - Set MAC address for the specified VF
   *    @adapter: The adapter
index 05fe7123d5ae8868a1f4b1a0ef5f23de33b71ad6,59da7ac3c1087c03f52b514c3fc9b81074284a5e..9ed8e4b815304f1e443ccb7e4c863e800584b8ab
@@@ -1623,8 -1623,6 +1623,8 @@@ static const struct net_device_ops ftgm
  #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = ftgmac100_poll_controller,
  #endif
 +      .ndo_vlan_rx_add_vid    = ncsi_vlan_rx_add_vid,
 +      .ndo_vlan_rx_kill_vid   = ncsi_vlan_rx_kill_vid,
  };
  
  static int ftgmac100_setup_mdio(struct net_device *netdev)
@@@ -1839,9 -1837,6 +1839,9 @@@ static int ftgmac100_probe(struct platf
                NETIF_F_GRO | NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX |
                NETIF_F_HW_VLAN_CTAG_TX;
  
 +      if (priv->use_ncsi)
 +              netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 +
        /* AST2400  doesn't have working HW checksum generation */
        if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
                netdev->hw_features &= ~NETIF_F_HW_CSUM;
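Taken together, the ftgmac100 hunks route VLAN filtering through the NC-SI core: the new ndo hooks hand VLAN ids to ncsi_vlan_rx_add_vid()/ncsi_vlan_rx_kill_vid(), and NETIF_F_HW_VLAN_CTAG_FILTER is advertised only when the interface is NC-SI managed (priv->use_ncsi), since in that configuration it is the NC-SI stack that programs the filter entries.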
@@@ -1868,7 -1863,6 +1868,6 @@@ err_setup_mdio
  err_ioremap:
        release_resource(priv->res);
  err_req_mem:
-       netif_napi_del(&priv->napi);
        free_netdev(netdev);
  err_alloc_etherdev:
        return err;
index 14cd2c8b00248298088337c62a288f8c99039ec3,1c7da16ad0ffe5de0bbed64f027ed765daec7464..387eb4a88b723f2de346b7335e4fd818478efa1d
@@@ -623,6 -623,8 +623,8 @@@ static struct platform_device *dpaa_eth
                goto no_mem;
        }
  
+       pdev->dev.of_node = node;
+       pdev->dev.parent = priv->dev;
        set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
  
        ret = platform_device_add_data(pdev, &data, sizeof(data));
@@@ -698,8 -700,8 +700,8 @@@ static int mac_probe(struct platform_de
                priv->internal_phy_node = of_parse_phandle(mac_node,
                                                          "pcsphy-handle", 0);
        } else {
 -              dev_err(dev, "MAC node (%s) contains unsupported MAC\n",
 -                      mac_node->full_name);
 +              dev_err(dev, "MAC node (%pOF) contains unsupported MAC\n",
 +                      mac_node);
                err = -EINVAL;
                goto _return;
        }
        /* Get the FM node */
        dev_node = of_get_parent(mac_node);
        if (!dev_node) {
 -              dev_err(dev, "of_get_parent(%s) failed\n",
 -                      mac_node->full_name);
 +              dev_err(dev, "of_get_parent(%pOF) failed\n",
 +                      mac_node);
                err = -EINVAL;
                goto _return_dev_set_drvdata;
        }
  
        of_dev = of_find_device_by_node(dev_node);
        if (!of_dev) {
 -              dev_err(dev, "of_find_device_by_node(%s) failed\n",
 -                      dev_node->full_name);
 +              dev_err(dev, "of_find_device_by_node(%pOF) failed\n", dev_node);
                err = -EINVAL;
                goto _return_of_node_put;
        }
        /* Get the FMan cell-index */
        err = of_property_read_u32(dev_node, "cell-index", &val);
        if (err) {
 -              dev_err(dev, "failed to read cell-index for %s\n",
 -                      dev_node->full_name);
 +              dev_err(dev, "failed to read cell-index for %pOF\n", dev_node);
                err = -EINVAL;
                goto _return_of_node_put;
        }
  
        priv->fman = fman_bind(&of_dev->dev);
        if (!priv->fman) {
 -              dev_err(dev, "fman_bind(%s) failed\n", dev_node->full_name);
 +              dev_err(dev, "fman_bind(%pOF) failed\n", dev_node);
                err = -ENODEV;
                goto _return_of_node_put;
        }
        /* Get the address of the memory mapped registers */
        err = of_address_to_resource(mac_node, 0, &res);
        if (err < 0) {
 -              dev_err(dev, "of_address_to_resource(%s) = %d\n",
 -                      mac_node->full_name, err);
 +              dev_err(dev, "of_address_to_resource(%pOF) = %d\n",
 +                      mac_node, err);
                goto _return_dev_set_drvdata;
        }
  
        /* Get the cell-index */
        err = of_property_read_u32(mac_node, "cell-index", &val);
        if (err) {
 -              dev_err(dev, "failed to read cell-index for %s\n",
 -                      mac_node->full_name);
 +              dev_err(dev, "failed to read cell-index for %pOF\n", mac_node);
                err = -EINVAL;
                goto _return_dev_set_drvdata;
        }
        /* Get the MAC address */
        mac_addr = of_get_mac_address(mac_node);
        if (!mac_addr) {
 -              dev_err(dev, "of_get_mac_address(%s) failed\n",
 -                      mac_node->full_name);
 +              dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
                err = -EINVAL;
                goto _return_dev_set_drvdata;
        }
        /* Get the port handles */
        nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL);
        if (unlikely(nph < 0)) {
 -              dev_err(dev, "of_count_phandle_with_args(%s, fsl,fman-ports) failed\n",
 -                      mac_node->full_name);
 +              dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n",
 +                      mac_node);
                err = nph;
                goto _return_dev_set_drvdata;
        }
  
        if (nph != ARRAY_SIZE(mac_dev->port)) {
 -              dev_err(dev, "Not supported number of fman-ports handles of mac node %s from device tree\n",
 -                      mac_node->full_name);
 +              dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n",
 +                      mac_node);
                err = -EINVAL;
                goto _return_dev_set_drvdata;
        }
                /* Find the port node */
                dev_node = of_parse_phandle(mac_node, "fsl,fman-ports", i);
                if (!dev_node) {
 -                      dev_err(dev, "of_parse_phandle(%s, fsl,fman-ports) failed\n",
 -                              mac_node->full_name);
 +                      dev_err(dev, "of_parse_phandle(%pOF, fsl,fman-ports) failed\n",
 +                              mac_node);
                        err = -EINVAL;
                        goto _return_of_node_put;
                }
  
                of_dev = of_find_device_by_node(dev_node);
                if (!of_dev) {
 -                      dev_err(dev, "of_find_device_by_node(%s) failed\n",
 -                              dev_node->full_name);
 +                      dev_err(dev, "of_find_device_by_node(%pOF) failed\n",
 +                              dev_node);
                        err = -EINVAL;
                        goto _return_of_node_put;
                }
  
                mac_dev->port[i] = fman_port_bind(&of_dev->dev);
                if (!mac_dev->port[i]) {
 -                      dev_err(dev, "dev_get_drvdata(%s) failed\n",
 -                              dev_node->full_name);
 +                      dev_err(dev, "dev_get_drvdata(%pOF) failed\n",
 +                              dev_node);
                        err = -EINVAL;
                        goto _return_of_node_put;
                }
        phy_if = of_get_phy_mode(mac_node);
        if (phy_if < 0) {
                dev_warn(dev,
 -                       "of_get_phy_mode() for %s failed. Defaulting to SGMII\n",
 -                       mac_node->full_name);
 +                       "of_get_phy_mode() for %pOF failed. Defaulting to SGMII\n",
 +                       mac_node);
                phy_if = PHY_INTERFACE_MODE_SGMII;
        }
        priv->phy_if = phy_if;
index f37c05fed5bcf3c601ef64c912c9f58051fd7686,4d598ca8503a50952576354ae35f1b3b6a574b6e..d5624894152e1e6317b50092cdda223837e0ad0f
@@@ -18,7 -18,6 +18,7 @@@
  #include <linux/inetdevice.h>
  #include <linux/mbus.h>
  #include <linux/module.h>
 +#include <linux/mfd/syscon.h>
  #include <linux/interrupt.h>
  #include <linux/cpumask.h>
  #include <linux/of.h>
  #include <linux/of_address.h>
  #include <linux/of_device.h>
  #include <linux/phy.h>
 +#include <linux/phy/phy.h>
  #include <linux/clk.h>
  #include <linux/hrtimer.h>
  #include <linux/ktime.h>
 +#include <linux/regmap.h>
  #include <uapi/linux/ppp_defs.h>
  #include <net/ip.h>
  #include <net/ipv6.h>
 +#include <net/tso.h>
  
  /* RX Fifo Registers */
  #define MVPP2_RX_DATA_FIFO_SIZE_REG(port)     (0x00 + 4 * (port))
  #define MVPP2_TXQ_DESC_ADDR_REG                       0x2084
  #define MVPP2_TXQ_DESC_SIZE_REG                       0x2088
  #define     MVPP2_TXQ_DESC_SIZE_MASK          0x3ff0
 +#define MVPP2_TXQ_THRESH_REG                  0x2094
 +#define           MVPP2_TXQ_THRESH_OFFSET             16
 +#define           MVPP2_TXQ_THRESH_MASK               0x3fff
  #define MVPP2_AGGR_TXQ_UPDATE_REG             0x2090
  #define MVPP2_TXQ_INDEX_REG                   0x2098
  #define MVPP2_TXQ_PREF_BUF_REG                        0x209c
  #define MVPP22_AXI_CODE_DOMAIN_SYSTEM         3
  
  /* Interrupt Cause and Mask registers */
 +#define MVPP2_ISR_TX_THRESHOLD_REG(port)      (0x5140 + 4 * (port))
 +#define     MVPP2_MAX_ISR_TX_THRESHOLD                0xfffff0
 +
  #define MVPP2_ISR_RX_THRESHOLD_REG(rxq)               (0x5200 + 4 * (rxq))
  #define     MVPP2_MAX_ISR_RX_THRESHOLD                0xfffff0
 -#define MVPP21_ISR_RXQ_GROUP_REG(rxq)         (0x5400 + 4 * (rxq))
 +#define MVPP21_ISR_RXQ_GROUP_REG(port)                (0x5400 + 4 * (port))
  
 -#define MVPP22_ISR_RXQ_GROUP_INDEX_REG          0x5400
 +#define MVPP22_ISR_RXQ_GROUP_INDEX_REG                0x5400
  #define MVPP22_ISR_RXQ_GROUP_INDEX_SUBGROUP_MASK 0xf
 -#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK   0x380
 -#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET 7
 +#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK 0x380
 +#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET       7
  
  #define MVPP22_ISR_RXQ_GROUP_INDEX_SUBGROUP_MASK 0xf
 -#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK   0x380
 +#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK 0x380
  
 -#define MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG     0x5404
 -#define MVPP22_ISR_RXQ_SUB_GROUP_STARTQ_MASK    0x1f
 -#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_MASK      0xf00
 -#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET    8
 +#define MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG   0x5404
 +#define MVPP22_ISR_RXQ_SUB_GROUP_STARTQ_MASK  0x1f
 +#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_MASK    0xf00
 +#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET  8
  
  #define MVPP2_ISR_ENABLE_REG(port)            (0x5420 + 4 * (port))
  #define     MVPP2_ISR_ENABLE_INTERRUPT(mask)  ((mask) & 0xffff)
  #define MVPP2_ISR_RX_TX_CAUSE_REG(port)               (0x5480 + 4 * (port))
  #define     MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK       0xffff
  #define     MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK       0xff0000
 +#define     MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET     16
  #define     MVPP2_CAUSE_RX_FIFO_OVERRUN_MASK  BIT(24)
  #define     MVPP2_CAUSE_FCS_ERR_MASK          BIT(25)
  #define     MVPP2_CAUSE_TX_FIFO_UNDERRUN_MASK BIT(26)
  #define MVPP2_BM_VIRT_RLS_REG                 0x64c0
  #define MVPP22_BM_ADDR_HIGH_RLS_REG           0x64c4
  #define     MVPP22_BM_ADDR_HIGH_PHYS_RLS_MASK 0xff
 -#define           MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK   0xff00
 +#define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK 0xff00
  #define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT        8
  
  /* TX Scheduler registers */
  
  /* Per-port registers */
  #define MVPP2_GMAC_CTRL_0_REG                 0x0
 -#define      MVPP2_GMAC_PORT_EN_MASK          BIT(0)
 -#define      MVPP2_GMAC_MAX_RX_SIZE_OFFS      2
 -#define      MVPP2_GMAC_MAX_RX_SIZE_MASK      0x7ffc
 -#define      MVPP2_GMAC_MIB_CNTR_EN_MASK      BIT(15)
 +#define     MVPP2_GMAC_PORT_EN_MASK           BIT(0)
 +#define     MVPP2_GMAC_PORT_TYPE_MASK         BIT(1)
 +#define     MVPP2_GMAC_MAX_RX_SIZE_OFFS               2
 +#define     MVPP2_GMAC_MAX_RX_SIZE_MASK               0x7ffc
 +#define     MVPP2_GMAC_MIB_CNTR_EN_MASK               BIT(15)
  #define MVPP2_GMAC_CTRL_1_REG                 0x4
 -#define      MVPP2_GMAC_PERIODIC_XON_EN_MASK  BIT(1)
 -#define      MVPP2_GMAC_GMII_LB_EN_MASK               BIT(5)
 -#define      MVPP2_GMAC_PCS_LB_EN_BIT         6
 -#define      MVPP2_GMAC_PCS_LB_EN_MASK                BIT(6)
 -#define      MVPP2_GMAC_SA_LOW_OFFS           7
 +#define     MVPP2_GMAC_PERIODIC_XON_EN_MASK   BIT(1)
 +#define     MVPP2_GMAC_GMII_LB_EN_MASK                BIT(5)
 +#define     MVPP2_GMAC_PCS_LB_EN_BIT          6
 +#define     MVPP2_GMAC_PCS_LB_EN_MASK         BIT(6)
 +#define     MVPP2_GMAC_SA_LOW_OFFS            7
  #define MVPP2_GMAC_CTRL_2_REG                 0x8
 -#define      MVPP2_GMAC_INBAND_AN_MASK                BIT(0)
 -#define      MVPP2_GMAC_PCS_ENABLE_MASK               BIT(3)
 -#define      MVPP2_GMAC_PORT_RGMII_MASK               BIT(4)
 -#define      MVPP2_GMAC_PORT_RESET_MASK               BIT(6)
 +#define     MVPP2_GMAC_INBAND_AN_MASK         BIT(0)
 +#define     MVPP2_GMAC_FLOW_CTRL_MASK         GENMASK(2, 1)
 +#define     MVPP2_GMAC_PCS_ENABLE_MASK                BIT(3)
 +#define     MVPP2_GMAC_PORT_RGMII_MASK                BIT(4)
 +#define     MVPP2_GMAC_DISABLE_PADDING                BIT(5)
 +#define     MVPP2_GMAC_PORT_RESET_MASK                BIT(6)
  #define MVPP2_GMAC_AUTONEG_CONFIG             0xc
 -#define      MVPP2_GMAC_FORCE_LINK_DOWN               BIT(0)
 -#define      MVPP2_GMAC_FORCE_LINK_PASS               BIT(1)
 -#define      MVPP2_GMAC_CONFIG_MII_SPEED      BIT(5)
 -#define      MVPP2_GMAC_CONFIG_GMII_SPEED     BIT(6)
 -#define      MVPP2_GMAC_AN_SPEED_EN           BIT(7)
 -#define      MVPP2_GMAC_FC_ADV_EN             BIT(9)
 -#define      MVPP2_GMAC_CONFIG_FULL_DUPLEX    BIT(12)
 -#define      MVPP2_GMAC_AN_DUPLEX_EN          BIT(13)
 +#define     MVPP2_GMAC_FORCE_LINK_DOWN                BIT(0)
 +#define     MVPP2_GMAC_FORCE_LINK_PASS                BIT(1)
 +#define     MVPP2_GMAC_IN_BAND_AUTONEG                BIT(2)
 +#define     MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS BIT(3)
 +#define     MVPP2_GMAC_CONFIG_MII_SPEED       BIT(5)
 +#define     MVPP2_GMAC_CONFIG_GMII_SPEED      BIT(6)
 +#define     MVPP2_GMAC_AN_SPEED_EN            BIT(7)
 +#define     MVPP2_GMAC_FC_ADV_EN              BIT(9)
 +#define     MVPP2_GMAC_FLOW_CTRL_AUTONEG      BIT(11)
 +#define     MVPP2_GMAC_CONFIG_FULL_DUPLEX     BIT(12)
 +#define     MVPP2_GMAC_AN_DUPLEX_EN           BIT(13)
 +#define MVPP2_GMAC_STATUS0                    0x10
 +#define     MVPP2_GMAC_STATUS0_LINK_UP                BIT(0)
  #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG                0x1c
 -#define      MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS   6
 -#define      MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK       0x1fc0
 -#define      MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v)        (((v) << 6) & \
 +#define     MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS    6
 +#define     MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK        0x1fc0
 +#define     MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \
                                        MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK)
 +#define MVPP22_GMAC_INT_STAT                  0x20
 +#define     MVPP22_GMAC_INT_STAT_LINK         BIT(1)
 +#define MVPP22_GMAC_INT_MASK                  0x24
 +#define     MVPP22_GMAC_INT_MASK_LINK_STAT    BIT(1)
  #define MVPP22_GMAC_CTRL_4_REG                        0x90
 -#define      MVPP22_CTRL4_EXT_PIN_GMII_SEL    BIT(0)
 -#define      MVPP22_CTRL4_DP_CLK_SEL          BIT(5)
 -#define      MVPP22_CTRL4_SYNC_BYPASS         BIT(6)
 -#define      MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE        BIT(7)
 +#define     MVPP22_CTRL4_EXT_PIN_GMII_SEL     BIT(0)
 +#define     MVPP22_CTRL4_DP_CLK_SEL           BIT(5)
 +#define     MVPP22_CTRL4_SYNC_BYPASS_DIS      BIT(6)
 +#define     MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7)
 +#define MVPP22_GMAC_INT_SUM_MASK              0xa4
 +#define     MVPP22_GMAC_INT_SUM_MASK_LINK_STAT        BIT(1)
  
  /* Per-port XGMAC registers. PPv2.2 only, only for GOP port 0,
   * relative to port->base.
   */
  #define MVPP22_XLG_CTRL0_REG                  0x100
 -#define      MVPP22_XLG_CTRL0_PORT_EN         BIT(0)
 -#define      MVPP22_XLG_CTRL0_MAC_RESET_DIS   BIT(1)
 -#define      MVPP22_XLG_CTRL0_MIB_CNT_DIS     BIT(14)
 -
 +#define     MVPP22_XLG_CTRL0_PORT_EN          BIT(0)
 +#define     MVPP22_XLG_CTRL0_MAC_RESET_DIS    BIT(1)
 +#define     MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN  BIT(7)
 +#define     MVPP22_XLG_CTRL0_MIB_CNT_DIS      BIT(14)
 +#define MVPP22_XLG_CTRL1_REG                  0x104
 +#define     MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS      0
 +#define     MVPP22_XLG_CTRL1_FRAMESIZELIMIT_MASK      0x1fff
 +#define MVPP22_XLG_STATUS                     0x10c
 +#define     MVPP22_XLG_STATUS_LINK_UP         BIT(0)
 +#define MVPP22_XLG_INT_STAT                   0x114
 +#define     MVPP22_XLG_INT_STAT_LINK          BIT(1)
 +#define MVPP22_XLG_INT_MASK                   0x118
 +#define     MVPP22_XLG_INT_MASK_LINK          BIT(1)
  #define MVPP22_XLG_CTRL3_REG                  0x11c
 -#define      MVPP22_XLG_CTRL3_MACMODESELECT_MASK      (7 << 13)
 -#define      MVPP22_XLG_CTRL3_MACMODESELECT_GMAC      (0 << 13)
 -#define      MVPP22_XLG_CTRL3_MACMODESELECT_10G               (1 << 13)
 +#define     MVPP22_XLG_CTRL3_MACMODESELECT_MASK       (7 << 13)
 +#define     MVPP22_XLG_CTRL3_MACMODESELECT_GMAC       (0 << 13)
 +#define     MVPP22_XLG_CTRL3_MACMODESELECT_10G        (1 << 13)
 +#define MVPP22_XLG_EXT_INT_MASK                       0x15c
 +#define     MVPP22_XLG_EXT_INT_MASK_XLG               BIT(1)
 +#define     MVPP22_XLG_EXT_INT_MASK_GIG               BIT(2)
 +#define MVPP22_XLG_CTRL4_REG                  0x184
 +#define     MVPP22_XLG_CTRL4_FWD_FC           BIT(5)
 +#define     MVPP22_XLG_CTRL4_FWD_PFC          BIT(6)
 +#define     MVPP22_XLG_CTRL4_MACMODSELECT_GMAC        BIT(12)
  
  /* SMI registers. PPv2.2 only, relative to priv->iface_base. */
  #define MVPP22_SMI_MISC_CFG_REG                       0x1204
 -#define      MVPP22_SMI_POLLING_EN            BIT(10)
 +#define     MVPP22_SMI_POLLING_EN             BIT(10)
  
  #define MVPP22_GMAC_BASE(port)                (0x7000 + (port) * 0x1000 + 0xe00)
  
  #define MVPP2_QUEUE_NEXT_DESC(q, index) \
        (((index) < (q)->last_desc) ? ((index) + 1) : 0)
  
 +/* MPCS registers. PPv2.2 only */
 +#define MVPP22_MPCS_BASE(port)                        (0x7000 + (port) * 0x1000)
 +#define MVPP22_MPCS_CTRL                      0x14
 +#define     MVPP22_MPCS_CTRL_FWD_ERR_CONN     BIT(10)
 +#define MVPP22_MPCS_CLK_RESET                 0x14c
 +#define     MAC_CLK_RESET_SD_TX                       BIT(0)
 +#define     MAC_CLK_RESET_SD_RX                       BIT(1)
 +#define     MAC_CLK_RESET_MAC                 BIT(2)
 +#define     MVPP22_MPCS_CLK_RESET_DIV_RATIO(n)        ((n) << 4)
 +#define     MVPP22_MPCS_CLK_RESET_DIV_SET     BIT(11)
 +
 +/* XPCS registers. PPv2.2 only */
 +#define MVPP22_XPCS_BASE(port)                        (0x7400 + (port) * 0x1000)
 +#define MVPP22_XPCS_CFG0                      0x0
 +#define     MVPP22_XPCS_CFG0_PCS_MODE(n)      ((n) << 3)
 +#define     MVPP22_XPCS_CFG0_ACTIVE_LANE(n)   ((n) << 5)
 +
 +/* System controller registers. Accessed through a regmap. */
 +#define GENCONF_SOFT_RESET1                           0x1108
 +#define     GENCONF_SOFT_RESET1_GOP                   BIT(6)
 +#define GENCONF_PORT_CTRL0                            0x1110
 +#define     GENCONF_PORT_CTRL0_BUS_WIDTH_SELECT               BIT(1)
 +#define     GENCONF_PORT_CTRL0_RX_DATA_SAMPLE         BIT(29)
 +#define     GENCONF_PORT_CTRL0_CLK_DIV_PHASE_CLR      BIT(31)
 +#define GENCONF_PORT_CTRL1                            0x1114
 +#define     GENCONF_PORT_CTRL1_EN(p)                  BIT(p)
 +#define     GENCONF_PORT_CTRL1_RESET(p)                       (BIT(p) << 28)
 +#define GENCONF_CTRL0                                 0x1120
 +#define     GENCONF_CTRL0_PORT0_RGMII                 BIT(0)
 +#define     GENCONF_CTRL0_PORT1_RGMII_MII             BIT(1)
 +#define     GENCONF_CTRL0_PORT1_RGMII                 BIT(2)
 +
  /* Various constants */
  
  /* Coalescing */
  #define MVPP2_TXDONE_COAL_PKTS_THRESH 15
  #define MVPP2_TXDONE_HRTIMER_PERIOD_NS        1000000UL
 +#define MVPP2_TXDONE_COAL_USEC                1000
  #define MVPP2_RX_COAL_PKTS            32
  #define MVPP2_RX_COAL_USEC            100
  
@@@ -759,8 -685,7 +759,8 @@@ enum mvpp2_prs_l3_cast 
  #define MVPP21_ADDR_SPACE_SZ          0
  #define MVPP22_ADDR_SPACE_SZ          SZ_64K
  
 -#define MVPP2_MAX_CPUS                        4
 +#define MVPP2_MAX_THREADS             8
 +#define MVPP2_MAX_QVECS                       MVPP2_MAX_THREADS
  
  enum mvpp2_bm_type {
        MVPP2_BM_FREE,
@@@ -776,17 -701,11 +776,17 @@@ struct mvpp2 
        void __iomem *lms_base;
        void __iomem *iface_base;
  
 -      /* On PPv2.2, each CPU can access the base register through a
 -       * separate address space, each 64 KB apart from each
 -       * other.
 +      /* On PPv2.2, each "software thread" can access the base
 +       * register through a separate address space, each 64 KB apart
 +       * from each other. Typically, such address spaces will be
 +       * used per CPU.
 +       */
 +      void __iomem *swth_base[MVPP2_MAX_THREADS];
 +
 +      /* On PPv2.2, some port control registers are located in the system
 +       * controller space. These registers are accessible through a regmap.
         */
 -      void __iomem *cpu_base[MVPP2_MAX_CPUS];
 +      struct regmap *sysctrl_base;
  
        /* Common clocks */
        struct clk *pp_clk;
@@@ -833,18 -752,6 +833,18 @@@ struct mvpp2_port_pcpu 
        struct tasklet_struct tx_done_tasklet;
  };
  
 +struct mvpp2_queue_vector {
 +      int irq;
 +      struct napi_struct napi;
 +      enum { MVPP2_QUEUE_VECTOR_SHARED, MVPP2_QUEUE_VECTOR_PRIVATE } type;
 +      int sw_thread_id;
 +      u16 sw_thread_mask;
 +      int first_rxq;
 +      int nrxqs;
 +      u32 pending_cause_rx;
 +      struct mvpp2_port *port;
 +};
 +
  struct mvpp2_port {
        u8 id;
  
         */
        int gop_id;
  
 -      int irq;
 +      int link_irq;
  
        struct mvpp2 *priv;
  
        void __iomem *base;
  
        struct mvpp2_rx_queue **rxqs;
 +      unsigned int nrxqs;
        struct mvpp2_tx_queue **txqs;
 +      unsigned int ntxqs;
        struct net_device *dev;
  
        int pkt_size;
  
 -      u32 pending_cause_rx;
 -      struct napi_struct napi;
 -
        /* Per-CPU port control */
        struct mvpp2_port_pcpu __percpu *pcpu;
  
  
        phy_interface_t phy_interface;
        struct device_node *phy_node;
 +      struct phy *comphy;
        unsigned int link;
        unsigned int duplex;
        unsigned int speed;
  
        /* Index of first port's physical RXQ */
        u8 first_rxq;
 +
 +      struct mvpp2_queue_vector qvecs[MVPP2_MAX_QVECS];
 +      unsigned int nqvecs;
 +      bool has_tx_irqs;
 +
 +      u32 tx_time_coal;
  };
  
  /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
@@@ -1031,10 -932,6 +1031,10 @@@ struct mvpp2_txq_pcpu 
  
        /* Index of the TX DMA descriptor to be cleaned up */
        int txq_get_index;
 +
 +      /* DMA buffer for TSO headers */
 +      char *tso_headers;
 +      dma_addr_t tso_headers_dma;
  };
  
  struct mvpp2_tx_queue {
@@@ -1165,14 -1062,12 +1165,14 @@@ struct mvpp2_bm_pool 
        u32 port_map;
  };
  
 -/* Static declaractions */
 +/* Queue modes */
 +#define MVPP2_QDIST_SINGLE_MODE       0
 +#define MVPP2_QDIST_MULTI_MODE        1
  
 -/* Number of RXQs used by single port */
 -static int rxq_number = MVPP2_DEFAULT_RXQ;
 -/* Number of TXQs used by single port */
 -static int txq_number = MVPP2_MAX_TXQ;
 +static int queue_mode = MVPP2_QDIST_SINGLE_MODE;
 +
 +module_param(queue_mode, int, 0444);
 +MODULE_PARM_DESC(queue_mode, "Set queue_mode (single=0, multi=1)");
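The global rxq_number/txq_number variables are replaced by per-port counts plus a load-time queue_mode knob. Because the parameter is registered with mode 0444 it is read-only at runtime; the distribution mode is picked when the module is loaded, e.g. passing queue_mode=1 to modprobe selects MVPP2_QDIST_MULTI_MODE.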
  
  #define MVPP2_DRIVER_NAME "mvpp2"
  #define MVPP2_DRIVER_VERSION "1.0"
  
  static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
  {
 -      writel(data, priv->cpu_base[0] + offset);
 +      writel(data, priv->swth_base[0] + offset);
  }
  
  static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
  {
 -      return readl(priv->cpu_base[0] + offset);
 +      return readl(priv->swth_base[0] + offset);
  }
  
  /* These accessors should be used to access:
  static void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
                               u32 offset, u32 data)
  {
 -      writel(data, priv->cpu_base[cpu] + offset);
 +      writel(data, priv->swth_base[cpu] + offset);
  }
  
  static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
                             u32 offset)
  {
 -      return readl(priv->cpu_base[cpu] + offset);
 +      return readl(priv->swth_base[cpu] + offset);
  }
  
  static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
@@@ -4175,7 -4070,7 +4175,7 @@@ static int mvpp2_swf_bm_pool_init(struc
  
                port->pool_long->port_map |= (1 << port->id);
  
 -              for (rxq = 0; rxq < rxq_number; rxq++)
 +              for (rxq = 0; rxq < port->nrxqs; rxq++)
                        mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
        }
  
  
                port->pool_short->port_map |= (1 << port->id);
  
 -              for (rxq = 0; rxq < rxq_number; rxq++)
 +              for (rxq = 0; rxq < port->nrxqs; rxq++)
                        mvpp2_rxq_short_pool_set(port, rxq,
                                                 port->pool_short->id);
        }
@@@ -4230,40 -4125,22 +4230,40 @@@ static int mvpp2_bm_update_mtu(struct n
  
  static inline void mvpp2_interrupts_enable(struct mvpp2_port *port)
  {
 -      int cpu, cpu_mask = 0;
 +      int i, sw_thread_mask = 0;
 +
 +      for (i = 0; i < port->nqvecs; i++)
 +              sw_thread_mask |= port->qvecs[i].sw_thread_mask;
  
 -      for_each_present_cpu(cpu)
 -              cpu_mask |= 1 << cpu;
        mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
 -                  MVPP2_ISR_ENABLE_INTERRUPT(cpu_mask));
 +                  MVPP2_ISR_ENABLE_INTERRUPT(sw_thread_mask));
  }
  
  static inline void mvpp2_interrupts_disable(struct mvpp2_port *port)
  {
 -      int cpu, cpu_mask = 0;
 +      int i, sw_thread_mask = 0;
 +
 +      for (i = 0; i < port->nqvecs; i++)
 +              sw_thread_mask |= port->qvecs[i].sw_thread_mask;
 +
 +      mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
 +                  MVPP2_ISR_DISABLE_INTERRUPT(sw_thread_mask));
 +}
 +
 +static inline void mvpp2_qvec_interrupt_enable(struct mvpp2_queue_vector *qvec)
 +{
 +      struct mvpp2_port *port = qvec->port;
 +
 +      mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
 +                  MVPP2_ISR_ENABLE_INTERRUPT(qvec->sw_thread_mask));
 +}
 +
 +static inline void mvpp2_qvec_interrupt_disable(struct mvpp2_queue_vector *qvec)
 +{
 +      struct mvpp2_port *port = qvec->port;
  
 -      for_each_present_cpu(cpu)
 -              cpu_mask |= 1 << cpu;
        mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
 -                  MVPP2_ISR_DISABLE_INTERRUPT(cpu_mask));
 +                  MVPP2_ISR_DISABLE_INTERRUPT(qvec->sw_thread_mask));
  }
  
  /* Mask the current CPU's Rx/Tx interrupts
@@@ -4285,346 -4162,15 +4285,346 @@@ static void mvpp2_interrupts_mask(void 
  static void mvpp2_interrupts_unmask(void *arg)
  {
        struct mvpp2_port *port = arg;
 +      u32 val;
 +
 +      val = MVPP2_CAUSE_MISC_SUM_MASK |
 +              MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
 +      if (port->has_tx_irqs)
 +              val |= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
  
        mvpp2_percpu_write(port->priv, smp_processor_id(),
 -                         MVPP2_ISR_RX_TX_MASK_REG(port->id),
 -                         (MVPP2_CAUSE_MISC_SUM_MASK |
 -                          MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
 +                         MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
 +}
 +
 +static void
 +mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask)
 +{
 +      u32 val;
 +      int i;
 +
 +      if (port->priv->hw_version != MVPP22)
 +              return;
 +
 +      if (mask)
 +              val = 0;
 +      else
 +              val = MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
 +
 +      for (i = 0; i < port->nqvecs; i++) {
 +              struct mvpp2_queue_vector *v = port->qvecs + i;
 +
 +              if (v->type != MVPP2_QUEUE_VECTOR_SHARED)
 +                      continue;
 +
 +              mvpp2_percpu_write(port->priv, v->sw_thread_id,
 +                                 MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
 +      }
  }
  
  /* Port configuration routines */
  
 +static void mvpp22_gop_init_rgmii(struct mvpp2_port *port)
 +{
 +      struct mvpp2 *priv = port->priv;
 +      u32 val;
 +
 +      regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL0, &val);
 +      val |= GENCONF_PORT_CTRL0_BUS_WIDTH_SELECT;
 +      regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL0, val);
 +
 +      regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val);
 +      if (port->gop_id == 2)
 +              val |= GENCONF_CTRL0_PORT0_RGMII | GENCONF_CTRL0_PORT1_RGMII;
 +      else if (port->gop_id == 3)
 +              val |= GENCONF_CTRL0_PORT1_RGMII_MII;
 +      regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val);
 +}
 +
 +static void mvpp22_gop_init_sgmii(struct mvpp2_port *port)
 +{
 +      struct mvpp2 *priv = port->priv;
 +      u32 val;
 +
 +      regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL0, &val);
 +      val |= GENCONF_PORT_CTRL0_BUS_WIDTH_SELECT |
 +             GENCONF_PORT_CTRL0_RX_DATA_SAMPLE;
 +      regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL0, val);
 +
 +      if (port->gop_id > 1) {
 +              regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val);
 +              if (port->gop_id == 2)
 +                      val &= ~GENCONF_CTRL0_PORT0_RGMII;
 +              else if (port->gop_id == 3)
 +                      val &= ~GENCONF_CTRL0_PORT1_RGMII_MII;
 +              regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val);
 +      }
 +}
 +
 +static void mvpp22_gop_init_10gkr(struct mvpp2_port *port)
 +{
 +      struct mvpp2 *priv = port->priv;
 +      void __iomem *mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id);
 +      void __iomem *xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
 +      u32 val;
 +
 +      /* XPCS */
 +      val = readl(xpcs + MVPP22_XPCS_CFG0);
 +      val &= ~(MVPP22_XPCS_CFG0_PCS_MODE(0x3) |
 +               MVPP22_XPCS_CFG0_ACTIVE_LANE(0x3));
 +      val |= MVPP22_XPCS_CFG0_ACTIVE_LANE(2);
 +      writel(val, xpcs + MVPP22_XPCS_CFG0);
 +
 +      /* MPCS */
 +      val = readl(mpcs + MVPP22_MPCS_CTRL);
 +      val &= ~MVPP22_MPCS_CTRL_FWD_ERR_CONN;
 +      writel(val, mpcs + MVPP22_MPCS_CTRL);
 +
 +      val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
 +      val &= ~(MVPP22_MPCS_CLK_RESET_DIV_RATIO(0x7) | MAC_CLK_RESET_MAC |
 +               MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX);
 +      val |= MVPP22_MPCS_CLK_RESET_DIV_RATIO(1);
 +      writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
 +
 +      val &= ~MVPP22_MPCS_CLK_RESET_DIV_SET;
 +      val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX;
 +      writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
 +}
 +
 +static int mvpp22_gop_init(struct mvpp2_port *port)
 +{
 +      struct mvpp2 *priv = port->priv;
 +      u32 val;
 +
 +      if (!priv->sysctrl_base)
 +              return 0;
 +
 +      switch (port->phy_interface) {
 +      case PHY_INTERFACE_MODE_RGMII:
 +      case PHY_INTERFACE_MODE_RGMII_ID:
 +      case PHY_INTERFACE_MODE_RGMII_RXID:
 +      case PHY_INTERFACE_MODE_RGMII_TXID:
 +              if (port->gop_id == 0)
 +                      goto invalid_conf;
 +              mvpp22_gop_init_rgmii(port);
 +              break;
 +      case PHY_INTERFACE_MODE_SGMII:
 +              mvpp22_gop_init_sgmii(port);
 +              break;
 +      case PHY_INTERFACE_MODE_10GKR:
 +              if (port->gop_id != 0)
 +                      goto invalid_conf;
 +              mvpp22_gop_init_10gkr(port);
 +              break;
 +      default:
 +              goto unsupported_conf;
 +      }
 +
 +      regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL1, &val);
 +      val |= GENCONF_PORT_CTRL1_RESET(port->gop_id) |
 +             GENCONF_PORT_CTRL1_EN(port->gop_id);
 +      regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL1, val);
 +
 +      regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL0, &val);
 +      val |= GENCONF_PORT_CTRL0_CLK_DIV_PHASE_CLR;
 +      regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL0, val);
 +
 +      regmap_read(priv->sysctrl_base, GENCONF_SOFT_RESET1, &val);
 +      val |= GENCONF_SOFT_RESET1_GOP;
 +      regmap_write(priv->sysctrl_base, GENCONF_SOFT_RESET1, val);
 +
 +unsupported_conf:
 +      return 0;
 +
 +invalid_conf:
 +      netdev_err(port->dev, "Invalid port configuration\n");
 +      return -EINVAL;
 +}
 +
 +static void mvpp22_gop_unmask_irq(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      if (phy_interface_mode_is_rgmii(port->phy_interface) ||
 +          port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +              /* Enable the GMAC link status irq for this port */
 +              val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
 +              val |= MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
 +              writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
 +      }
 +
 +      if (port->gop_id == 0) {
 +              /* Enable the XLG/GIG irqs for this port */
 +              val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
 +              if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
 +                      val |= MVPP22_XLG_EXT_INT_MASK_XLG;
 +              else
 +                      val |= MVPP22_XLG_EXT_INT_MASK_GIG;
 +              writel(val, port->base + MVPP22_XLG_EXT_INT_MASK);
 +      }
 +}
 +
 +static void mvpp22_gop_mask_irq(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      if (port->gop_id == 0) {
 +              val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
 +              val &= ~(MVPP22_XLG_EXT_INT_MASK_XLG |
 +                       MVPP22_XLG_EXT_INT_MASK_GIG);
 +              writel(val, port->base + MVPP22_XLG_EXT_INT_MASK);
 +      }
 +
 +      if (phy_interface_mode_is_rgmii(port->phy_interface) ||
 +          port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +              val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
 +              val &= ~MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
 +              writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
 +      }
 +}
 +
 +static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      if (phy_interface_mode_is_rgmii(port->phy_interface) ||
 +          port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +              val = readl(port->base + MVPP22_GMAC_INT_MASK);
 +              val |= MVPP22_GMAC_INT_MASK_LINK_STAT;
 +              writel(val, port->base + MVPP22_GMAC_INT_MASK);
 +      }
 +
 +      if (port->gop_id == 0) {
 +              val = readl(port->base + MVPP22_XLG_INT_MASK);
 +              val |= MVPP22_XLG_INT_MASK_LINK;
 +              writel(val, port->base + MVPP22_XLG_INT_MASK);
 +      }
 +
 +      mvpp22_gop_unmask_irq(port);
 +}
 +
 +static int mvpp22_comphy_init(struct mvpp2_port *port)
 +{
 +      enum phy_mode mode;
 +      int ret;
 +
 +      if (!port->comphy)
 +              return 0;
 +
 +      switch (port->phy_interface) {
 +      case PHY_INTERFACE_MODE_SGMII:
 +              mode = PHY_MODE_SGMII;
 +              break;
 +      case PHY_INTERFACE_MODE_10GKR:
 +              mode = PHY_MODE_10GKR;
 +              break;
 +      default:
 +              return -EINVAL;
 +      }
 +
 +      ret = phy_set_mode(port->comphy, mode);
 +      if (ret)
 +              return ret;
 +
 +      return phy_power_on(port->comphy);
 +}
 +
 +static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +              val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
 +              val |= MVPP22_CTRL4_SYNC_BYPASS_DIS | MVPP22_CTRL4_DP_CLK_SEL |
 +                     MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
 +              val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
 +              writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
 +
 +              val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 +              val |= MVPP2_GMAC_DISABLE_PADDING;
 +              val &= ~MVPP2_GMAC_FLOW_CTRL_MASK;
 +              writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 +      } else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
 +              val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
 +              val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
 +                     MVPP22_CTRL4_SYNC_BYPASS_DIS |
 +                     MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
 +              val &= ~MVPP22_CTRL4_DP_CLK_SEL;
 +              writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
 +
 +              val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 +              val &= ~MVPP2_GMAC_DISABLE_PADDING;
 +              writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 +      }
 +
 +      /* The port is connected to a copper PHY */
 +      val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
 +      val &= ~MVPP2_GMAC_PORT_TYPE_MASK;
 +      writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
 +
 +      val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +      val |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS |
 +             MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
 +             MVPP2_GMAC_AN_DUPLEX_EN;
 +      if (port->phy_interface == PHY_INTERFACE_MODE_SGMII)
 +              val |= MVPP2_GMAC_IN_BAND_AUTONEG;
 +      writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +}
 +
 +static void mvpp2_port_mii_gmac_configure(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      /* Force link down */
 +      val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +      val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
 +      val |= MVPP2_GMAC_FORCE_LINK_DOWN;
 +      writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +
 +      /* Set the GMAC in a reset state */
 +      val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 +      val |= MVPP2_GMAC_PORT_RESET_MASK;
 +      writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 +
 +      /* Configure the PCS and in-band AN */
 +      val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 +      if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +              val |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
 +      } else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
 +              val &= ~MVPP2_GMAC_PCS_ENABLE_MASK;
 +              val |= MVPP2_GMAC_PORT_RGMII_MASK;
 +      }
 +      writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 +
 +      mvpp2_port_mii_gmac_configure_mode(port);
 +
 +      /* Unset the GMAC reset state */
 +      val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 +      val &= ~MVPP2_GMAC_PORT_RESET_MASK;
 +      writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 +
 +      /* Stop forcing link down */
 +      val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +      val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
 +      writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +}
 +
 +static void mvpp2_port_mii_xlg_configure(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      if (port->gop_id != 0)
 +              return;
 +
 +      val = readl(port->base + MVPP22_XLG_CTRL0_REG);
 +      val |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
 +      writel(val, port->base + MVPP22_XLG_CTRL0_REG);
 +
 +      val = readl(port->base + MVPP22_XLG_CTRL4_REG);
 +      val &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
 +      val |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC;
 +      writel(val, port->base + MVPP22_XLG_CTRL4_REG);
 +}
 +
  static void mvpp22_port_mii_set(struct mvpp2_port *port)
  {
        u32 val;
  
                writel(val, port->base + MVPP22_XLG_CTRL3_REG);
        }
 -
 -      val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
 -      if (port->phy_interface == PHY_INTERFACE_MODE_RGMII)
 -              val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL;
 -      else
 -              val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
 -      val &= ~MVPP22_CTRL4_DP_CLK_SEL;
 -      val |= MVPP22_CTRL4_SYNC_BYPASS;
 -      val |= MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
 -      writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
  }
  
  static void mvpp2_port_mii_set(struct mvpp2_port *port)
  {
 -      u32 val;
 -
        if (port->priv->hw_version == MVPP22)
                mvpp22_port_mii_set(port);
  
 -      val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
 -
 -      switch (port->phy_interface) {
 -      case PHY_INTERFACE_MODE_SGMII:
 -              val |= MVPP2_GMAC_INBAND_AN_MASK;
 -              break;
 -      case PHY_INTERFACE_MODE_RGMII:
 -              val |= MVPP2_GMAC_PORT_RGMII_MASK;
 -      default:
 -              val &= ~MVPP2_GMAC_PCS_ENABLE_MASK;
 -      }
 -
 -      writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 +      if (phy_interface_mode_is_rgmii(port->phy_interface) ||
 +          port->phy_interface == PHY_INTERFACE_MODE_SGMII)
 +              mvpp2_port_mii_gmac_configure(port);
 +      else if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
 +              mvpp2_port_mii_xlg_configure(port);
  }
  
  static void mvpp2_port_fc_adv_enable(struct mvpp2_port *port)
@@@ -4760,18 -4326,6 +4760,18 @@@ static inline void mvpp2_gmac_max_rx_si
        writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
  }
  
 +/* Change maximum receive size of the port */
 +static inline void mvpp2_xlg_max_rx_size_set(struct mvpp2_port *port)
 +{
 +      u32 val;
 +
 +      val = readl(port->base + MVPP22_XLG_CTRL1_REG);
 +      val &= ~MVPP22_XLG_CTRL1_FRAMESIZELIMIT_MASK;
 +      val |= ((port->pkt_size - MVPP2_MH_SIZE) / 2) <<
 +             MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS;
 +      writel(val, port->base + MVPP22_XLG_CTRL1_REG);
 +}
 +
  /* Set defaults to the MVPP2 port */
  static void mvpp2_defaults_set(struct mvpp2_port *port)
  {
                    MVPP2_RX_LOW_LATENCY_PKT_SIZE(256));
  
        /* Enable Rx cache snoop */
 -      for (lrxq = 0; lrxq < rxq_number; lrxq++) {
 +      for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
                queue = port->rxqs[lrxq]->id;
                val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
                val |= MVPP2_SNOOP_PKT_SIZE_MASK |
@@@ -4840,7 -4394,7 +4840,7 @@@ static void mvpp2_ingress_enable(struc
        u32 val;
        int lrxq, queue;
  
 -      for (lrxq = 0; lrxq < rxq_number; lrxq++) {
 +      for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
                queue = port->rxqs[lrxq]->id;
                val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
                val &= ~MVPP2_RXQ_DISABLE_MASK;
@@@ -4853,7 -4407,7 +4853,7 @@@ static void mvpp2_ingress_disable(struc
        u32 val;
        int lrxq, queue;
  
 -      for (lrxq = 0; lrxq < rxq_number; lrxq++) {
 +      for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
                queue = port->rxqs[lrxq]->id;
                val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
                val |= MVPP2_RXQ_DISABLE_MASK;
@@@ -4872,7 -4426,7 +4872,7 @@@ static void mvpp2_egress_enable(struct 
  
        /* Enable all initialized TXs. */
        qmap = 0;
 -      for (queue = 0; queue < txq_number; queue++) {
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                struct mvpp2_tx_queue *txq = port->txqs[queue];
  
                if (txq->descs)
@@@ -5158,7 -4712,7 +5158,7 @@@ static void mvpp2_txq_sent_counter_clea
        struct mvpp2_port *port = arg;
        int queue;
  
 -      for (queue = 0; queue < txq_number; queue++) {
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                int id = port->txqs[queue]->id;
  
                mvpp2_percpu_read(port->priv, smp_processor_id(),
@@@ -5199,7 -4753,7 +5199,7 @@@ static void mvpp2_txp_max_tx_size_set(s
                mvpp2_write(port->priv, MVPP2_TXP_SCHED_TOKEN_SIZE_REG, val);
        }
  
 -      for (txq = 0; txq < txq_number; txq++) {
 +      for (txq = 0; txq < port->ntxqs; txq++) {
                val = mvpp2_read(port->priv,
                                 MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq));
                size = val & MVPP2_TXQ_TOKEN_SIZE_MAX;
@@@ -5233,23 -4787,6 +5233,23 @@@ static void mvpp2_rx_pkts_coal_set(stru
        put_cpu();
  }
  
 +/* For some reason in the LSP this is done on each CPU. Why? */
 +static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port,
 +                                 struct mvpp2_tx_queue *txq)
 +{
 +      int cpu = get_cpu();
 +      u32 val;
 +
 +      if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK)
 +              txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK;
 +
 +      val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET);
 +      mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 +      mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_THRESH_REG, val);
 +
 +      put_cpu();
 +}
 +
  static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
  {
        u64 tmp = (u64)clk_hz * usec;
@@@ -5286,22 -4823,6 +5286,22 @@@ static void mvpp2_rx_time_coal_set(stru
        mvpp2_write(port->priv, MVPP2_ISR_RX_THRESHOLD_REG(rxq->id), val);
  }
  
 +static void mvpp2_tx_time_coal_set(struct mvpp2_port *port)
 +{
 +      unsigned long freq = port->priv->tclk;
 +      u32 val = mvpp2_usec_to_cycles(port->tx_time_coal, freq);
 +
 +      if (val > MVPP2_MAX_ISR_TX_THRESHOLD) {
 +              port->tx_time_coal =
 +                      mvpp2_cycles_to_usec(MVPP2_MAX_ISR_TX_THRESHOLD, freq);
 +
 +              /* re-evaluate to get actual register value */
 +              val = mvpp2_usec_to_cycles(port->tx_time_coal, freq);
 +      }
 +
 +      mvpp2_write(port->priv, MVPP2_ISR_TX_THRESHOLD_REG(port->id), val);
 +}
 +
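For scale: with the new default MVPP2_TXDONE_COAL_USEC of 1000 and, assuming for illustration a 250 MHz tclk, mvpp2_usec_to_cycles() gives 250000000 * 1000 / 1000000 = 250000 cycles, far below MVPP2_MAX_ISR_TX_THRESHOLD (0xfffff0, roughly 16.7 million), so no clamping occurs; only much larger timeouts are clamped and converted back through mvpp2_cycles_to_usec() so that the stored tx_time_coal reflects the value actually programmed.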
  /* Free Tx queue skbuffs */
  static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
                                struct mvpp2_tx_queue *txq,
@@@ -5360,8 -4881,7 +5360,8 @@@ static void mvpp2_txq_done(struct mvpp2
                        netif_tx_wake_queue(nq);
  }
  
 -static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause)
 +static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause,
 +                                int cpu)
  {
        struct mvpp2_tx_queue *txq;
        struct mvpp2_txq_pcpu *txq_pcpu;
                if (!txq)
                        break;
  
 -              txq_pcpu = this_cpu_ptr(txq->pcpu);
 +              txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
  
                if (txq_pcpu->count) {
                        mvpp2_txq_done(port, txq, txq_pcpu);
  
  /* Allocate and initialize descriptors for aggr TXQ */
  static int mvpp2_aggr_txq_init(struct platform_device *pdev,
 -                             struct mvpp2_tx_queue *aggr_txq,
 -                             int desc_num, int cpu,
 +                             struct mvpp2_tx_queue *aggr_txq, int cpu,
                               struct mvpp2 *priv)
  {
        u32 txq_dma;
  
        /* Allocate memory for TX descriptors */
        aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
 -                              desc_num * MVPP2_DESC_ALIGNED_SIZE,
 +                              MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
                                &aggr_txq->descs_dma, GFP_KERNEL);
        if (!aggr_txq->descs)
                return -ENOMEM;
                        MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
  
        mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
 -      mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num);
 +      mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu),
 +                  MVPP2_AGGR_TXQ_SIZE);
  
        return 0;
  }
@@@ -5598,14 -5118,6 +5598,14 @@@ static int mvpp2_txq_init(struct mvpp2_
                txq_pcpu->reserved_num = 0;
                txq_pcpu->txq_put_index = 0;
                txq_pcpu->txq_get_index = 0;
 +
 +              txq_pcpu->tso_headers =
 +                      dma_alloc_coherent(port->dev->dev.parent,
 +                                         MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
 +                                         &txq_pcpu->tso_headers_dma,
 +                                         GFP_KERNEL);
 +              if (!txq_pcpu->tso_headers)
 +                      goto cleanup;
        }
  
        return 0;
@@@ -5613,11 -5125,6 +5613,11 @@@ cleanup
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
                kfree(txq_pcpu->buffs);
 +
 +              dma_free_coherent(port->dev->dev.parent,
 +                                MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
 +                                txq_pcpu->tso_headers,
 +                                txq_pcpu->tso_headers_dma);
        }
  
        dma_free_coherent(port->dev->dev.parent,
@@@ -5637,11 -5144,6 +5637,11 @@@ static void mvpp2_txq_deinit(struct mvp
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
                kfree(txq_pcpu->buffs);
 +
 +              dma_free_coherent(port->dev->dev.parent,
 +                                MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
 +                                txq_pcpu->tso_headers,
 +                                txq_pcpu->tso_headers_dma);
        }
  
        if (txq->descs)
@@@ -5727,7 -5229,7 +5727,7 @@@ static void mvpp2_cleanup_txqs(struct m
        val |= MVPP2_TX_PORT_FLUSH_MASK(port->id);
        mvpp2_write(port->priv, MVPP2_TX_PORT_FLUSH_REG, val);
  
 -      for (queue = 0; queue < txq_number; queue++) {
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                txq = port->txqs[queue];
                mvpp2_txq_clean(port, txq);
                mvpp2_txq_deinit(port, txq);
@@@ -5744,7 -5246,7 +5744,7 @@@ static void mvpp2_cleanup_rxqs(struct m
  {
        int queue;
  
 -      for (queue = 0; queue < rxq_number; queue++)
 +      for (queue = 0; queue < port->nrxqs; queue++)
                mvpp2_rxq_deinit(port, port->rxqs[queue]);
  }
  
@@@ -5753,7 -5255,7 +5753,7 @@@ static int mvpp2_setup_rxqs(struct mvpp
  {
        int queue, err;
  
 -      for (queue = 0; queue < rxq_number; queue++) {
 +      for (queue = 0; queue < port->nrxqs; queue++) {
                err = mvpp2_rxq_init(port, port->rxqs[queue]);
                if (err)
                        goto err_cleanup;
@@@ -5771,21 -5273,13 +5771,21 @@@ static int mvpp2_setup_txqs(struct mvpp
        struct mvpp2_tx_queue *txq;
        int queue, err;
  
 -      for (queue = 0; queue < txq_number; queue++) {
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                txq = port->txqs[queue];
                err = mvpp2_txq_init(port, txq);
                if (err)
                        goto err_cleanup;
        }
  
 +      if (port->has_tx_irqs) {
 +              mvpp2_tx_time_coal_set(port);
 +              for (queue = 0; queue < port->ntxqs; queue++) {
 +                      txq = port->txqs[queue];
 +                      mvpp2_tx_pkts_coal_set(port, txq);
 +              }
 +      }
 +
        on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1);
        return 0;
  
@@@ -5797,170 -5291,72 +5797,170 @@@ err_cleanup
  /* The callback for per-port interrupt */
  static irqreturn_t mvpp2_isr(int irq, void *dev_id)
  {
 -      struct mvpp2_port *port = (struct mvpp2_port *)dev_id;
 +      struct mvpp2_queue_vector *qv = dev_id;
  
 -      mvpp2_interrupts_disable(port);
 +      mvpp2_qvec_interrupt_disable(qv);
  
 -      napi_schedule(&port->napi);
 +      napi_schedule(&qv->napi);
  
        return IRQ_HANDLED;
  }
  
 -/* Adjust link */
 -static void mvpp2_link_event(struct net_device *dev)
 +/* Per-port interrupt for link status changes */
 +static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
  {
 -      struct mvpp2_port *port = netdev_priv(dev);
 -      struct phy_device *phydev = dev->phydev;
 -      int status_change = 0;
 +      struct mvpp2_port *port = (struct mvpp2_port *)dev_id;
 +      struct net_device *dev = port->dev;
 +      bool event = false, link = false;
        u32 val;
  
 -      if (phydev->link) {
 -              if ((port->speed != phydev->speed) ||
 -                  (port->duplex != phydev->duplex)) {
 -                      u32 val;
 +      mvpp22_gop_mask_irq(port);
  
 -                      val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 -                      val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
 -                               MVPP2_GMAC_CONFIG_GMII_SPEED |
 -                               MVPP2_GMAC_CONFIG_FULL_DUPLEX |
 -                               MVPP2_GMAC_AN_SPEED_EN |
 -                               MVPP2_GMAC_AN_DUPLEX_EN);
 +      if (port->gop_id == 0 &&
 +          port->phy_interface == PHY_INTERFACE_MODE_10GKR) {
 +              val = readl(port->base + MVPP22_XLG_INT_STAT);
 +              if (val & MVPP22_XLG_INT_STAT_LINK) {
 +                      event = true;
 +                      val = readl(port->base + MVPP22_XLG_STATUS);
 +                      if (val & MVPP22_XLG_STATUS_LINK_UP)
 +                              link = true;
 +              }
 +      } else if (phy_interface_mode_is_rgmii(port->phy_interface) ||
 +                 port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +              val = readl(port->base + MVPP22_GMAC_INT_STAT);
 +              if (val & MVPP22_GMAC_INT_STAT_LINK) {
 +                      event = true;
 +                      val = readl(port->base + MVPP2_GMAC_STATUS0);
 +                      if (val & MVPP2_GMAC_STATUS0_LINK_UP)
 +                              link = true;
 +              }
 +      }
  
 -                      if (phydev->duplex)
 -                              val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
 +      if (!netif_running(dev) || !event)
 +              goto handled;
  
 -                      if (phydev->speed == SPEED_1000)
 -                              val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
 -                      else if (phydev->speed == SPEED_100)
 -                              val |= MVPP2_GMAC_CONFIG_MII_SPEED;
 +      if (link) {
 +              mvpp2_interrupts_enable(port);
  
 -                      writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +              mvpp2_egress_enable(port);
 +              mvpp2_ingress_enable(port);
 +              netif_carrier_on(dev);
 +              netif_tx_wake_all_queues(dev);
 +      } else {
 +              netif_tx_stop_all_queues(dev);
 +              netif_carrier_off(dev);
 +              mvpp2_ingress_disable(port);
 +              mvpp2_egress_disable(port);
  
 -                      port->duplex = phydev->duplex;
 -                      port->speed  = phydev->speed;
 -              }
 +              mvpp2_interrupts_disable(port);
        }
  
 -      if (phydev->link != port->link) {
 -              if (!phydev->link) {
 -                      port->duplex = -1;
 -                      port->speed = 0;
 +handled:
 +      mvpp22_gop_unmask_irq(port);
 +      return IRQ_HANDLED;
 +}
 +
 +static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port,
 +                                 struct phy_device *phydev)
 +{
 +      u32 val;
 +
 +      if (port->phy_interface != PHY_INTERFACE_MODE_RGMII &&
 +          port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID &&
 +          port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID &&
 +          port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID &&
 +          port->phy_interface != PHY_INTERFACE_MODE_SGMII)
 +              return;
 +
 +      val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +      val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
 +               MVPP2_GMAC_CONFIG_GMII_SPEED |
 +               MVPP2_GMAC_CONFIG_FULL_DUPLEX |
 +               MVPP2_GMAC_AN_SPEED_EN |
 +               MVPP2_GMAC_AN_DUPLEX_EN);
 +
 +      if (phydev->duplex)
 +              val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
 +
 +      if (phydev->speed == SPEED_1000)
 +              val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
 +      else if (phydev->speed == SPEED_100)
 +              val |= MVPP2_GMAC_CONFIG_MII_SPEED;
 +
 +      writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +}
 +
 +/* Adjust link */
 +static void mvpp2_link_event(struct net_device *dev)
 +{
 +      struct mvpp2_port *port = netdev_priv(dev);
 +      struct phy_device *phydev = dev->phydev;
 +      bool link_reconfigured = false;
 +      u32 val;
 +
 +      if (phydev->link) {
 +              if (port->phy_interface != phydev->interface && port->comphy) {
 +                      /* disable current port for reconfiguration */
 +                      mvpp2_interrupts_disable(port);
 +                      netif_carrier_off(port->dev);
 +                      mvpp2_port_disable(port);
 +                      phy_power_off(port->comphy);
 +
 +                      /* comphy reconfiguration */
 +                      port->phy_interface = phydev->interface;
 +                      mvpp22_comphy_init(port);
 +
 +                      /* gop/mac reconfiguration */
 +                      mvpp22_gop_init(port);
 +                      mvpp2_port_mii_set(port);
 +
 +                      link_reconfigured = true;
                }
  
 -              port->link = phydev->link;
 -              status_change = 1;
 +              if ((port->speed != phydev->speed) ||
 +                  (port->duplex != phydev->duplex)) {
 +                      mvpp2_gmac_set_autoneg(port, phydev);
 +
 +                      port->duplex = phydev->duplex;
 +                      port->speed  = phydev->speed;
 +              }
        }
  
 -      if (status_change) {
 +      if (phydev->link != port->link || link_reconfigured) {
 +              port->link = phydev->link;
 +
                if (phydev->link) {
 -                      val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 -                      val |= (MVPP2_GMAC_FORCE_LINK_PASS |
 -                              MVPP2_GMAC_FORCE_LINK_DOWN);
 -                      writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +                      if (port->phy_interface == PHY_INTERFACE_MODE_RGMII ||
 +                          port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
 +                          port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
 +                          port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID ||
 +                          port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +                              val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +                              val |= (MVPP2_GMAC_FORCE_LINK_PASS |
 +                                      MVPP2_GMAC_FORCE_LINK_DOWN);
 +                              writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 +                      }
 +
 +                      mvpp2_interrupts_enable(port);
 +                      mvpp2_port_enable(port);
 +
                        mvpp2_egress_enable(port);
                        mvpp2_ingress_enable(port);
 +                      netif_carrier_on(dev);
 +                      netif_tx_wake_all_queues(dev);
                } else {
 +                      port->duplex = -1;
 +                      port->speed = 0;
 +
 +                      netif_tx_stop_all_queues(dev);
 +                      netif_carrier_off(dev);
                        mvpp2_ingress_disable(port);
                        mvpp2_egress_disable(port);
 +
 +                      mvpp2_port_disable(port);
 +                      mvpp2_interrupts_disable(port);
                }
 +
                phy_print_status(phydev);
        }
  }
@@@ -5989,8 -5385,8 +5989,8 @@@ static void mvpp2_tx_proc_cb(unsigned l
        port_pcpu->timer_scheduled = false;
  
        /* Process all the Tx queues */
 -      cause = (1 << txq_number) - 1;
 -      tx_todo = mvpp2_tx_done(port, cause);
 +      cause = (1 << port->ntxqs) - 1;
 +      tx_todo = mvpp2_tx_done(port, cause, smp_processor_id());
  
        /* Set the timer in case not all the packets were processed */
        if (tx_todo)
@@@ -6102,8 -5498,8 +6102,8 @@@ static u32 mvpp2_skb_tx_csum(struct mvp
  }
  
  /* Main rx processing */
 -static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 -                  struct mvpp2_rx_queue *rxq)
 +static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 +                  int rx_todo, struct mvpp2_rx_queue *rxq)
  {
        struct net_device *dev = port->dev;
        int rx_received;
@@@ -6181,7 -5577,7 +6181,7 @@@ err_drop_frame
                skb->protocol = eth_type_trans(skb, dev);
                mvpp2_rx_csum(port, rx_status, skb);
  
 -              napi_gro_receive(&port->napi, skb);
 +              napi_gro_receive(napi, skb);
        }
  
        if (rcvd_pkts) {
@@@ -6269,123 -5665,6 +6269,123 @@@ cleanup
        return -ENOMEM;
  }
  
 +static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
 +                                   struct net_device *dev,
 +                                   struct mvpp2_tx_queue *txq,
 +                                   struct mvpp2_tx_queue *aggr_txq,
 +                                   struct mvpp2_txq_pcpu *txq_pcpu,
 +                                   int hdr_sz)
 +{
 +      struct mvpp2_port *port = netdev_priv(dev);
 +      struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
 +      dma_addr_t addr;
 +
 +      mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
 +      mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);
 +
 +      addr = txq_pcpu->tso_headers_dma +
 +             txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
 +      mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
 +      mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);
 +
 +      mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
 +                                          MVPP2_TXD_F_DESC |
 +                                          MVPP2_TXD_PADDING_DISABLE);
 +      mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
 +}
 +
 +static inline int mvpp2_tso_put_data(struct sk_buff *skb,
 +                                   struct net_device *dev, struct tso_t *tso,
 +                                   struct mvpp2_tx_queue *txq,
 +                                   struct mvpp2_tx_queue *aggr_txq,
 +                                   struct mvpp2_txq_pcpu *txq_pcpu,
 +                                   int sz, bool left, bool last)
 +{
 +      struct mvpp2_port *port = netdev_priv(dev);
 +      struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
 +      dma_addr_t buf_dma_addr;
 +
 +      mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
 +      mvpp2_txdesc_size_set(port, tx_desc, sz);
 +
 +      buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
 +                                    DMA_TO_DEVICE);
 +      if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
 +              mvpp2_txq_desc_put(txq);
 +              return -ENOMEM;
 +      }
 +
 +      mvpp2_txdesc_offset_set(port, tx_desc,
 +                              buf_dma_addr & MVPP2_TX_DESC_ALIGN);
 +      mvpp2_txdesc_dma_addr_set(port, tx_desc,
 +                                buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
 +
 +      if (!left) {
 +              mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
 +              if (last) {
 +                      mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
 +                      return 0;
 +              }
 +      } else {
 +              mvpp2_txdesc_cmd_set(port, tx_desc, 0);
 +      }
 +
 +      mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
 +      return 0;
 +}
 +
 +static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
 +                      struct mvpp2_tx_queue *txq,
 +                      struct mvpp2_tx_queue *aggr_txq,
 +                      struct mvpp2_txq_pcpu *txq_pcpu)
 +{
 +      struct mvpp2_port *port = netdev_priv(dev);
 +      struct tso_t tso;
 +      int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
 +      int i, len, descs = 0;
 +
 +      /* Check number of available descriptors */
 +      if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
 +                                    tso_count_descs(skb)) ||
 +          mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
 +                                           tso_count_descs(skb)))
 +              return 0;
 +
 +      tso_start(skb, &tso);
 +      len = skb->len - hdr_sz;
 +      while (len > 0) {
 +              int left = min_t(int, skb_shinfo(skb)->gso_size, len);
 +              char *hdr = txq_pcpu->tso_headers +
 +                          txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
 +
 +              len -= left;
 +              descs++;
 +
 +              tso_build_hdr(skb, hdr, &tso, left, len == 0);
 +              mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);
 +
 +              while (left > 0) {
 +                      int sz = min_t(int, tso.size, left);
 +                      left -= sz;
 +                      descs++;
 +
 +                      if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
 +                                             txq_pcpu, sz, left, len == 0))
 +                              goto release;
 +                      tso_build_data(skb, &tso, sz);
 +              }
 +      }
 +
 +      return descs;
 +
 +release:
 +      for (i = descs - 1; i >= 0; i--) {
 +              struct mvpp2_tx_desc *tx_desc = txq->descs + i;
 +              tx_desc_unmap_put(port, txq, tx_desc);
 +      }
 +      return 0;
 +}
 +
  /* Main tx processing */
  static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
  {
        txq_pcpu = this_cpu_ptr(txq->pcpu);
        aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
  
 +      if (skb_is_gso(skb)) {
 +              frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
 +              goto out;
 +      }
        frags = skb_shinfo(skb)->nr_frags + 1;
  
        /* Check number of available descriptors */
                }
        }
  
 -      txq_pcpu->reserved_num -= frags;
 -      txq_pcpu->count += frags;
 -      aggr_txq->count += frags;
 -
 -      /* Enable transmit */
 -      wmb();
 -      mvpp2_aggr_txq_pend_desc_add(port, frags);
 -
 -      if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
 -              struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 -
 -              netif_tx_stop_queue(nq);
 -      }
  out:
        if (frags > 0) {
                struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
 +              struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 +
 +              txq_pcpu->reserved_num -= frags;
 +              txq_pcpu->count += frags;
 +              aggr_txq->count += frags;
 +
 +              /* Enable transmit */
 +              wmb();
 +              mvpp2_aggr_txq_pend_desc_add(port, frags);
 +
 +              if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
 +                      netif_tx_stop_queue(nq);
  
                u64_stats_update_begin(&stats->syncp);
                stats->tx_packets++;
                mvpp2_txq_done(port, txq, txq_pcpu);
  
        /* Set the timer in case not all frags were processed */
 -      if (txq_pcpu->count <= frags && txq_pcpu->count > 0) {
 +      if (!port->has_tx_irqs && txq_pcpu->count <= frags &&
 +          txq_pcpu->count > 0) {
                struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
  
                mvpp2_timer_set(port_pcpu);
@@@ -6508,14 -5783,11 +6508,14 @@@ static inline void mvpp2_cause_error(st
  
  static int mvpp2_poll(struct napi_struct *napi, int budget)
  {
 -      u32 cause_rx_tx, cause_rx, cause_misc;
 +      u32 cause_rx_tx, cause_rx, cause_tx, cause_misc;
        int rx_done = 0;
        struct mvpp2_port *port = netdev_priv(napi->dev);
 +      struct mvpp2_queue_vector *qv;
        int cpu = smp_processor_id();
  
 +      qv = container_of(napi, struct mvpp2_queue_vector, napi);
 +
        /* Rx/Tx cause register
         *
         * Bits 0-15: each bit indicates received packets on the Rx queue
         *
         * Each CPU has its own Rx/Tx cause register
         */
 -      cause_rx_tx = mvpp2_percpu_read(port->priv, cpu,
 +      cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
                                        MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
 -      cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
 -      cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
  
 +      cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
        if (cause_misc) {
                mvpp2_cause_error(port->dev, cause_misc);
  
                                   cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
        }
  
 -      cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
 +      cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
 +      if (cause_tx) {
 +              cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET;
 +              mvpp2_tx_done(port, cause_tx, qv->sw_thread_id);
 +      }
  
        /* Process RX packets */
 -      cause_rx |= port->pending_cause_rx;
 +      cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
 +      cause_rx <<= qv->first_rxq;
 +      cause_rx |= qv->pending_cause_rx;
        while (cause_rx && budget > 0) {
                int count;
                struct mvpp2_rx_queue *rxq;
                if (!rxq)
                        break;
  
 -              count = mvpp2_rx(port, budget, rxq);
 +              count = mvpp2_rx(port, napi, budget, rxq);
                rx_done += count;
                budget -= count;
                if (budget > 0) {
                cause_rx = 0;
                napi_complete_done(napi, rx_done);
  
 -              mvpp2_interrupts_enable(port);
 +              mvpp2_qvec_interrupt_enable(qv);
        }
 -      port->pending_cause_rx = cause_rx;
 +      qv->pending_cause_rx = cause_rx;
        return rx_done;
  }
  
  static void mvpp2_start_dev(struct mvpp2_port *port)
  {
        struct net_device *ndev = port->dev;
 +      int i;
 +
 +      if (port->gop_id == 0 &&
 +          (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
 +           port->phy_interface == PHY_INTERFACE_MODE_10GKR))
 +              mvpp2_xlg_max_rx_size_set(port);
 +      else
 +              mvpp2_gmac_max_rx_size_set(port);
  
 -      mvpp2_gmac_max_rx_size_set(port);
        mvpp2_txp_max_tx_size_set(port);
  
 -      napi_enable(&port->napi);
 +      for (i = 0; i < port->nqvecs; i++)
 +              napi_enable(&port->qvecs[i].napi);
  
        /* Enable interrupts on all CPUs */
        mvpp2_interrupts_enable(port);
  
 +      if (port->priv->hw_version == MVPP22) {
 +              mvpp22_comphy_init(port);
 +              mvpp22_gop_init(port);
 +      }
 +
 +      mvpp2_port_mii_set(port);
        mvpp2_port_enable(port);
 -      phy_start(ndev->phydev);
 +      if (ndev->phydev)
 +              phy_start(ndev->phydev);
        netif_tx_start_all_queues(port->dev);
  }
  
  static void mvpp2_stop_dev(struct mvpp2_port *port)
  {
        struct net_device *ndev = port->dev;
 +      int i;
  
        /* Stop new packets from arriving to RXQs */
        mvpp2_ingress_disable(port);
        /* Disable interrupts on all CPUs */
        mvpp2_interrupts_disable(port);
  
 -      napi_disable(&port->napi);
 +      for (i = 0; i < port->nqvecs; i++)
 +              napi_disable(&port->qvecs[i].napi);
  
        netif_carrier_off(port->dev);
        netif_tx_stop_all_queues(port->dev);
  
        mvpp2_egress_disable(port);
        mvpp2_port_disable(port);
 -      phy_stop(ndev->phydev);
 +      if (ndev->phydev)
 +              phy_stop(ndev->phydev);
 +      phy_power_off(port->comphy);
  }
  
  static int mvpp2_check_ringparam_valid(struct net_device *dev,
@@@ -6693,10 -5941,6 +6693,10 @@@ static int mvpp2_phy_connect(struct mvp
  {
        struct phy_device *phy_dev;
  
 +      /* No PHY is attached */
 +      if (!port->phy_node)
 +              return 0;
 +
        phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0,
                                 port->phy_interface);
        if (!phy_dev) {
@@@ -6717,56 -5961,12 +6717,56 @@@ static void mvpp2_phy_disconnect(struc
  {
        struct net_device *ndev = port->dev;
  
 +      if (!ndev->phydev)
 +              return;
 +
        phy_disconnect(ndev->phydev);
  }
  
 +static int mvpp2_irqs_init(struct mvpp2_port *port)
 +{
 +      int err, i;
 +
 +      for (i = 0; i < port->nqvecs; i++) {
 +              struct mvpp2_queue_vector *qv = port->qvecs + i;
 +
 +              err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
 +              if (err)
 +                      goto err;
 +
 +              if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
 +                      irq_set_affinity_hint(qv->irq,
 +                                            cpumask_of(qv->sw_thread_id));
 +      }
 +
 +      return 0;
 +err:
 +      for (i = 0; i < port->nqvecs; i++) {
 +              struct mvpp2_queue_vector *qv = port->qvecs + i;
 +
 +              irq_set_affinity_hint(qv->irq, NULL);
 +              free_irq(qv->irq, qv);
 +      }
 +
 +      return err;
 +}
 +
 +static void mvpp2_irqs_deinit(struct mvpp2_port *port)
 +{
 +      int i;
 +
 +      for (i = 0; i < port->nqvecs; i++) {
 +              struct mvpp2_queue_vector *qv = port->qvecs + i;
 +
 +              irq_set_affinity_hint(qv->irq, NULL);
 +              free_irq(qv->irq, qv);
 +      }
 +}
 +
  static int mvpp2_open(struct net_device *dev)
  {
        struct mvpp2_port *port = netdev_priv(dev);
 +      struct mvpp2 *priv = port->priv;
        unsigned char mac_bcast[ETH_ALEN] = {
                        0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        int err;
                goto err_cleanup_rxqs;
        }
  
 -      err = request_irq(port->irq, mvpp2_isr, 0, dev->name, port);
 +      err = mvpp2_irqs_init(port);
        if (err) {
 -              netdev_err(port->dev, "cannot request IRQ %d\n", port->irq);
 +              netdev_err(port->dev, "cannot init IRQs\n");
                goto err_cleanup_txqs;
        }
  
 +      if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) {
 +              err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
 +                                dev->name, port);
 +              if (err) {
 +                      netdev_err(port->dev, "cannot request link IRQ %d\n",
 +                                 port->link_irq);
 +                      goto err_free_irq;
 +              }
 +
 +              mvpp22_gop_setup_irq(port);
 +      }
 +
        /* By default, the link is down */
        netif_carrier_off(port->dev);
  
        err = mvpp2_phy_connect(port);
        if (err < 0)
 -              goto err_free_irq;
 +              goto err_free_link_irq;
  
        /* Unmask interrupts on all CPUs */
        on_each_cpu(mvpp2_interrupts_unmask, port, 1);
 +      mvpp2_shared_interrupt_mask_unmask(port, false);
  
        mvpp2_start_dev(port);
  
        return 0;
  
 +err_free_link_irq:
 +      if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
 +              free_irq(port->link_irq, port);
  err_free_irq:
 -      free_irq(port->irq, port);
 +      mvpp2_irqs_deinit(port);
  err_cleanup_txqs:
        mvpp2_cleanup_txqs(port);
  err_cleanup_rxqs:
@@@ -6855,7 -6039,6 +6855,7 @@@ static int mvpp2_stop(struct net_devic
  {
        struct mvpp2_port *port = netdev_priv(dev);
        struct mvpp2_port_pcpu *port_pcpu;
 +      struct mvpp2 *priv = port->priv;
        int cpu;
  
        mvpp2_stop_dev(port);
  
        /* Mask interrupts on all CPUs */
        on_each_cpu(mvpp2_interrupts_mask, port, 1);
 +      mvpp2_shared_interrupt_mask_unmask(port, true);
  
 -      free_irq(port->irq, port);
 -      for_each_present_cpu(cpu) {
 -              port_pcpu = per_cpu_ptr(port->pcpu, cpu);
 +      if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
 +              free_irq(port->link_irq, port);
  
 -              hrtimer_cancel(&port_pcpu->tx_done_timer);
 -              port_pcpu->timer_scheduled = false;
 -              tasklet_kill(&port_pcpu->tx_done_tasklet);
 +      mvpp2_irqs_deinit(port);
 +      if (!port->has_tx_irqs) {
 +              for_each_present_cpu(cpu) {
 +                      port_pcpu = per_cpu_ptr(port->pcpu, cpu);
 +
 +                      hrtimer_cancel(&port_pcpu->tx_done_timer);
 +                      port_pcpu->timer_scheduled = false;
 +                      tasklet_kill(&port_pcpu->tx_done_tasklet);
 +              }
        }
        mvpp2_cleanup_rxqs(port);
        mvpp2_cleanup_txqs(port);
@@@ -7051,7 -6228,7 +7051,7 @@@ static int mvpp2_ethtool_set_coalesce(s
        struct mvpp2_port *port = netdev_priv(dev);
        int queue;
  
 -      for (queue = 0; queue < rxq_number; queue++) {
 +      for (queue = 0; queue < port->nrxqs; queue++) {
                struct mvpp2_rx_queue *rxq = port->rxqs[queue];
  
                rxq->time_coal = c->rx_coalesce_usecs;
                mvpp2_rx_time_coal_set(port, rxq);
        }
  
 -      for (queue = 0; queue < txq_number; queue++) {
 +      if (port->has_tx_irqs) {
 +              port->tx_time_coal = c->tx_coalesce_usecs;
 +              mvpp2_tx_time_coal_set(port);
 +      }
 +
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                struct mvpp2_tx_queue *txq = port->txqs[queue];
  
                txq->done_pkts_coal = c->tx_max_coalesced_frames;
 +
 +              if (port->has_tx_irqs)
 +                      mvpp2_tx_pkts_coal_set(port, txq);
        }
  
        return 0;
@@@ -7196,129 -6365,6 +7196,129 @@@ static const struct ethtool_ops mvpp2_e
        .set_link_ksettings = phy_ethtool_set_link_ksettings,
  };
  
 +/* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
 + * had a single IRQ defined per-port.
 + */
 +static int mvpp2_simple_queue_vectors_init(struct mvpp2_port *port,
 +                                         struct device_node *port_node)
 +{
 +      struct mvpp2_queue_vector *v = &port->qvecs[0];
 +
 +      v->first_rxq = 0;
 +      v->nrxqs = port->nrxqs;
 +      v->type = MVPP2_QUEUE_VECTOR_SHARED;
 +      v->sw_thread_id = 0;
 +      v->sw_thread_mask = *cpumask_bits(cpu_online_mask);
 +      v->port = port;
 +      v->irq = irq_of_parse_and_map(port_node, 0);
 +      if (v->irq <= 0)
 +              return -EINVAL;
 +      netif_napi_add(port->dev, &v->napi, mvpp2_poll,
 +                     NAPI_POLL_WEIGHT);
 +
 +      port->nqvecs = 1;
 +
 +      return 0;
 +}
 +
 +static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port,
 +                                        struct device_node *port_node)
 +{
 +      struct mvpp2_queue_vector *v;
 +      int i, ret;
 +
 +      port->nqvecs = num_possible_cpus();
 +      if (queue_mode == MVPP2_QDIST_SINGLE_MODE)
 +              port->nqvecs += 1;
 +
 +      for (i = 0; i < port->nqvecs; i++) {
 +              char irqname[16];
 +
 +              v = port->qvecs + i;
 +
 +              v->port = port;
 +              v->type = MVPP2_QUEUE_VECTOR_PRIVATE;
 +              v->sw_thread_id = i;
 +              v->sw_thread_mask = BIT(i);
 +
 +              snprintf(irqname, sizeof(irqname), "tx-cpu%d", i);
 +
 +              if (queue_mode == MVPP2_QDIST_MULTI_MODE) {
 +                      v->first_rxq = i * MVPP2_DEFAULT_RXQ;
 +                      v->nrxqs = MVPP2_DEFAULT_RXQ;
 +              } else if (queue_mode == MVPP2_QDIST_SINGLE_MODE &&
 +                         i == (port->nqvecs - 1)) {
 +                      v->first_rxq = 0;
 +                      v->nrxqs = port->nrxqs;
 +                      v->type = MVPP2_QUEUE_VECTOR_SHARED;
 +                      strncpy(irqname, "rx-shared", sizeof(irqname));
 +              }
 +
 +              v->irq = of_irq_get_byname(port_node, irqname);
 +              if (v->irq <= 0) {
 +                      ret = -EINVAL;
 +                      goto err;
 +              }
 +
 +              netif_napi_add(port->dev, &v->napi, mvpp2_poll,
 +                             NAPI_POLL_WEIGHT);
 +      }
 +
 +      return 0;
 +
 +err:
 +      for (i = 0; i < port->nqvecs; i++)
 +              irq_dispose_mapping(port->qvecs[i].irq);
 +      return ret;
 +}
 +
 +static int mvpp2_queue_vectors_init(struct mvpp2_port *port,
 +                                  struct device_node *port_node)
 +{
 +      if (port->has_tx_irqs)
 +              return mvpp2_multi_queue_vectors_init(port, port_node);
 +      else
 +              return mvpp2_simple_queue_vectors_init(port, port_node);
 +}
 +
 +static void mvpp2_queue_vectors_deinit(struct mvpp2_port *port)
 +{
 +      int i;
 +
 +      for (i = 0; i < port->nqvecs; i++)
 +              irq_dispose_mapping(port->qvecs[i].irq);
 +}
 +
 +/* Configure Rx queue group interrupt for this port */
 +static void mvpp2_rx_irqs_setup(struct mvpp2_port *port)
 +{
 +      struct mvpp2 *priv = port->priv;
 +      u32 val;
 +      int i;
 +
 +      if (priv->hw_version == MVPP21) {
 +              mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
 +                          port->nrxqs);
 +              return;
 +      }
 +
 +      /* Handle the more complicated PPv2.2 case */
 +      for (i = 0; i < port->nqvecs; i++) {
 +              struct mvpp2_queue_vector *qv = port->qvecs + i;
 +
 +              if (!qv->nrxqs)
 +                      continue;
 +
 +              val = qv->sw_thread_id;
 +              val |= port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET;
 +              mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
 +
 +              val = qv->first_rxq;
 +              val |= qv->nrxqs << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET;
 +              mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
 +      }
 +}
 +
  /* Initialize port HW */
  static int mvpp2_port_init(struct mvpp2_port *port)
  {
        struct mvpp2_txq_pcpu *txq_pcpu;
        int queue, cpu, err;
  
 -      if (port->first_rxq + rxq_number >
 +      /* Checks for hardware constraints */
 +      if (port->first_rxq + port->nrxqs >
            MVPP2_MAX_PORTS * priv->max_port_rxqs)
                return -EINVAL;
  
 +      if (port->nrxqs % 4 || (port->nrxqs > priv->max_port_rxqs) ||
 +          (port->ntxqs > MVPP2_MAX_TXQ))
 +              return -EINVAL;
 +
        /* Disable port */
        mvpp2_egress_disable(port);
        mvpp2_port_disable(port);
  
 -      port->txqs = devm_kcalloc(dev, txq_number, sizeof(*port->txqs),
 +      port->tx_time_coal = MVPP2_TXDONE_COAL_USEC;
 +
 +      port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs),
                                  GFP_KERNEL);
        if (!port->txqs)
                return -ENOMEM;
        /* Associate physical Tx queues to this port and initialize.
         * The mapping is predefined.
         */
 -      for (queue = 0; queue < txq_number; queue++) {
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                int queue_phy_id = mvpp2_txq_phys(port->id, queue);
                struct mvpp2_tx_queue *txq;
  
                port->txqs[queue] = txq;
        }
  
 -      port->rxqs = devm_kcalloc(dev, rxq_number, sizeof(*port->rxqs),
 +      port->rxqs = devm_kcalloc(dev, port->nrxqs, sizeof(*port->rxqs),
                                  GFP_KERNEL);
        if (!port->rxqs) {
                err = -ENOMEM;
        }
  
        /* Allocate and initialize Rx queue for this port */
 -      for (queue = 0; queue < rxq_number; queue++) {
 +      for (queue = 0; queue < port->nrxqs; queue++) {
                struct mvpp2_rx_queue *rxq;
  
                /* Map physical Rx queue to port's logical Rx queue */
                port->rxqs[queue] = rxq;
        }
  
 -      /* Configure Rx queue group interrupt for this port */
 -      if (priv->hw_version == MVPP21) {
 -              mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
 -                          rxq_number);
 -      } else {
 -              u32 val;
 -
 -              val = (port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
 -              mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
 -
 -              val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
 -              mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
 -      }
 +      mvpp2_rx_irqs_setup(port);
  
        /* Create Rx descriptor rings */
 -      for (queue = 0; queue < rxq_number; queue++) {
 +      for (queue = 0; queue < port->nrxqs; queue++) {
                struct mvpp2_rx_queue *rxq = port->rxqs[queue];
  
                rxq->size = port->rx_ring_size;
        return 0;
  
  err_free_percpu:
 -      for (queue = 0; queue < txq_number; queue++) {
 +      for (queue = 0; queue < port->ntxqs; queue++) {
                if (!port->txqs[queue])
                        continue;
                free_percpu(port->txqs[queue]->pcpu);
        return err;
  }
  
 +/* Checks if the port DT description has the TX interrupts
 + * described. On PPv2.1, there are no such interrupts. On PPv2.2,
 + * they are available, but we need to keep support for old DTs.
 + */
 +static bool mvpp2_port_has_tx_irqs(struct mvpp2 *priv,
 +                                 struct device_node *port_node)
 +{
 +      char *irqs[5] = { "rx-shared", "tx-cpu0", "tx-cpu1",
 +                        "tx-cpu2", "tx-cpu3" };
 +      int ret, i;
 +
 +      if (priv->hw_version == MVPP21)
 +              return false;
 +
 +      for (i = 0; i < 5; i++) {
 +              ret = of_property_match_string(port_node, "interrupt-names",
 +                                             irqs[i]);
 +              if (ret < 0)
 +                      return false;
 +      }
 +
 +      return true;
 +}
 +
  /* Ports initialization */
  static int mvpp2_port_probe(struct platform_device *pdev,
                            struct device_node *port_node,
                            struct mvpp2 *priv)
  {
        struct device_node *phy_node;
 +      struct phy *comphy;
        struct mvpp2_port *port;
        struct mvpp2_port_pcpu *port_pcpu;
        struct net_device *dev;
        struct resource *res;
        const char *dt_mac_addr;
        const char *mac_from;
-       char hw_mac_addr[ETH_ALEN];
+       char hw_mac_addr[ETH_ALEN] = {0};
 +      unsigned int ntxqs, nrxqs;
 +      bool has_tx_irqs;
        u32 id;
        int features;
        int phy_mode;
        int err, i, cpu;
  
 -      dev = alloc_etherdev_mqs(sizeof(*port), txq_number, rxq_number);
 +      has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node);
 +
 +      if (!has_tx_irqs)
 +              queue_mode = MVPP2_QDIST_SINGLE_MODE;
 +
 +      ntxqs = MVPP2_MAX_TXQ;
 +      if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_MULTI_MODE)
 +              nrxqs = MVPP2_DEFAULT_RXQ * num_possible_cpus();
 +      else
 +              nrxqs = MVPP2_DEFAULT_RXQ;
 +
 +      dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
        if (!dev)
                return -ENOMEM;
  
        phy_node = of_parse_phandle(port_node, "phy", 0);
 -      if (!phy_node) {
 -              dev_err(&pdev->dev, "missing phy\n");
 -              err = -ENODEV;
 -              goto err_free_netdev;
 -      }
 -
        phy_mode = of_get_phy_mode(port_node);
        if (phy_mode < 0) {
                dev_err(&pdev->dev, "incorrect phy mode\n");
                goto err_free_netdev;
        }
  
 +      comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
 +      if (IS_ERR(comphy)) {
 +              if (PTR_ERR(comphy) == -EPROBE_DEFER) {
 +                      err = -EPROBE_DEFER;
 +                      goto err_free_netdev;
 +              }
 +              comphy = NULL;
 +      }
 +
        if (of_property_read_u32(port_node, "port-id", &id)) {
                err = -EINVAL;
                dev_err(&pdev->dev, "missing port-id value\n");
        dev->ethtool_ops = &mvpp2_eth_tool_ops;
  
        port = netdev_priv(dev);
 +      port->dev = dev;
 +      port->ntxqs = ntxqs;
 +      port->nrxqs = nrxqs;
 +      port->priv = priv;
 +      port->has_tx_irqs = has_tx_irqs;
  
 -      port->irq = irq_of_parse_and_map(port_node, 0);
 -      if (port->irq <= 0) {
 -              err = -EINVAL;
 +      err = mvpp2_queue_vectors_init(port, port_node);
 +      if (err)
                goto err_free_netdev;
 +
 +      port->link_irq = of_irq_get_byname(port_node, "link");
 +      if (port->link_irq == -EPROBE_DEFER) {
 +              err = -EPROBE_DEFER;
 +              goto err_deinit_qvecs;
        }
 +      if (port->link_irq <= 0)
 +              /* the link irq is optional */
 +              port->link_irq = 0;
  
        if (of_property_read_bool(port_node, "marvell,loopback"))
                port->flags |= MVPP2_F_LOOPBACK;
  
 -      port->priv = priv;
        port->id = id;
        if (priv->hw_version == MVPP21)
 -              port->first_rxq = port->id * rxq_number;
 +              port->first_rxq = port->id * port->nrxqs;
        else
                port->first_rxq = port->id * priv->max_port_rxqs;
  
        port->phy_node = phy_node;
        port->phy_interface = phy_mode;
 +      port->comphy = comphy;
  
        if (priv->hw_version == MVPP21) {
                res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
                                         &port->gop_id)) {
                        err = -EINVAL;
                        dev_err(&pdev->dev, "missing gop-port-id value\n");
 -                      goto err_free_irq;
 +                      goto err_deinit_qvecs;
                }
  
                port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
  
        port->tx_ring_size = MVPP2_MAX_TXD;
        port->rx_ring_size = MVPP2_MAX_RXD;
 -      port->dev = dev;
        SET_NETDEV_DEV(dev, &pdev->dev);
  
        err = mvpp2_port_init(port);
                goto err_free_stats;
        }
  
 -      mvpp2_port_mii_set(port);
        mvpp2_port_periodic_xon_disable(port);
  
        if (priv->hw_version == MVPP21)
                goto err_free_txq_pcpu;
        }
  
 -      for_each_present_cpu(cpu) {
 -              port_pcpu = per_cpu_ptr(port->pcpu, cpu);
 +      if (!port->has_tx_irqs) {
 +              for_each_present_cpu(cpu) {
 +                      port_pcpu = per_cpu_ptr(port->pcpu, cpu);
  
 -              hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
 -                           HRTIMER_MODE_REL_PINNED);
 -              port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
 -              port_pcpu->timer_scheduled = false;
 +                      hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
 +                                   HRTIMER_MODE_REL_PINNED);
 +                      port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
 +                      port_pcpu->timer_scheduled = false;
  
 -              tasklet_init(&port_pcpu->tx_done_tasklet, mvpp2_tx_proc_cb,
 -                           (unsigned long)dev);
 +                      tasklet_init(&port_pcpu->tx_done_tasklet,
 +                                   mvpp2_tx_proc_cb,
 +                                   (unsigned long)dev);
 +              }
        }
  
 -      netif_napi_add(dev, &port->napi, mvpp2_poll, NAPI_POLL_WEIGHT);
 -      features = NETIF_F_SG | NETIF_F_IP_CSUM;
 +      features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
        dev->features = features | NETIF_F_RXCSUM;
        dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
        dev->vlan_features |= features;
  err_free_port_pcpu:
        free_percpu(port->pcpu);
  err_free_txq_pcpu:
 -      for (i = 0; i < txq_number; i++)
 +      for (i = 0; i < port->ntxqs; i++)
                free_percpu(port->txqs[i]->pcpu);
  err_free_stats:
        free_percpu(port->stats);
  err_free_irq:
 -      irq_dispose_mapping(port->irq);
 +      if (port->link_irq)
 +              irq_dispose_mapping(port->link_irq);
 +err_deinit_qvecs:
 +      mvpp2_queue_vectors_deinit(port);
  err_free_netdev:
        of_node_put(phy_node);
        free_netdev(dev);
@@@ -7688,11 -6683,9 +7688,11 @@@ static void mvpp2_port_remove(struct mv
        of_node_put(port->phy_node);
        free_percpu(port->pcpu);
        free_percpu(port->stats);
 -      for (i = 0; i < txq_number; i++)
 +      for (i = 0; i < port->ntxqs; i++)
                free_percpu(port->txqs[i]->pcpu);
 -      irq_dispose_mapping(port->irq);
 +      mvpp2_queue_vectors_deinit(port);
 +      if (port->link_irq)
 +              irq_dispose_mapping(port->link_irq);
        free_netdev(port->dev);
  }
  
@@@ -7807,6 -6800,13 +7807,6 @@@ static int mvpp2_init(struct platform_d
        int err, i;
        u32 val;
  
 -      /* Checks for hardware constraints */
 -      if (rxq_number % 4 || (rxq_number > priv->max_port_rxqs) ||
 -          (txq_number > MVPP2_MAX_TXQ)) {
 -              dev_err(&pdev->dev, "invalid queue size parameter\n");
 -              return -EINVAL;
 -      }
 -
        /* MBUS windows configuration */
        dram_target_info = mv_mbus_dram_info();
        if (dram_target_info)
        for_each_present_cpu(i) {
                priv->aggr_txqs[i].id = i;
                priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE;
 -              err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i],
 -                                        MVPP2_AGGR_TXQ_SIZE, i, priv);
 +              err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv);
                if (err < 0)
                        return err;
        }
        /* Rx Fifo Init */
        mvpp2_rx_fifo_init(priv);
  
 -      /* Reset Rx queue group interrupt configuration */
 -      for (i = 0; i < MVPP2_MAX_PORTS; i++) {
 -              if (priv->hw_version == MVPP21) {
 -                      mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(i),
 -                                  rxq_number);
 -                      continue;
 -              } else {
 -                      u32 val;
 -
 -                      val = (i << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
 -                      mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
 -
 -                      val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
 -                      mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
 -              }
 -      }
 -
        if (priv->hw_version == MVPP21)
                writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
                       priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
@@@ -7874,7 -6892,7 +7874,7 @@@ static int mvpp2_probe(struct platform_
        struct mvpp2 *priv;
        struct resource *res;
        void __iomem *base;
 -      int port_count, cpu;
 +      int port_count, i;
        int err;
  
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
                priv->iface_base = devm_ioremap_resource(&pdev->dev, res);
                if (IS_ERR(priv->iface_base))
                        return PTR_ERR(priv->iface_base);
 +
 +              priv->sysctrl_base =
 +                      syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
 +                                                      "marvell,system-controller");
 +              if (IS_ERR(priv->sysctrl_base))
 +                      /* The system controller regmap is optional for dt
 +                       * compatibility reasons. When not provided, the
 +                       * configuration of the GoP relies on the
 +                       * firmware/bootloader.
 +                       */
 +                      priv->sysctrl_base = NULL;
        }
  
 -      for_each_present_cpu(cpu) {
 +      for (i = 0; i < MVPP2_MAX_THREADS; i++) {
                u32 addr_space_sz;
  
                addr_space_sz = (priv->hw_version == MVPP21 ?
                                 MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ);
 -              priv->cpu_base[cpu] = base + cpu * addr_space_sz;
 +              priv->swth_base[i] = base + i * addr_space_sz;
        }
  
        if (priv->hw_version == MVPP21)
index a31912415264dda786037eaa0d332a0073a478e7,2f26fb34d7416b88ee8bf7a3464e40837ab90c3c..6c2abeccfa5a380ff05375393417f99de52ec2ca
@@@ -263,6 -263,7 +263,7 @@@ struct mlx5e_dcbx 
  
        /* The only setting that cannot be read from FW */
        u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
+       u8                         cap;
  };
  #endif
  
@@@ -620,12 -621,6 +621,12 @@@ enum mlx5e_traffic_types 
        MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
  };
  
 +enum mlx5e_tunnel_types {
 +      MLX5E_TT_IPV4_GRE,
 +      MLX5E_TT_IPV6_GRE,
 +      MLX5E_NUM_TUNNEL_TT,
 +};
 +
  enum {
        MLX5E_STATE_ASYNC_EVENTS_ENABLED,
        MLX5E_STATE_OPENED,
@@@ -685,7 -680,6 +686,7 @@@ struct mlx5e_l2_table 
  struct mlx5e_ttc_table {
        struct mlx5e_flow_table  ft;
        struct mlx5_flow_handle  *rules[MLX5E_NUM_TT];
 +      struct mlx5_flow_handle  *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
  };
  
  #define ARFS_HASH_SHIFT BITS_PER_BYTE
@@@ -718,7 -712,6 +719,7 @@@ enum 
        MLX5E_VLAN_FT_LEVEL = 0,
        MLX5E_L2_FT_LEVEL,
        MLX5E_TTC_FT_LEVEL,
 +      MLX5E_INNER_TTC_FT_LEVEL,
        MLX5E_ARFS_FT_LEVEL
  };
  
@@@ -744,7 -737,6 +745,7 @@@ struct mlx5e_flow_steering 
        struct mlx5e_vlan_table         vlan;
        struct mlx5e_l2_table           l2;
        struct mlx5e_ttc_table          ttc;
 +      struct mlx5e_ttc_table          inner_ttc;
        struct mlx5e_arfs_tables        arfs;
  };
  
@@@ -778,7 -770,6 +779,7 @@@ struct mlx5e_priv 
        u32                        tisn[MLX5E_MAX_NUM_TC];
        struct mlx5e_rqt           indir_rqt;
        struct mlx5e_tir           indir_tir[MLX5E_NUM_INDIR_TIRS];
 +      struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
        u32                        tx_rates[MLX5E_MAX_NUM_SQS];
        int                        hard_mtu;
@@@ -913,7 -904,7 +914,7 @@@ int mlx5e_redirect_rqt(struct mlx5e_pri
                       struct mlx5e_redirect_rqt_param rrp);
  void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
                                    enum mlx5e_traffic_types tt,
 -                                  void *tirc);
 +                                  void *tirc, bool inner);
  
  int mlx5e_open_locked(struct net_device *netdev);
  int mlx5e_close_locked(struct net_device *netdev);
@@@ -942,12 -933,6 +943,12 @@@ void mlx5e_set_rx_cq_mode_params(struc
  void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                              struct mlx5e_params *params, u8 rq_type);
  
 +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 +{
 +      return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
 +              MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
 +}
 +
  static inline
  struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
  {
index c6ec90e9c95bb23d51b3a19a94a790333be9659d,f5594014715bbbd1c281f95ecd871408b4949e06..6127e0d2f310cad2d56f6a54c8c3b694475615cf
@@@ -176,6 -176,7 +176,6 @@@ static bool mlx5e_query_global_pause_co
  
  int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
  {
 -
        switch (sset) {
        case ETH_SS_STATS:
                return NUM_SW_COUNTERS +
@@@ -206,7 -207,7 +206,7 @@@ static int mlx5e_get_sset_count(struct 
        return mlx5e_ethtool_get_sset_count(priv, sset);
  }
  
 -static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data)
  {
        int i, j, tc, prio, idx = 0;
        unsigned long pfc_combined;
                strcpy(data + (idx++) * ETH_GSTRING_LEN,
                       pport_phy_statistical_stats_desc[i].format);
  
 +      for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++)
 +              strcpy(data + (idx++) * ETH_GSTRING_LEN,
 +                     pport_eth_ext_stats_desc[i].format);
 +
        for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++)
                strcpy(data + (idx++) * ETH_GSTRING_LEN,
                       pcie_perf_stats_desc[i].format);
  
 +      for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++)
 +              strcpy(data + (idx++) * ETH_GSTRING_LEN,
 +                     pcie_perf_stats_desc64[i].format);
 +
 +      for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++)
 +              strcpy(data + (idx++) * ETH_GSTRING_LEN,
 +                     pcie_perf_stall_stats_desc[i].format);
 +
        for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
                for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
                        sprintf(data + (idx++) * ETH_GSTRING_LEN,
                                        priv->channel_tc2txq[i][tc]);
  }
  
 -void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
 -                             uint32_t stringset, uint8_t *data)
 +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
  {
        int i;
  
        }
  }
  
 -static void mlx5e_get_strings(struct net_device *dev,
 -                            uint32_t stringset, uint8_t *data)
 +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  {
        struct mlx5e_priv *priv = netdev_priv(dev);
  
@@@ -382,22 -373,10 +382,22 @@@ void mlx5e_ethtool_get_ethtool_stats(st
                data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
                                                  pport_phy_statistical_stats_desc, i);
  
 +      for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++)
 +              data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
 +                                                pport_eth_ext_stats_desc, i);
 +
        for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++)
                data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
                                                  pcie_perf_stats_desc, i);
  
 +      for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++)
 +              data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
 +                                                pcie_perf_stats_desc64, i);
 +
 +      for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++)
 +              data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
 +                                                pcie_perf_stall_stats_desc, i);
 +
        for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
                for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
                        data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
@@@ -662,8 -641,10 +662,10 @@@ int mlx5e_ethtool_set_channels(struct m
  
        new_channels.params = priv->channels.params;
        new_channels.params.num_channels = count;
-       mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt,
-                                     MLX5E_INDIR_RQT_SIZE, count);
+       if (!netif_is_rxfh_configured(priv->netdev))
+               mlx5e_build_default_indir_rqt(priv->mdev,
+                                             new_channels.params.indirection_rqt,
+                                             MLX5E_INDIR_RQT_SIZE, count);
  
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@@ -985,27 -966,24 +987,27 @@@ static u8 get_connector_port(u32 eth_pr
        if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
                return ptys2connector_type[connector_type];
  
 -      if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
 -                       | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)
 -                       | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4)
 -                       | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
 -                      return PORT_FIBRE;
 +      if (eth_proto &
 +          (MLX5E_PROT_MASK(MLX5E_10GBASE_SR)   |
 +           MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)  |
 +           MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) |
 +           MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
 +              return PORT_FIBRE;
        }
  
 -      if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
 -                       | MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
 -                       | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
 -                      return PORT_DA;
 +      if (eth_proto &
 +          (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) |
 +           MLX5E_PROT_MASK(MLX5E_10GBASE_CR)  |
 +           MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
 +              return PORT_DA;
        }
  
 -      if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4)
 -                       | MLX5E_PROT_MASK(MLX5E_10GBASE_KR)
 -                       | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4)
 -                       | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
 -                      return PORT_NONE;
 +      if (eth_proto &
 +          (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) |
 +           MLX5E_PROT_MASK(MLX5E_10GBASE_KR)  |
 +           MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) |
 +           MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
 +              return PORT_NONE;
        }
  
        return PORT_OTHER;
@@@ -1212,18 -1190,9 +1214,18 @@@ static void mlx5e_modify_tirs_hash(stru
  
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                memset(tirc, 0, ctxlen);
 -              mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc);
 +              mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
                mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
        }
 +
 +      if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
 +              return;
 +
 +      for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
 +              memset(tirc, 0, ctxlen);
 +              mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
 +              mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
 +      }
  }
  
  static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
index 111c7523d4486c24378c697d6a87e41a617f0203,6ad7f07e7861d9c8d6922b0c115fc950e0126dfc..85841e24c65b5a2453a7c6853b33380ce4ec2bf6
@@@ -288,12 -288,6 +288,12 @@@ static void mlx5e_update_pport_counters
                mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        }
  
 +      if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) {
 +              out = pstats->eth_ext_counters;
 +              MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
 +              mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 +      }
 +
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
        for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
                out = pstats->per_prio_counters[prio];
@@@ -1975,6 -1969,7 +1975,7 @@@ static void mlx5e_build_rx_cq_param(str
        }
  
        mlx5e_build_common_cq_param(priv, param);
+       param->cq_period_mode = params->rx_cq_period_mode;
  }
  
  static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
@@@ -2349,10 -2344,9 +2350,10 @@@ static void mlx5e_build_tir_ctx_lro(str
  
  void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
                                    enum mlx5e_traffic_types tt,
 -                                  void *tirc)
 +                                  void *tirc, bool inner)
  {
 -      void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
 +      void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
 +                           MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
  
  #define MLX5_HASH_IP            (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                                 MLX5_HASH_FIELD_SEL_DST_IP)
@@@ -2501,21 -2495,6 +2502,21 @@@ free_in
        return err;
  }
  
 +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
 +                                          enum mlx5e_traffic_types tt,
 +                                          u32 *tirc)
 +{
 +      MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 +
 +      mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
 +
 +      MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 +      MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
 +      MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
 +
 +      mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
 +}
 +
  static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu)
  {
        struct mlx5_core_dev *mdev = priv->mdev;
@@@ -2603,6 -2582,12 +2604,6 @@@ static void mlx5e_build_channels_tx_map
        }
  }
  
 -static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev)
 -{
 -      return (MLX5_CAP_GEN(mdev, vport_group_manager) &&
 -              MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH);
 -}
 -
  void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
  {
        int num_txqs = priv->channels.num * priv->channels.params.num_tc;
        mlx5e_activate_channels(&priv->channels);
        netif_tx_start_all_queues(priv->netdev);
  
 -      if (mlx5e_is_eswitch_vport_mngr(priv->mdev))
 +      if (MLX5_VPORT_MANAGER(priv->mdev))
                mlx5e_add_sqs_fwd_rules(priv);
  
        mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@@ -2627,7 -2612,7 +2628,7 @@@ void mlx5e_deactivate_priv_channels(str
  {
        mlx5e_redirect_rqts_to_drop(priv);
  
 -      if (mlx5e_is_eswitch_vport_mngr(priv->mdev))
 +      if (MLX5_VPORT_MANAGER(priv->mdev))
                mlx5e_remove_sqs_fwd_rules(priv);
  
        /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@@ -2704,8 -2689,6 +2705,8 @@@ int mlx5e_open(struct net_device *netde
  
        mutex_lock(&priv->state_lock);
        err = mlx5e_open_locked(netdev);
 +      if (!err)
 +              mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
        mutex_unlock(&priv->state_lock);
  
        return err;
@@@ -2740,7 -2723,6 +2741,7 @@@ int mlx5e_close(struct net_device *netd
                return -ENODEV;
  
        mutex_lock(&priv->state_lock);
 +      mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
        err = mlx5e_close_locked(netdev);
        mutex_unlock(&priv->state_lock);
  
@@@ -2881,7 -2863,7 +2882,7 @@@ static void mlx5e_build_indir_tir_ctx(s
  
        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
        MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
 -      mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc);
 +      mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
  }
  
  static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
@@@ -2900,7 -2882,6 +2901,7 @@@ int mlx5e_create_indirect_tirs(struct m
        struct mlx5e_tir *tir;
        void *tirc;
        int inlen;
 +      int i = 0;
        int err;
        u32 *in;
        int tt;
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
                mlx5e_build_indir_tir_ctx(priv, tt, tirc);
                err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
 -              if (err)
 -                      goto err_destroy_tirs;
 +              if (err) {
 +                      mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
 +                      goto err_destroy_inner_tirs;
 +              }
 +      }
 +
 +      if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
 +              goto out;
 +
 +      for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
 +              memset(in, 0, inlen);
 +              tir = &priv->inner_indir_tir[i];
 +              tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 +              mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
 +              err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
 +              if (err) {
 +                      mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
 +                      goto err_destroy_inner_tirs;
 +              }
        }
  
 +out:
        kvfree(in);
  
        return 0;
  
 -err_destroy_tirs:
 -      mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
 +err_destroy_inner_tirs:
 +      for (i--; i >= 0; i--)
 +              mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
 +
        for (tt--; tt >= 0; tt--)
                mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
  
@@@ -2999,12 -2960,6 +3000,12 @@@ void mlx5e_destroy_indirect_tirs(struc
  
        for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
                mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
 +
 +      if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
 +              return;
 +
 +      for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
 +              mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
  }
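The TIR hunks above create the outer indirect TIRs, then (only when the device reports inner flow-table support) the inner ones, unwinding whatever was created so far in reverse order on any failure; the destroy path mirrors that split. A minimal standalone sketch of the create-with-rollback idiom, with a hypothetical make_resource()/destroy_resource() pair standing in for mlx5e_create_tir()/mlx5e_destroy_tir():

#include <stdio.h>

#define NUM_RES 4

/* Hypothetical stand-ins for the driver's create/destroy calls. */
static int make_resource(int i)
{
	if (i == 2)		/* simulate a failure partway through */
		return -1;
	printf("created %d\n", i);
	return 0;
}

static void destroy_resource(int i)
{
	printf("destroyed %d\n", i);
}

static int create_all(void)
{
	int i, err;

	for (i = 0; i < NUM_RES; i++) {
		err = make_resource(i);
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	/* Undo only what was actually created, newest first. */
	for (i--; i >= 0; i--)
		destroy_resource(i);
	return err;
}

int main(void)
{
	return create_all() ? 1 : 0;
}

The "for (i--; i >= 0; i--)" step is the same shape as the err_destroy_inner_tirs unwind above: it skips the resource whose creation just failed and releases the earlier ones.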
  
  void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
@@@ -3044,16 -2999,12 +3045,16 @@@ static int mlx5e_modify_channels_vsd(st
        return 0;
  }
  
 -static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
 +static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
 +                               struct tc_mqprio_qopt *mqprio)
  {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5e_channels new_channels = {};
 +      u8 tc = mqprio->num_tc;
        int err = 0;
  
 +      mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 +
        if (tc && tc != MLX5E_MAX_NUM_TC)
                return -EINVAL;
  
        return err;
  }
  
 -static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
 -                            u32 chain_index, __be16 proto,
 -                            struct tc_to_netdev *tc)
 +#ifdef CONFIG_MLX5_ESWITCH
 +static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
 +                                   struct tc_cls_flower_offload *cls_flower)
  {
        struct mlx5e_priv *priv = netdev_priv(dev);
  
 -      if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
 -              goto mqprio;
 +      if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
 +          cls_flower->common.chain_index)
 +              return -EOPNOTSUPP;
  
 -      if (chain_index)
 +      switch (cls_flower->command) {
 +      case TC_CLSFLOWER_REPLACE:
 +              return mlx5e_configure_flower(priv, cls_flower);
 +      case TC_CLSFLOWER_DESTROY:
 +              return mlx5e_delete_flower(priv, cls_flower);
 +      case TC_CLSFLOWER_STATS:
 +              return mlx5e_stats_flower(priv, cls_flower);
 +      default:
                return -EOPNOTSUPP;
 +      }
 +}
 +#endif
  
 -      switch (tc->type) {
 +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 +                        void *type_data)
 +{
 +      switch (type) {
 +#ifdef CONFIG_MLX5_ESWITCH
        case TC_SETUP_CLSFLOWER:
 -              switch (tc->cls_flower->command) {
 -              case TC_CLSFLOWER_REPLACE:
 -                      return mlx5e_configure_flower(priv, proto, tc->cls_flower);
 -              case TC_CLSFLOWER_DESTROY:
 -                      return mlx5e_delete_flower(priv, tc->cls_flower);
 -              case TC_CLSFLOWER_STATS:
 -                      return mlx5e_stats_flower(priv, tc->cls_flower);
 -              }
 +              return mlx5e_setup_tc_cls_flower(dev, type_data);
 +#endif
 +      case TC_SETUP_MQPRIO:
 +              return mlx5e_setup_tc_mqprio(dev, type_data);
        default:
                return -EOPNOTSUPP;
        }
 -
 -mqprio:
 -      if (tc->type != TC_SETUP_MQPRIO)
 -              return -EINVAL;
 -
 -      tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 -
 -      return mlx5e_setup_tc(dev, tc->mqprio->num_tc);
  }
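The old mlx5e_ndo_setup_tc() multiplexed mqprio and flower through one entry point keyed off the TC handle; the replacement dispatches on the enum tc_setup_type argument and hands the opaque type_data to a per-type handler. A small standalone sketch of that dispatch-on-type shape, using hypothetical setup_type values and payload structs rather than the real TC offload structures:

#include <stdio.h>
#include <errno.h>

/* Hypothetical stand-ins for enum tc_setup_type and its payloads. */
enum setup_type { SETUP_MQPRIO, SETUP_FLOWER };

struct mqprio_req { int num_tc; };
struct flower_req { int command; };

static int setup_mqprio(struct mqprio_req *req)
{
	printf("mqprio: %d traffic classes\n", req->num_tc);
	return 0;
}

static int setup_flower(struct flower_req *req)
{
	printf("flower: command %d\n", req->command);
	return 0;
}

/* One entry point, one switch; each case interprets the opaque payload. */
static int setup_tc(enum setup_type type, void *type_data)
{
	switch (type) {
	case SETUP_MQPRIO:
		return setup_mqprio(type_data);
	case SETUP_FLOWER:
		return setup_flower(type_data);
	default:
		return -EOPNOTSUPP;
	}
}

int main(void)
{
	struct mqprio_req mq = { .num_tc = 4 };

	return setup_tc(SETUP_MQPRIO, &mq);
}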
  
  static void
@@@ -3409,7 -3357,6 +3410,7 @@@ static int mlx5e_ioctl(struct net_devic
        }
  }
  
 +#ifdef CONFIG_MLX5_ESWITCH
  static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
  {
        struct mlx5e_priv *priv = netdev_priv(dev);
@@@ -3512,7 -3459,6 +3513,7 @@@ static int mlx5e_get_vf_stats(struct ne
        return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
                                            vf_stats);
  }
 +#endif
  
  static void mlx5e_add_vxlan_port(struct net_device *netdev,
                                 struct udp_tunnel_info *ti)
@@@ -3542,13 -3488,13 +3543,13 @@@ static void mlx5e_del_vxlan_port(struc
        mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0);
  }
  
 -static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv,
 -                                                  struct sk_buff *skb,
 -                                                  netdev_features_t features)
 +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 +                                                   struct sk_buff *skb,
 +                                                   netdev_features_t features)
  {
        struct udphdr *udph;
 -      u16 proto;
 -      u16 port = 0;
 +      u8 proto;
 +      u16 port;
  
        switch (vlan_get_protocol(skb)) {
        case htons(ETH_P_IP):
                goto out;
        }
  
 -      if (proto == IPPROTO_UDP) {
 +      switch (proto) {
 +      case IPPROTO_GRE:
 +              return features;
 +      case IPPROTO_UDP:
                udph = udp_hdr(skb);
                port = be16_to_cpu(udph->dest);
 -      }
  
 -      /* Verify if UDP port is being offloaded by HW */
 -      if (port && mlx5e_vxlan_lookup_port(priv, port))
 -              return features;
 +              /* Verify if UDP port is being offloaded by HW */
 +              if (mlx5e_vxlan_lookup_port(priv, port))
 +                      return features;
 +      }
  
  out:
        /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
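mlx5e_tunnel_features_check() above keeps the offload feature bits for GRE unconditionally, keeps them for UDP only when the destination port is a known tunnel port, and otherwise strips checksum and GSO offload for the packet. A condensed standalone sketch of that decision, with a hypothetical port_is_offloaded() in place of the driver's VXLAN port lookup:

#include <stdio.h>

#define PROTO_UDP 17
#define PROTO_GRE 47

#define FEAT_CSUM (1u << 0)
#define FEAT_GSO  (1u << 1)

/* Hypothetical lookup of UDP ports the hardware offloads. */
static int port_is_offloaded(int port)
{
	return port == 4789;	/* e.g. the VXLAN default port */
}

static unsigned int tunnel_features_check(int proto, int dport,
					  unsigned int features)
{
	switch (proto) {
	case PROTO_GRE:
		return features;
	case PROTO_UDP:
		if (port_is_offloaded(dport))
			return features;
		break;
	}
	/* Unknown tunnel: drop checksum and GSO offload for this packet. */
	return features & ~(FEAT_CSUM | FEAT_GSO);
}

int main(void)
{
	unsigned int all = FEAT_CSUM | FEAT_GSO;

	printf("gre: %#x\n", tunnel_features_check(PROTO_GRE, 0, all));
	printf("udp/4789: %#x\n", tunnel_features_check(PROTO_UDP, 4789, all));
	printf("udp/53: %#x\n", tunnel_features_check(PROTO_UDP, 53, all));
	return 0;
}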
@@@ -3595,7 -3538,7 +3596,7 @@@ static netdev_features_t mlx5e_features
        /* Validate if the tunneled packet is being offloaded by HW */
        if (skb->encapsulation &&
            (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
 -              return mlx5e_vxlan_features_check(priv, skb, features);
 +              return mlx5e_tunnel_features_check(priv, skb, features);
  
        return features;
  }
@@@ -3749,11 -3692,11 +3750,11 @@@ static void mlx5e_netpoll(struct net_de
  }
  #endif
  
 -static const struct net_device_ops mlx5e_netdev_ops_basic = {
 +static const struct net_device_ops mlx5e_netdev_ops = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
 -      .ndo_setup_tc            = mlx5e_ndo_setup_tc,
 +      .ndo_setup_tc            = mlx5e_setup_tc,
        .ndo_select_queue        = mlx5e_select_queue,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_change_mtu          = mlx5e_change_mtu,
        .ndo_do_ioctl            = mlx5e_ioctl,
        .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
 +      .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
 +      .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
 +      .ndo_features_check      = mlx5e_features_check,
  #ifdef CONFIG_RFS_ACCEL
        .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
  #endif
  #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller     = mlx5e_netpoll,
  #endif
 -};
 -
 -static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 -      .ndo_open                = mlx5e_open,
 -      .ndo_stop                = mlx5e_close,
 -      .ndo_start_xmit          = mlx5e_xmit,
 -      .ndo_setup_tc            = mlx5e_ndo_setup_tc,
 -      .ndo_select_queue        = mlx5e_select_queue,
 -      .ndo_get_stats64         = mlx5e_get_stats,
 -      .ndo_set_rx_mode         = mlx5e_set_rx_mode,
 -      .ndo_set_mac_address     = mlx5e_set_mac,
 -      .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
 -      .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
 -      .ndo_set_features        = mlx5e_set_features,
 -      .ndo_change_mtu          = mlx5e_change_mtu,
 -      .ndo_do_ioctl            = mlx5e_ioctl,
 -      .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
 -      .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
 -      .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
 -      .ndo_features_check      = mlx5e_features_check,
 -#ifdef CONFIG_RFS_ACCEL
 -      .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
 -#endif
 +#ifdef CONFIG_MLX5_ESWITCH
 +      /* SRIOV E-Switch NDOs */
        .ndo_set_vf_mac          = mlx5e_set_vf_mac,
        .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
        .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
        .ndo_get_vf_config       = mlx5e_get_vf_config,
        .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
        .ndo_get_vf_stats        = mlx5e_get_vf_stats,
 -      .ndo_tx_timeout          = mlx5e_tx_timeout,
 -      .ndo_xdp                 = mlx5e_xdp,
 -#ifdef CONFIG_NET_POLL_CONTROLLER
 -      .ndo_poll_controller     = mlx5e_netpoll,
 -#endif
        .ndo_has_offload_stats   = mlx5e_has_offload_stats,
        .ndo_get_offload_stats   = mlx5e_get_offload_stats,
 +#endif
  };
  
  static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@@ -4017,11 -3982,9 +4018,11 @@@ static void mlx5e_set_netdev_dev_addr(s
        }
  }
  
 +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH)
  static const struct switchdev_ops mlx5e_switchdev_ops = {
        .switchdev_port_attr_get        = mlx5e_attr_get,
  };
 +#endif
  
  static void mlx5e_build_nic_netdev(struct net_device *netdev)
  {
  
        SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
  
 -      if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
 -              netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
 +      netdev->netdev_ops = &mlx5e_netdev_ops;
 +
  #ifdef CONFIG_MLX5_CORE_EN_DCB
 -              if (MLX5_CAP_GEN(mdev, qos))
 -                      netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
 +      if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
 +              netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
  #endif
 -      } else {
 -              netdev->netdev_ops = &mlx5e_netdev_ops_basic;
 -      }
  
        netdev->watchdog_timeo    = 15 * HZ;
  
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
  
 -      if (mlx5e_vxlan_allowed(mdev)) {
 -              netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
 -                                         NETIF_F_GSO_UDP_TUNNEL_CSUM |
 -                                         NETIF_F_GSO_PARTIAL;
 +      if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
 +              netdev->hw_features     |= NETIF_F_GSO_PARTIAL;
                netdev->hw_enc_features |= NETIF_F_IP_CSUM;
                netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
                netdev->hw_enc_features |= NETIF_F_TSO;
                netdev->hw_enc_features |= NETIF_F_TSO6;
 -              netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
 -              netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
 -                                         NETIF_F_GSO_PARTIAL;
 +              netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
 +      }
 +
 +      if (mlx5e_vxlan_allowed(mdev)) {
 +              netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
 +                                         NETIF_F_GSO_UDP_TUNNEL_CSUM;
 +              netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
 +                                         NETIF_F_GSO_UDP_TUNNEL_CSUM;
                netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
        }
  
 +      if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
 +              netdev->hw_features     |= NETIF_F_GSO_GRE |
 +                                         NETIF_F_GSO_GRE_CSUM;
 +              netdev->hw_enc_features |= NETIF_F_GSO_GRE |
 +                                         NETIF_F_GSO_GRE_CSUM;
 +              netdev->gso_partial_features |= NETIF_F_GSO_GRE |
 +                                              NETIF_F_GSO_GRE_CSUM;
 +      }
 +
        mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
  
        if (fcs_supported)
  
        mlx5e_set_netdev_dev_addr(netdev);
  
 -#ifdef CONFIG_NET_SWITCHDEV
 -      if (MLX5_CAP_GEN(mdev, vport_group_manager))
 +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH)
 +      if (MLX5_VPORT_MANAGER(mdev))
                netdev->switchdev_ops = &mlx5e_switchdev_ops;
  #endif
  
@@@ -4254,10 -4208,6 +4255,10 @@@ static void mlx5e_nic_enable(struct mlx
  
        mlx5e_init_l2_addr(priv);
  
 +      /* Marking the link as currently not needed by the Driver */
 +      if (!netif_running(netdev))
 +              mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
 +
        /* MTU range: 68 - hw-specific max */
        netdev->min_mtu = ETH_MIN_MTU;
        mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
  
        mlx5e_enable_async_events(priv);
  
 -      if (MLX5_CAP_GEN(mdev, vport_group_manager))
 +      if (MLX5_VPORT_MANAGER(priv->mdev))
                mlx5e_register_vport_reps(priv);
  
        if (netdev->reg_state != NETREG_REGISTERED)
@@@ -4302,7 -4252,7 +4303,7 @@@ static void mlx5e_nic_disable(struct ml
  
        queue_work(priv->wq, &priv->set_rx_mode_work);
  
 -      if (MLX5_CAP_GEN(mdev, vport_group_manager))
 +      if (MLX5_VPORT_MANAGER(priv->mdev))
                mlx5e_unregister_vport_reps(priv);
  
        mlx5e_disable_async_events(priv);
@@@ -4475,29 -4425,32 +4476,29 @@@ static void mlx5e_detach(struct mlx5_co
  
  static void *mlx5e_add(struct mlx5_core_dev *mdev)
  {
 -      struct mlx5_eswitch *esw = mdev->priv.eswitch;
 -      int total_vfs = MLX5_TOTAL_VPORTS(mdev);
 -      struct mlx5e_rep_priv *rpriv = NULL;
 +      struct net_device *netdev;
 +      void *rpriv = NULL;
        void *priv;
 -      int vport;
        int err;
 -      struct net_device *netdev;
  
        err = mlx5e_check_required_hca_cap(mdev);
        if (err)
                return NULL;
  
 -      if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
 -              rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
 +#ifdef CONFIG_MLX5_ESWITCH
 +      if (MLX5_VPORT_MANAGER(mdev)) {
 +              rpriv = mlx5e_alloc_nic_rep_priv(mdev);
                if (!rpriv) {
 -                      mlx5_core_warn(mdev,
 -                                     "Not creating net device, Failed to alloc rep priv data\n");
 +                      mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
                        return NULL;
                }
 -              rpriv->rep = &esw->offloads.vport_reps[0];
        }
 +#endif
  
        netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
        if (!netdev) {
                mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
 -              goto err_unregister_reps;
 +              goto err_free_rpriv;
        }
  
        priv = netdev_priv(netdev);
  
  err_detach:
        mlx5e_detach(mdev, priv);
 -
  err_destroy_netdev:
        mlx5e_destroy_netdev(priv);
 -
 -err_unregister_reps:
 -      for (vport = 1; vport < total_vfs; vport++)
 -              mlx5_eswitch_unregister_vport_rep(esw, vport);
 -
 +err_free_rpriv:
        kfree(rpriv);
        return NULL;
  }
index 55a6786d3c4ccb7ecb44dce4eaeab1b66f894456,7344433259fca32fe288ba4c63dff6c64ab2d126..be8197a75a634340a07575ac90af8b61465b03a5
@@@ -222,13 -222,13 +222,13 @@@ static inline int mlx5e_page_alloc_mapp
        if (unlikely(!page))
                return -ENOMEM;
  
-       dma_info->page = page;
        dma_info->addr = dma_map_page(rq->pdev, page, 0,
                                      RQ_PAGE_SIZE(rq), rq->buff.map_dir);
        if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
                put_page(page);
                return -ENOMEM;
        }
+       dma_info->page = page;
  
        return 0;
  }
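The one-line move above makes mlx5e_page_alloc_mapped() store dma_info->page only after dma_mapping_error() has confirmed the mapping, so a failed mapping never leaves a stale page pointer behind. A short standalone sketch of that validate-before-publish ordering, with hypothetical types and a fake map_page() in place of the DMA API:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the driver's page/DMA bookkeeping. */
struct dma_info { void *page; unsigned long addr; };

static unsigned long map_page(void *page)
{
	/* Pretend the mapping can fail by returning an invalid cookie. */
	return page ? (unsigned long)page : 0;
}

static int page_alloc_mapped(struct dma_info *di)
{
	void *page = malloc(4096);
	unsigned long addr;

	if (!page)
		return -1;

	addr = map_page(page);
	if (!addr) {
		free(page);
		return -1;	/* di is left untouched on failure */
	}

	/* Publish the page only once the mapping is known to be good. */
	di->page = page;
	di->addr = addr;
	return 0;
}

int main(void)
{
	struct dma_info di = { 0 };

	if (page_alloc_mapped(&di))
		return 1;
	printf("mapped at %#lx\n", di.addr);
	free(di.page);
	return 0;
}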
@@@ -509,8 -509,8 +509,8 @@@ static void mlx5e_lro_update_hdr(struc
        u16 tot_len;
  
        u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
 -      int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA  == l4_hdr_type) ||
 -                     (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
 +      int tcp_ack = ((l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
 +                     (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA));
  
        skb->mac_len = ETH_HLEN;
        proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
@@@ -857,7 -857,6 +857,7 @@@ wq_ll_pop
                       &wqe->next.next_wqe_index);
  }
  
 +#ifdef CONFIG_MLX5_ESWITCH
  void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
  {
        struct net_device *netdev = rq->netdev;
@@@ -902,7 -901,6 +902,7 @@@ wq_ll_pop
        mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
                       &wqe->next.next_wqe_index);
  }
 +#endif
  
  static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                                           struct mlx5_cqe64 *cqe,
index 3b10d3df7627a9fdf9d00953e83cd1c32b91d6a5,7f282e8f4e7fee460e1140dacc8f86ece6b1ea9e..da503e6411da07374f3250d450fd131c64c63c88
@@@ -1326,7 -1326,7 +1326,7 @@@ static int parse_tc_nic_actions(struct 
        LIST_HEAD(actions);
        int err;
  
 -      if (tc_no_actions(exts))
 +      if (!tcf_exts_has_actions(exts))
                return -EINVAL;
  
        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@@ -1443,12 -1443,10 +1443,10 @@@ static int mlx5e_route_lookup_ipv6(stru
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int ret;
  
-       dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
-       ret = dst->error;
-       if (ret) {
-               dst_release(dst);
+       ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+                                        fl6);
+       if (ret < 0)
                return ret;
-       }
  
        *out_ttl = ip6_dst_hoplimit(dst);
  
@@@ -1839,7 -1837,7 +1837,7 @@@ static int parse_tc_fdb_actions(struct 
        bool encap = false;
        int err = 0;
  
 -      if (tc_no_actions(exts))
 +      if (!tcf_exts_has_actions(exts))
                return -EINVAL;
  
        memset(attr, 0, sizeof(*attr));
        return err;
  }
  
 -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 +int mlx5e_configure_flower(struct mlx5e_priv *priv,
                           struct tc_cls_flower_offload *f)
  {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
index e7c186b585796d1726d242331e1e92f6c6e4a195,5bc0593bd76e706e2b5c38a0a767af0324e67685..d9fd8570b07c8344bb1dc04934392e20f7628245
@@@ -433,8 -433,6 +433,8 @@@ static int esw_create_offloads_fast_fdb
        struct mlx5_flow_table *fdb = NULL;
        int esw_size, err = 0;
        u32 flags = 0;
 +      u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
 +                              MLX5_CAP_GEN(dev, max_flow_counter_15_0);
  
        root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
        if (!root_ns) {
  
        esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
                  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
 -                MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS);
 +                max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
  
 -      esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS,
 +      esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
                         1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
  
        if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
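In the hunk above the single max_flow_counter capability has been split into 16-bit high and low halves, and the driver reassembles a 32-bit value as (hi << 16) | lo before sizing the offloads FDB table. A tiny standalone illustration of that reassembly, with hypothetical field readers in place of MLX5_CAP_GEN():

#include <stdio.h>
#include <stdint.h>

/* Hypothetical readers for the two 16-bit capability halves. */
static uint32_t cap_max_flow_counter_31_16(void) { return 0x0001; }
static uint32_t cap_max_flow_counter_15_0(void)  { return 0x8000; }

int main(void)
{
	uint32_t max_flow_counter =
		(cap_max_flow_counter_31_16() << 16) |
		cap_max_flow_counter_15_0();

	/* (0x0001 << 16) | 0x8000 == 0x18000 == 98304 counters. */
	printf("max_flow_counter = %u\n", max_flow_counter);
	return 0;
}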
@@@ -817,7 -815,7 +817,7 @@@ void esw_offloads_cleanup(struct mlx5_e
        struct mlx5_eswitch_rep *rep;
        int vport;
  
-       for (vport = 0; vport < nvports; vport++) {
+       for (vport = nvports - 1; vport >= 0; vport--) {
                rep = &esw->offloads.vport_reps[vport];
                if (!rep->valid)
                        continue;
index 514c22d21729869162686c014bcf002f26af15c6,16885827367bfd9153f96faaf7c2afedc6c6a5b0..bd84bdf56a83fcb03a5a7a1d0fe7642f8b070ea1
@@@ -53,8 -53,9 +53,8 @@@
  #include <net/devlink.h>
  #include "mlx5_core.h"
  #include "fs_core.h"
 -#ifdef CONFIG_MLX5_CORE_EN
 +#include "lib/mpfs.h"
  #include "eswitch.h"
 -#endif
  #include "lib/mlx5.h"
  #include "fpga/core.h"
  #include "accel/ipsec.h"
@@@ -836,6 -837,7 +836,6 @@@ static int mlx5_core_set_issi(struct ml
        return -EOPNOTSUPP;
  }
  
 -
  static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
  {
        struct pci_dev *pdev = dev->pdev;
@@@ -944,17 -946,13 +944,17 @@@ static int mlx5_init_once(struct mlx5_c
                goto err_tables_cleanup;
        }
  
 -#ifdef CONFIG_MLX5_CORE_EN
 +      err = mlx5_mpfs_init(dev);
 +      if (err) {
 +              dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
 +              goto err_rl_cleanup;
 +      }
 +
        err = mlx5_eswitch_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
 -              goto err_rl_cleanup;
 +              goto err_mpfs_cleanup;
        }
 -#endif
  
        err = mlx5_sriov_init(dev);
        if (err) {
  err_sriov_cleanup:
        mlx5_sriov_cleanup(dev);
  err_eswitch_cleanup:
 -#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_cleanup(dev->priv.eswitch);
 -
 +err_mpfs_cleanup:
 +      mlx5_mpfs_cleanup(dev);
  err_rl_cleanup:
 -#endif
        mlx5_cleanup_rl_table(dev);
 -
  err_tables_cleanup:
        mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
@@@ -995,8 -995,9 +995,8 @@@ static void mlx5_cleanup_once(struct ml
  {
        mlx5_fpga_cleanup(dev);
        mlx5_sriov_cleanup(dev);
 -#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_cleanup(dev->priv.eswitch);
 -#endif
 +      mlx5_mpfs_cleanup(dev);
        mlx5_cleanup_rl_table(dev);
        mlx5_cleanup_reserved_gids(dev);
        mlx5_cleanup_mkey_table(dev);
@@@ -1154,6 -1155,10 +1154,6 @@@ static int mlx5_load_one(struct mlx5_co
                goto err_fs;
        }
  
 -#ifdef CONFIG_MLX5_CORE_EN
 -      mlx5_eswitch_attach(dev->priv.eswitch);
 -#endif
 -
        err = mlx5_sriov_attach(dev);
        if (err) {
                dev_err(&pdev->dev, "sriov init failed %d\n", err);
                }
        }
  
-       clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
  out:
        mutex_unlock(&dev->intf_state_mutex);
@@@ -1197,6 -1201,9 +1196,6 @@@ err_fpga_start
        mlx5_sriov_detach(dev);
  
  err_sriov:
 -#ifdef CONFIG_MLX5_CORE_EN
 -      mlx5_eswitch_detach(dev->priv.eswitch);
 -#endif
        mlx5_cleanup_fs(dev);
  
  err_fs:
@@@ -1253,7 -1260,7 +1252,7 @@@ static int mlx5_unload_one(struct mlx5_
                mlx5_drain_health_recovery(dev);
  
        mutex_lock(&dev->intf_state_mutex);
-       if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
+       if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
                         __func__);
                if (cleanup)
        }
  
        clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-       set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
  
        if (mlx5_device_registered(dev))
                mlx5_detach_device(dev);
        mlx5_fpga_device_stop(dev);
  
        mlx5_sriov_detach(dev);
 -#ifdef CONFIG_MLX5_CORE_EN
 -      mlx5_eswitch_detach(dev->priv.eswitch);
 -#endif
        mlx5_cleanup_fs(dev);
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
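The load/unload hunks above drop the separate DOWN flag and key everything off a single UP bit: set on a successful load, cleared on unload, and tested with !test_bit(...UP...) to short-circuit when the device is already down. A minimal sketch of single-bit state tracking, using plain bit operations instead of the kernel's test_bit()/set_bit()/clear_bit() helpers:

#include <stdio.h>

#define STATE_UP (1u << 0)

static unsigned int intf_state;

static int load_one(void)
{
	/* ... bring the device up ... */
	intf_state |= STATE_UP;
	return 0;
}

static int unload_one(void)
{
	if (!(intf_state & STATE_UP)) {
		printf("interface is down, NOP\n");
		return 0;
	}
	intf_state &= ~STATE_UP;
	/* ... tear the device down ... */
	return 0;
}

int main(void)
{
	load_one();
	unload_one();
	unload_one();	/* second call hits the "already down" path */
	return 0;
}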
@@@ -1302,7 -1311,7 +1300,7 @@@ struct mlx5_core_event_handler 
  };
  
  static const struct devlink_ops mlx5_devlink_ops = {
 -#ifdef CONFIG_MLX5_CORE_EN
 +#ifdef CONFIG_MLX5_ESWITCH
        .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
        .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
        .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
@@@ -1342,9 -1351,6 +1340,9 @@@ static int init_one(struct pci_dev *pde
        mutex_init(&dev->pci_status_mutex);
        mutex_init(&dev->intf_state_mutex);
  
 +      INIT_LIST_HEAD(&priv->waiting_events_list);
 +      priv->is_accum_events = false;
 +
  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        err = init_srcu_struct(&priv->pfault_srcu);
        if (err) {
@@@ -1399,6 -1405,7 +1397,6 @@@ clean_srcu
        cleanup_srcu_struct(&priv->pfault_srcu);
  clean_dev:
  #endif
 -      pci_set_drvdata(pdev, NULL);
        devlink_free(devlink);
  
        return err;
@@@ -1425,6 -1432,7 +1423,6 @@@ static void remove_one(struct pci_dev *
  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        cleanup_srcu_struct(&priv->pfault_srcu);
  #endif
 -      pci_set_drvdata(pdev, NULL);
        devlink_free(devlink);
  }
  
@@@ -1555,8 -1563,6 +1553,6 @@@ static void shutdown(struct pci_dev *pd
        int err;
  
        dev_info(&pdev->dev, "Shutdown was called\n");
-       /* Notify mlx5 clients that the kernel is being shut down */
-       set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
        err = mlx5_try_fast_unload(dev);
        if (err)
                mlx5_unload_one(dev, priv, false);
index 17fc98881642d10e0cf1f03dec9d0eac3111faa7,c6a3e61b53bdbf0c32212a6415f4a1d2da769bf8..992cbfa1f2bcd82df2c261c426dcc56283da0670
@@@ -58,7 -58,6 +58,7 @@@
  #include <net/tc_act/tc_mirred.h>
  #include <net/netevent.h>
  #include <net/tc_act/tc_sample.h>
 +#include <net/addrconf.h>
  
  #include "spectrum.h"
  #include "pci.h"
@@@ -382,14 -381,12 +382,14 @@@ int mlxsw_sp_flow_counter_get(struct ml
        int err;
  
        mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
 -                          MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES);
 +                          MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
        if (err)
                return err;
 -      *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
 -      *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
 +      if (packets)
 +              *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
 +      if (bytes)
 +              *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
        return 0;
  }
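mlxsw_sp_flow_counter_get() above now treats its packets/bytes output pointers as optional and writes back only the counters the caller asked for. A short standalone sketch of the optional-out-parameter convention, with a hypothetical read_counters() in place of the MGPC register query:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical counter read; a NULL output pointer means "not interested". */
static int read_counters(uint64_t *packets, uint64_t *bytes)
{
	uint64_t hw_packets = 1500, hw_bytes = 96000;

	if (packets)
		*packets = hw_packets;
	if (bytes)
		*bytes = hw_bytes;
	return 0;
}

int main(void)
{
	uint64_t pkts;

	/* Caller that only cares about the packet count. */
	if (read_counters(&pkts, NULL))
		return 1;
	printf("packets: %llu\n", (unsigned long long)pkts);
	return 0;
}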
  
@@@ -399,7 -396,7 +399,7 @@@ static int mlxsw_sp_flow_counter_clear(
        char mgpc_pl[MLXSW_REG_MGPC_LEN];
  
        mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR,
 -                          MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES);
 +                          MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
  }
  
@@@ -1619,16 -1616,16 +1619,16 @@@ mlxsw_sp_port_del_cls_matchall_sample(s
  }
  
  static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 -                                        __be16 protocol,
 -                                        struct tc_cls_matchall_offload *cls,
 +                                        struct tc_cls_matchall_offload *f,
                                          bool ingress)
  {
        struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
 +      __be16 protocol = f->common.protocol;
        const struct tc_action *a;
        LIST_HEAD(actions);
        int err;
  
 -      if (!tc_single_action(cls->exts)) {
 +      if (!tcf_exts_has_one_action(f->exts)) {
                netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
                return -EOPNOTSUPP;
        }
        mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
        if (!mall_tc_entry)
                return -ENOMEM;
 -      mall_tc_entry->cookie = cls->cookie;
 +      mall_tc_entry->cookie = f->cookie;
  
 -      tcf_exts_to_list(cls->exts, &actions);
 +      tcf_exts_to_list(f->exts, &actions);
        a = list_first_entry(&actions, struct tc_action, list);
  
        if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
                                                            mirror, a, ingress);
        } else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) {
                mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
 -              err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls,
 +              err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f,
                                                            a, ingress);
        } else {
                err = -EOPNOTSUPP;
@@@ -1668,12 -1665,12 +1668,12 @@@ err_add_action
  }
  
  static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 -                                         struct tc_cls_matchall_offload *cls)
 +                                         struct tc_cls_matchall_offload *f)
  {
        struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
  
        mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port,
 -                                                       cls->cookie);
 +                                                       f->cookie);
        if (!mall_tc_entry) {
                netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
                return;
        kfree(mall_tc_entry);
  }
  
 -static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle,
 -                           u32 chain_index, __be16 proto,
 -                           struct tc_to_netdev *tc)
 +static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 +                                        struct tc_cls_matchall_offload *f)
  {
 -      struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 -      bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
 +      bool ingress;
  
 -      if (chain_index)
 +      if (is_classid_clsact_ingress(f->common.classid))
 +              ingress = true;
 +      else if (is_classid_clsact_egress(f->common.classid))
 +              ingress = false;
 +      else
                return -EOPNOTSUPP;
  
 -      switch (tc->type) {
 -      case TC_SETUP_MATCHALL:
 -              switch (tc->cls_mall->command) {
 -              case TC_CLSMATCHALL_REPLACE:
 -                      return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port,
 -                                                            proto,
 -                                                            tc->cls_mall,
 -                                                            ingress);
 -              case TC_CLSMATCHALL_DESTROY:
 -                      mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port,
 -                                                     tc->cls_mall);
 -                      return 0;
 -              default:
 -                      return -EOPNOTSUPP;
 -              }
 -      case TC_SETUP_CLSFLOWER:
 -              switch (tc->cls_flower->command) {
 -              case TC_CLSFLOWER_REPLACE:
 -                      return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress,
 -                                                     proto, tc->cls_flower);
 -              case TC_CLSFLOWER_DESTROY:
 -                      mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress,
 -                                              tc->cls_flower);
 -                      return 0;
 -              case TC_CLSFLOWER_STATS:
 -                      return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress,
 -                                                   tc->cls_flower);
 -              default:
 -                      return -EOPNOTSUPP;
 -              }
 +      if (f->common.chain_index)
 +              return -EOPNOTSUPP;
 +
 +      switch (f->command) {
 +      case TC_CLSMATCHALL_REPLACE:
 +              return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
 +                                                    ingress);
 +      case TC_CLSMATCHALL_DESTROY:
 +              mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f);
 +              return 0;
 +      default:
 +              return -EOPNOTSUPP;
 +      }
 +}
 +
 +static int
 +mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
 +                           struct tc_cls_flower_offload *f)
 +{
 +      bool ingress;
 +
 +      if (is_classid_clsact_ingress(f->common.classid))
 +              ingress = true;
 +      else if (is_classid_clsact_egress(f->common.classid))
 +              ingress = false;
 +      else
 +              return -EOPNOTSUPP;
 +
 +      switch (f->command) {
 +      case TC_CLSFLOWER_REPLACE:
 +              return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f);
 +      case TC_CLSFLOWER_DESTROY:
 +              mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f);
 +              return 0;
 +      case TC_CLSFLOWER_STATS:
 +              return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, f);
 +      default:
 +              return -EOPNOTSUPP;
        }
 +}
  
 -      return -EOPNOTSUPP;
 +static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 +                           void *type_data)
 +{
 +      struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 +
 +      switch (type) {
 +      case TC_SETUP_CLSMATCHALL:
 +              return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data);
 +      case TC_SETUP_CLSFLOWER:
 +              return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data);
 +      default:
 +              return -EOPNOTSUPP;
 +      }
  }
  
  static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
@@@ -3359,47 -3333,15 +3359,47 @@@ static const struct mlxsw_listener mlxs
        MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
        MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
        MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
 +      MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD,
 +                        false),
 +      MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
 +                           false),
 +      MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD,
 +                           false),
 +      MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
 +                           false),
        /* L3 traps */
 -      MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 -      MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 -      MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 -      MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false),
 -      MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
 -      MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
 -      MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
 -      MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
 +      MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
 +      MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
 +                        false),
 +      MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP,
 +                        false),
 +      MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
 +      MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
 +      MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
 +      MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
 +      MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
 +      MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
 +      MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
 +                        false),
 +      MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
 +                        false),
 +      MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
 +                        false),
 +      MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
 +                        false),
 +      MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
 +      MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
 +                        false),
 +      MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false),
 +      MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
 +      MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
 +      MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
        /* PKT Sample trap */
        MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
                  false, SP_IP2ME, DISCARD),
@@@ -3434,17 -3376,15 +3434,17 @@@ static int mlxsw_sp_cpu_policers_set(st
                        burst_size = 7;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
                        rate = 16 * 1024;
                        burst_size = 10;
                        break;
 -              case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 -              case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
                        rate = 1024;
                        burst_size = 7;
                        break;
@@@ -3493,23 -3433,21 +3493,23 @@@ static int mlxsw_sp_trap_groups_set(str
                        priority = 5;
                        tc = 5;
                        break;
 -              case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
                        priority = 4;
                        tc = 4;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
                        priority = 3;
                        tc = 3;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
                        priority = 2;
                        tc = 2;
                        break;
 -              case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
 +              case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
                        priority = 1;
@@@ -3756,7 -3694,7 +3756,7 @@@ static void mlxsw_sp_fini(struct mlxsw_
        mlxsw_sp_fids_fini(mlxsw_sp);
  }
  
 -static struct mlxsw_config_profile mlxsw_sp_config_profile = {
 +static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
        .used_max_vepa_channels         = 1,
        .max_vepa_channels              = 0,
        .used_max_mid                   = 1,
@@@ -4201,6 -4139,8 +4201,8 @@@ static int mlxsw_sp_netdevice_port_uppe
                        return -EINVAL;
                if (!info->linking)
                        break;
+               if (netdev_has_any_upper_dev(upper_dev))
+                       return -EINVAL;
                if (netif_is_lag_master(upper_dev) &&
                    !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
                                               info->upper_info))
@@@ -4320,6 -4260,10 +4322,10 @@@ static int mlxsw_sp_netdevice_port_vlan
                upper_dev = info->upper_dev;
                if (!netif_is_bridge_master(upper_dev))
                        return -EINVAL;
+               if (!info->linking)
+                       break;
+               if (netdev_has_any_upper_dev(upper_dev))
+                       return -EINVAL;
                break;
        case NETDEV_CHANGEUPPER:
                upper_dev = info->upper_dev;
@@@ -4419,10 -4363,6 +4425,10 @@@ static struct notifier_block mlxsw_sp_i
        .priority = 10, /* Must be called before FIB notifier block */
  };
  
 +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
 +      .notifier_call = mlxsw_sp_inet6addr_event,
 +};
 +
  static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
        .notifier_call = mlxsw_sp_router_netevent_event,
  };
@@@ -4443,7 -4383,6 +4449,7 @@@ static int __init mlxsw_sp_module_init(
  
        register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 +      register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
  
        err = mlxsw_core_driver_register(&mlxsw_sp_driver);
@@@ -4460,7 -4399,6 +4466,7 @@@ err_pci_driver_register
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
  err_core_driver_register:
        unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 +      unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        return err;
@@@ -4471,7 -4409,6 +4477,7 @@@ static void __exit mlxsw_sp_module_exit
        mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
        unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 +      unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
  }
index d868a5700e01595ddc763ab526324403380ab116,74a96d6bb05ce1c4b3a64d03d18016327b697a84..d396183108f76dc25f0d28724c5bee42b1948666
@@@ -105,43 -105,62 +105,62 @@@ static in
  nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
                                struct tc_cls_flower_offload *flow)
  {
-       struct flow_dissector_key_control *mask_enc_ctl;
-       struct flow_dissector_key_basic *mask_basic;
-       struct flow_dissector_key_basic *key_basic;
+       struct flow_dissector_key_basic *mask_basic = NULL;
+       struct flow_dissector_key_basic *key_basic = NULL;
+       struct flow_dissector_key_ip *mask_ip = NULL;
        u32 key_layer_two;
        u8 key_layer;
        int key_size;
  
-       mask_enc_ctl = skb_flow_dissector_target(flow->dissector,
-                                                FLOW_DISSECTOR_KEY_ENC_CONTROL,
-                                                flow->mask);
+       if (dissector_uses_key(flow->dissector,
+                              FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+               struct flow_dissector_key_control *mask_enc_ctl =
+                       skb_flow_dissector_target(flow->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_CONTROL,
+                                                 flow->mask);
+               /* We are expecting a tunnel. For now we ignore offloading. */
+               if (mask_enc_ctl->addr_type)
+                       return -EOPNOTSUPP;
+       }
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               mask_basic = skb_flow_dissector_target(flow->dissector,
+                                                      FLOW_DISSECTOR_KEY_BASIC,
+                                                      flow->mask);
  
-       mask_basic = skb_flow_dissector_target(flow->dissector,
-                                              FLOW_DISSECTOR_KEY_BASIC,
-                                              flow->mask);
+               key_basic = skb_flow_dissector_target(flow->dissector,
+                                                     FLOW_DISSECTOR_KEY_BASIC,
+                                                     flow->key);
+       }
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP))
+               mask_ip = skb_flow_dissector_target(flow->dissector,
+                                                   FLOW_DISSECTOR_KEY_IP,
+                                                   flow->mask);
  
-       key_basic = skb_flow_dissector_target(flow->dissector,
-                                             FLOW_DISSECTOR_KEY_BASIC,
-                                             flow->key);
        key_layer_two = 0;
        key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
        key_size = sizeof(struct nfp_flower_meta_one) +
                   sizeof(struct nfp_flower_in_port) +
                   sizeof(struct nfp_flower_mac_mpls);
  
-       /* We are expecting a tunnel. For now we ignore offloading. */
-       if (mask_enc_ctl->addr_type)
-               return -EOPNOTSUPP;
-       if (mask_basic->n_proto) {
+       if (mask_basic && mask_basic->n_proto) {
                /* Ethernet type is present in the key. */
                switch (key_basic->n_proto) {
                case cpu_to_be16(ETH_P_IP):
+                       if (mask_ip && mask_ip->tos)
+                               return -EOPNOTSUPP;
+                       if (mask_ip && mask_ip->ttl)
+                               return -EOPNOTSUPP;
                        key_layer |= NFP_FLOWER_LAYER_IPV4;
                        key_size += sizeof(struct nfp_flower_ipv4);
                        break;
  
                case cpu_to_be16(ETH_P_IPV6):
+                       if (mask_ip && mask_ip->tos)
+                               return -EOPNOTSUPP;
+                       if (mask_ip && mask_ip->ttl)
+                               return -EOPNOTSUPP;
                        key_layer |= NFP_FLOWER_LAYER_IPV6;
                        key_size += sizeof(struct nfp_flower_ipv6);
                        break;
                case cpu_to_be16(ETH_P_ARP):
                        return -EOPNOTSUPP;
  
+               /* Currently we do not offload MPLS. */
+               case cpu_to_be16(ETH_P_MPLS_UC):
+               case cpu_to_be16(ETH_P_MPLS_MC):
+                       return -EOPNOTSUPP;
                /* Will be included in layer 2. */
                case cpu_to_be16(ETH_P_8021Q):
                        break;
                }
        }
  
-       if (mask_basic->ip_proto) {
+       if (mask_basic && mask_basic->ip_proto) {
                /* Ethernet type is present in the key. */
                switch (key_basic->ip_proto) {
                case IPPROTO_TCP:
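The reworked key-layer calculation above only calls skb_flow_dissector_target() for keys the dissector actually carries, leaving the mask pointers NULL otherwise and NULL-checking them before every use. A compact standalone sketch of that look-up-then-guard pattern, with a hypothetical find_key() standing in for the dissector helpers:

#include <stdio.h>
#include <stddef.h>

struct basic_key { int n_proto; int ip_proto; };

/* Hypothetical lookup: returns NULL when the flow has no such key. */
static struct basic_key *find_key(int present)
{
	static struct basic_key key = { .n_proto = 0x0800, .ip_proto = 6 };

	return present ? &key : NULL;
}

static int classify(int has_basic_key)
{
	struct basic_key *mask = find_key(has_basic_key);

	/* Guard every dereference: a missing key is not an error here. */
	if (mask && mask->n_proto)
		printf("match on ethertype 0x%04x\n", mask->n_proto);
	if (mask && mask->ip_proto)
		printf("match on ip proto %d\n", mask->ip_proto);
	return 0;
}

int main(void)
{
	classify(1);	/* key present: both matches printed */
	classify(0);	/* key absent: nothing dereferenced */
	return 0;
}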
@@@ -385,15 -409,16 +409,15 @@@ nfp_flower_repr_offload(struct nfp_app 
  }
  
  int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
 -                      u32 handle, __be16 proto, struct tc_to_netdev *tc)
 +                      enum tc_setup_type type, void *type_data)
  {
 -      if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
 -              return -EOPNOTSUPP;
 +      struct tc_cls_flower_offload *cls_flower = type_data;
  
 -      if (!eth_proto_is_802_3(proto))
 +      if (type != TC_SETUP_CLSFLOWER ||
 +          !is_classid_clsact_ingress(cls_flower->common.classid) ||
 +          !eth_proto_is_802_3(cls_flower->common.protocol) ||
 +          cls_flower->common.chain_index)
                return -EOPNOTSUPP;
  
 -      if (tc->type != TC_SETUP_CLSFLOWER)
 -              return -EINVAL;
 -
 -      return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
 +      return nfp_flower_repr_offload(app, netdev, cls_flower);
  }
index dd769eceb33d3a91ebb237aa4ccdb8b2de84ff0d,3f199db2002e5ce1c4a0dabc1475647426f6c876..f055b1774d65312a492010dd92b790559435c71c
@@@ -98,21 -98,20 +98,20 @@@ static int nfp_pcie_sriov_enable(struc
        struct nfp_pf *pf = pci_get_drvdata(pdev);
        int err;
  
-       mutex_lock(&pf->lock);
        if (num_vfs > pf->limit_vfs) {
                nfp_info(pf->cpp, "Firmware limits number of VFs to %u\n",
                         pf->limit_vfs);
-               err = -EINVAL;
-               goto err_unlock;
+               return -EINVAL;
        }
  
        err = pci_enable_sriov(pdev, num_vfs);
        if (err) {
                dev_warn(&pdev->dev, "Failed to enable PCI SR-IOV: %d\n", err);
-               goto err_unlock;
+               return err;
        }
  
+       mutex_lock(&pf->lock);
        err = nfp_app_sriov_enable(pf->app, num_vfs);
        if (err) {
                dev_warn(&pdev->dev,
        return num_vfs;
  
  err_sriov_disable:
-       pci_disable_sriov(pdev);
- err_unlock:
        mutex_unlock(&pf->lock);
+       pci_disable_sriov(pdev);
        return err;
  #endif
        return 0;
@@@ -158,10 -156,10 +156,10 @@@ static int nfp_pcie_sriov_disable(struc
  
        pf->num_vfs = 0;
  
+       mutex_unlock(&pf->lock);
        pci_disable_sriov(pdev);
        dev_dbg(&pdev->dev, "Removed VFs.\n");
-       mutex_unlock(&pf->lock);
  #endif
        return 0;
  }
@@@ -174,21 -172,6 +172,21 @@@ static int nfp_pcie_sriov_configure(str
                return nfp_pcie_sriov_enable(pdev, num_vfs);
  }
  
 +static const struct firmware *
 +nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name)
 +{
 +      const struct firmware *fw = NULL;
 +      int err;
 +
 +      err = request_firmware_direct(&fw, name, &pdev->dev);
 +      nfp_info(pf->cpp, "  %s: %s\n",
 +               name, err ? "not found" : "found, loading...");
 +      if (err)
 +              return NULL;
 +
 +      return fw;
 +}
 +
  /**
   * nfp_net_fw_find() - Find the correct firmware image for netdev mode
   * @pdev:     PCI Device structure
  static const struct firmware *
  nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
  {
 -      const struct firmware *fw = NULL;
        struct nfp_eth_table_port *port;
 +      const struct firmware *fw;
        const char *fw_model;
        char fw_name[256];
 -      int spc, err = 0;
 -      int i, j;
 -
 +      const u8 *serial;
 +      u16 interface;
 +      int spc, i, j;
 +
 +      nfp_info(pf->cpp, "Looking for firmware file in order of priority:\n");
 +
 +      /* First try to find a firmware image specific for this device */
 +      interface = nfp_cpp_interface(pf->cpp);
 +      nfp_cpp_serial(pf->cpp, &serial);
 +      sprintf(fw_name, "netronome/serial-%pMF-%02hhx-%02hhx.nffw",
 +              serial, interface >> 8, interface & 0xff);
 +      fw = nfp_net_fw_request(pdev, pf, fw_name);
 +      if (fw)
 +              return fw;
 +
 +      /* Then try the PCI name */
 +      sprintf(fw_name, "netronome/pci-%s.nffw", pci_name(pdev));
 +      fw = nfp_net_fw_request(pdev, pf, fw_name);
 +      if (fw)
 +              return fw;
 +
 +      /* Finally try the card type and media */
        if (!pf->eth_tbl) {
                dev_err(&pdev->dev, "Error: can't identify media config\n");
                return NULL;
        if (spc <= 0)
                return NULL;
  
 -      err = request_firmware(&fw, fw_name, &pdev->dev);
 -      if (err)
 -              return NULL;
 -
 -      dev_info(&pdev->dev, "Loading FW image: %s\n", fw_name);
 -
 -      return fw;
 +      return nfp_net_fw_request(pdev, pf, fw_name);
  }
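nfp_net_fw_find() above now probes firmware names in a fixed priority order (serial-specific, then PCI address, then card/media type), stopping at the first image request_firmware_direct() can satisfy. A small standalone sketch of that first-match-wins probing, with a hypothetical try_load() and made-up firmware names in place of the real firmware API and naming scheme:

#include <stdio.h>
#include <string.h>

/* Hypothetical loader: succeeds only for the board-specific name. */
static int try_load(const char *name)
{
	int found = strcmp(name, "vendor/board-media.fw") == 0;

	printf("  %s: %s\n", name, found ? "found, loading..." : "not found");
	return found;
}

static const char *find_firmware(void)
{
	/* Most specific name first, most generic last. */
	static const char *candidates[] = {
		"vendor/serial-00-11-22-33-44-55.fw",
		"vendor/pci-0000:04:00.0.fw",
		"vendor/board-media.fw",
	};
	size_t i;

	for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++)
		if (try_load(candidates[i]))
			return candidates[i];
	return NULL;
}

int main(void)
{
	const char *fw = find_firmware();

	printf("selected: %s\n", fw ? fw : "(none)");
	return fw ? 0 : 1;
}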
  
  /**
index ecbec28cfa76441bed835b94204fca16ccf75027,66a09e490cf5a5f2ce19193653910e0fcbe5632e..2920889fa6d6198a0a44abe32c079fd6419c3e32
@@@ -71,7 -71,6 +71,7 @@@
  #include "nfp_app.h"
  #include "nfp_net_ctrl.h"
  #include "nfp_net.h"
 +#include "nfp_net_sriov.h"
  #include "nfp_port.h"
  
  /**
@@@ -896,6 -895,8 +896,8 @@@ static int nfp_net_tx(struct sk_buff *s
  
        netdev_tx_sent_queue(nd_q, txbuf->real_len);
  
+       skb_tx_timestamp(skb);
        tx_ring->wr_p += nr_frags + 1;
        if (nfp_net_tx_ring_should_stop(tx_ring))
                nfp_net_tx_ring_stop(nd_q, tx_ring);
        if (!skb->xmit_more || netif_xmit_stopped(nd_q))
                nfp_net_tx_xmit_more_flush(tx_ring);
  
-       skb_tx_timestamp(skb);
        return NETDEV_TX_OK;
  
  err_unmap:
@@@ -1752,6 -1751,10 +1752,10 @@@ static int nfp_net_rx(struct nfp_net_rx
                        continue;
                }
  
+               nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
+               nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
                if (likely(!meta.portid)) {
                        netdev = dp->netdev;
                } else {
                        nn = netdev_priv(dp->netdev);
                        netdev = nfp_app_repr_get(nn->app, meta.portid);
                        if (unlikely(!netdev)) {
-                               nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
+                               nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
                                continue;
                        }
                        nfp_repr_inc_rx_stats(netdev, pkt_len);
                }
  
-               nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
-               nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
                skb_reserve(skb, pkt_off);
                skb_put(skb, pkt_len);
  
@@@ -2660,7 -2659,6 +2660,7 @@@ static int nfp_net_netdev_close(struct 
        /* Step 2: Tell NFP
         */
        nfp_net_clear_config_and_disable(nn);
 +      nfp_port_configure(netdev, false);
  
        /* Step 3: Free resources
         */
@@@ -2778,21 -2776,16 +2778,21 @@@ static int nfp_net_netdev_open(struct n
                goto err_free_all;
  
        /* Step 2: Configure the NFP
 +       * - Ifup the physical interface if it exists
         * - Enable rings from 0 to tx_rings/rx_rings - 1.
         * - Write MAC address (in case it changed)
         * - Set the MTU
         * - Set the Freelist buffer size
         * - Enable the FW
         */
 -      err = nfp_net_set_config_and_enable(nn);
 +      err = nfp_port_configure(netdev, true);
        if (err)
                goto err_free_all;
  
 +      err = nfp_net_set_config_and_enable(nn);
 +      if (err)
 +              goto err_port_disable;
 +
        /* Step 3: Enable for kernel
         * - put some freelist descriptors on each RX ring
         * - enable NAPI on each ring
  
        return 0;
  
 +err_port_disable:
 +      nfp_port_configure(netdev, false);
  err_free_all:
        nfp_net_close_free_all(nn);
        return err;
@@@ -3422,11 -3413,6 +3422,11 @@@ const struct net_device_ops nfp_net_net
        .ndo_get_stats64        = nfp_net_stat64,
        .ndo_vlan_rx_add_vid    = nfp_net_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = nfp_net_vlan_rx_kill_vid,
 +      .ndo_set_vf_mac         = nfp_app_set_vf_mac,
 +      .ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
 +      .ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
 +      .ndo_get_vf_config      = nfp_app_get_vf_config,
 +      .ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
        .ndo_setup_tc           = nfp_port_setup_tc,
        .ndo_tx_timeout         = nfp_net_tx_timeout,
        .ndo_set_rx_mode        = nfp_net_set_rx_mode,
index 2da083fd5e137c25e06376816314f332cdba9271,34b985384d26129435686dca6c86b3e6e092cde2..7c22cc4654b74867338c9e087f6f2af7e7d63a2f
@@@ -57,7 -57,6 +57,7 @@@
  #include "nfpcore/nfp6000_pcie.h"
  #include "nfp_app.h"
  #include "nfp_net_ctrl.h"
 +#include "nfp_net_sriov.h"
  #include "nfp_net.h"
  #include "nfp_main.h"
  #include "nfp_port.h"
@@@ -389,7 -388,7 +389,7 @@@ nfp_net_pf_app_init(struct nfp_pf *pf, 
                                        NFP_PF_CSR_SLICE_SIZE,
                                        &pf->ctrl_vnic_bar);
        if (IS_ERR(ctrl_bar)) {
 -              nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
 +              nfp_err(pf->cpp, "Failed to find ctrl vNIC memory symbol\n");
                err = PTR_ERR(ctrl_bar);
                goto err_app_clean;
        }
@@@ -457,13 -456,9 +457,9 @@@ static int nfp_net_pf_app_start(struct 
  {
        int err;
  
-       err = nfp_net_pf_app_start_ctrl(pf);
-       if (err)
-               return err;
        err = nfp_app_start(pf->app, pf->ctrl_vnic);
        if (err)
-               goto err_ctrl_stop;
+               return err;
  
        if (pf->num_vfs) {
                err = nfp_app_sriov_enable(pf->app, pf->num_vfs);
  
  err_app_stop:
        nfp_app_stop(pf->app);
- err_ctrl_stop:
-       nfp_net_pf_app_stop_ctrl(pf);
        return err;
  }
  
@@@ -485,13 -478,10 +479,12 @@@ static void nfp_net_pf_app_stop(struct 
        if (pf->num_vfs)
                nfp_app_sriov_disable(pf->app);
        nfp_app_stop(pf->app);
-       nfp_net_pf_app_stop_ctrl(pf);
  }
  
  static void nfp_net_pci_unmap_mem(struct nfp_pf *pf)
  {
 +      if (pf->vfcfg_tbl2_area)
 +              nfp_cpp_area_release_free(pf->vfcfg_tbl2_area);
        if (pf->vf_cfg_bar)
                nfp_cpp_area_release_free(pf->vf_cfg_bar);
        if (pf->mac_stats_bar)
@@@ -507,7 -497,7 +500,7 @@@ static int nfp_net_pci_map_mem(struct n
        int err;
  
        min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
 -      mem = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%d_net_bar0",
 +      mem = nfp_net_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0",
                                   min_size, &pf->data_vnic_bar);
        if (IS_ERR(mem)) {
                nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
                pf->vf_cfg_mem = NULL;
        }
  
 +      min_size = NFP_NET_VF_CFG_SZ * pf->limit_vfs + NFP_NET_VF_CFG_MB_SZ;
 +      pf->vfcfg_tbl2 = nfp_net_pf_map_rtsym(pf, "net.vfcfg_tbl2",
 +                                            "_pf%d_net_vf_cfg2",
 +                                            min_size, &pf->vfcfg_tbl2_area);
 +      if (IS_ERR(pf->vfcfg_tbl2)) {
 +              if (PTR_ERR(pf->vfcfg_tbl2) != -ENOENT) {
 +                      err = PTR_ERR(pf->vfcfg_tbl2);
 +                      goto err_unmap_vf_cfg;
 +              }
 +              pf->vfcfg_tbl2 = NULL;
 +      }
 +
        mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0,
                               NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ,
                               &pf->qc_area);
        if (IS_ERR(mem)) {
                nfp_err(pf->cpp, "Failed to map Queue Controller area.\n");
                err = PTR_ERR(mem);
 -              goto err_unmap_vf_cfg;
 +              goto err_unmap_vfcfg_tbl2;
        }
  
        return 0;
  
 +err_unmap_vfcfg_tbl2:
 +      if (pf->vfcfg_tbl2_area)
 +              nfp_cpp_area_release_free(pf->vfcfg_tbl2_area);
  err_unmap_vf_cfg:
        if (pf->vf_cfg_bar)
                nfp_cpp_area_release_free(pf->vf_cfg_bar);
@@@ -577,7 -552,7 +570,7 @@@ err_unmap_ctrl
  
  static void nfp_net_pci_remove_finish(struct nfp_pf *pf)
  {
-       nfp_net_pf_app_stop(pf);
+       nfp_net_pf_app_stop_ctrl(pf);
        /* stop app first, to avoid double free of ctrl vNIC's ddir */
        nfp_net_debugfs_dir_clean(&pf->ddir);
  
@@@ -708,6 -683,7 +701,7 @@@ int nfp_net_pci_probe(struct nfp_pf *pf
  {
        struct nfp_net_fw_version fw_ver;
        u8 __iomem *ctrl_bar, *qc_bar;
+       struct nfp_net *nn;
        int stride;
        int err;
  
        if (!pf->rtbl) {
                nfp_err(pf->cpp, "No %s, giving up.\n",
                        pf->fw_loaded ? "symbol table" : "firmware found");
 -              return -EPROBE_DEFER;
 +              return -EINVAL;
        }
  
        mutex_lock(&pf->lock);
        if (err)
                goto err_free_vnics;
  
-       err = nfp_net_pf_app_start(pf);
+       err = nfp_net_pf_app_start_ctrl(pf);
        if (err)
                goto err_free_irqs;
  
        if (err)
                goto err_stop_app;
  
+       err = nfp_net_pf_app_start(pf);
+       if (err)
+               goto err_clean_vnics;
        mutex_unlock(&pf->lock);
  
        return 0;
  
+ err_clean_vnics:
+       list_for_each_entry(nn, &pf->vnics, vnic_list)
+               if (nfp_net_is_data_vnic(nn))
+                       nfp_net_pf_clean_vnic(pf, nn);
  err_stop_app:
-       nfp_net_pf_app_stop(pf);
+       nfp_net_pf_app_stop_ctrl(pf);
  err_free_irqs:
        nfp_net_pf_free_irqs(pf);
  err_free_vnics:
@@@ -821,6 -805,8 +823,8 @@@ void nfp_net_pci_remove(struct nfp_pf *
        if (list_empty(&pf->vnics))
                goto out;
  
+       nfp_net_pf_app_stop(pf);
        list_for_each_entry(nn, &pf->vnics, vnic_list)
                if (nfp_net_is_data_vnic(nn))
                        nfp_net_pf_clean_vnic(pf, nn);
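
The nfp_main.c changes above map an additional, optional resource (the VF config table): a missing run-time symbol (-ENOENT) leaves the pointer NULL and the probe continues, while any other error unwinds. A small user-space sketch of that "optional resource" shape, with made-up names and a fake lookup:

#include <errno.h>
#include <stdio.h>

/* Pretend lookup: names starting with '_' "exist", others do not. */
static int map_symbol(const char *name, void **out)
{
        if (name[0] == '_') {
                *out = (void *)0x1000;
                return 0;
        }
        return -ENOENT;
}

static int map_resources(void)
{
        void *bar0, *tbl2;
        int err;

        err = map_symbol("_pf0_net_bar0", &bar0);
        if (err)
                return err;             /* mandatory: fail the probe */

        err = map_symbol("vfcfg_tbl2", &tbl2);
        if (err) {
                if (err != -ENOENT)
                        return err;     /* real error: unwind */
                tbl2 = NULL;            /* optional: carry on without it */
        }

        printf("bar0=%p tbl2=%p\n", bar0, tbl2);
        return 0;
}

int main(void) { return map_resources() ? 1 : 0; }
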
index 458d55ba423f599ec824dc327f248a9b64d8af96,e3223f2fe2ffc9d4b186a42e0cac87fc37021afd..fe2599b83d09066353ecd98d8f33c0f5ae75756b
@@@ -144,23 -144,42 +144,23 @@@ static int ql_get_serdes_regs(struct ql
        xaui_direct_valid = xaui_indirect_valid = 1;
  
        /* The XAUI needs to be read out per port */
 -      if (qdev->func & 1) {
 -              /* We are NIC 2 */
 -              status = ql_read_other_func_serdes_reg(qdev,
 -                              XG_SERDES_XAUI_HSS_PCS_START, &temp);
 -              if (status)
 -                      temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
 -              if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
 -                                      XG_SERDES_ADDR_XAUI_PWR_DOWN)
 -                      xaui_indirect_valid = 0;
 -
 -              status = ql_read_serdes_reg(qdev,
 -                              XG_SERDES_XAUI_HSS_PCS_START, &temp);
 -              if (status)
 -                      temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
 -
 -              if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
 -                                      XG_SERDES_ADDR_XAUI_PWR_DOWN)
 -                      xaui_direct_valid = 0;
 -      } else {
 -              /* We are NIC 1 */
 -              status = ql_read_other_func_serdes_reg(qdev,
 -                              XG_SERDES_XAUI_HSS_PCS_START, &temp);
 -              if (status)
 -                      temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
 -              if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
 -                                      XG_SERDES_ADDR_XAUI_PWR_DOWN)
 -                      xaui_indirect_valid = 0;
 -
 -              status = ql_read_serdes_reg(qdev,
 -                              XG_SERDES_XAUI_HSS_PCS_START, &temp);
 -              if (status)
 -                      temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
 -              if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
 -                                      XG_SERDES_ADDR_XAUI_PWR_DOWN)
 -                      xaui_direct_valid = 0;
 -      }
 +      status = ql_read_other_func_serdes_reg(qdev,
 +                      XG_SERDES_XAUI_HSS_PCS_START, &temp);
 +      if (status)
 +              temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
 +
 +      if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
 +                              XG_SERDES_ADDR_XAUI_PWR_DOWN)
 +              xaui_indirect_valid = 0;
 +
 +      status = ql_read_serdes_reg(qdev, XG_SERDES_XAUI_HSS_PCS_START, &temp);
 +
 +      if (status)
 +              temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
 +
 +      if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
 +                              XG_SERDES_ADDR_XAUI_PWR_DOWN)
 +              xaui_direct_valid = 0;
  
        /*
         * XFI register is shared so only need to read one
@@@ -705,7 -724,7 +705,7 @@@ static void ql_build_coredump_seg_heade
        seg_hdr->cookie = MPI_COREDUMP_COOKIE;
        seg_hdr->segNum = seg_number;
        seg_hdr->segSize = seg_size;
-       memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1);
+       strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1);
  }
  
  /*
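
The qlge hunk above swaps memcpy() for strncpy() when filling the fixed-size description field: memcpy() always reads sizeof(description) - 1 bytes from the source regardless of its length, while strncpy() stops at the terminating NUL and zero-pads the rest. A small user-space illustration of the difference, using made-up sizes:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char dst[16];
        const char desc[32] = "CORE";   /* NUL-terminated, shorter than dst */

        /* memcpy would read sizeof(dst) - 1 bytes from the source even
         * when the string is shorter; with a plain short C string that
         * is a read past the end of the string.
         */
        memcpy(dst, desc, sizeof(dst) - 1);

        /* strncpy copies at most sizeof(dst) - 1 bytes, stops at the
         * NUL and pads the remainder of dst with zeros.
         */
        strncpy(dst, desc, sizeof(dst) - 1);
        dst[sizeof(dst) - 1] = '\0';

        printf("%s\n", dst);
        return 0;
}
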
index fac44c5c8d0d6fa5f4ced3e267f3b6a5e3f653f2,d91cbc6c3ca4eee43090bccc70c76b9a9d1fbb85..05ee870c3636cca58cac402d7ab98d1d111ad54d
@@@ -33,9 -33,6 +33,9 @@@
  #include <linux/if_vlan.h>
  #include <linux/in.h>
  #include <linux/slab.h>
 +#include <linux/rtnetlink.h>
 +#include <linux/netpoll.h>
 +
  #include <net/arp.h>
  #include <net/route.h>
  #include <net/sock.h>
  
  #include "hyperv_net.h"
  
 -#define RING_SIZE_MIN 64
 +#define RING_SIZE_MIN         64
 +#define NETVSC_MIN_TX_SECTIONS        10
 +#define NETVSC_DEFAULT_TX     192     /* ~1M */
 +#define NETVSC_MIN_RX_SECTIONS        10      /* ~64K */
 +#define NETVSC_DEFAULT_RX     2048    /* ~4M */
 +
  #define LINKCHANGE_INT (2 * HZ)
 +#define VF_TAKEOVER_INT (HZ / 10)
  
  static int ring_size = 128;
  module_param(ring_size, int, S_IRUGO);
@@@ -78,8 -69,7 +78,8 @@@ static void netvsc_set_multicast_list(s
  static int netvsc_open(struct net_device *net)
  {
        struct net_device_context *ndev_ctx = netdev_priv(net);
 -      struct netvsc_device *nvdev = ndev_ctx->nvdev;
 +      struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
        struct rndis_device *rdev;
        int ret = 0;
  
        netif_tx_wake_all_queues(net);
  
        rdev = nvdev->extension;
 -      if (!rdev->link_state && !ndev_ctx->datapath)
 +
 +      if (!rdev->link_state)
                netif_carrier_on(net);
  
 -      return ret;
 +      if (vf_netdev) {
 +              /* Setting synthetic device up transparently sets
 +               * slave as up. If open fails, then slave will
 +               * still be offline (and not used).
 +               */
 +              ret = dev_open(vf_netdev);
 +              if (ret)
 +                      netdev_warn(net,
 +                                  "unable to open slave: %s: %d\n",
 +                                  vf_netdev->name, ret);
 +      }
 +      return 0;
  }
  
  static int netvsc_close(struct net_device *net)
  {
        struct net_device_context *net_device_ctx = netdev_priv(net);
 +      struct net_device *vf_netdev
 +              = rtnl_dereference(net_device_ctx->vf_netdev);
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 -      int ret;
 +      int ret = 0;
        u32 aread, i, msec = 10, retry = 0, retry_max = 20;
        struct vmbus_channel *chn;
  
        netif_tx_disable(net);
  
 +      /* No need to close rndis filter if it is removed already */
 +      if (!nvdev)
 +              goto out;
 +
        ret = rndis_filter_close(nvdev);
        if (ret != 0) {
                netdev_err(net, "unable to close device (ret %d).\n", ret);
                ret = -ETIMEDOUT;
        }
  
 +out:
 +      if (vf_netdev)
 +              dev_close(vf_netdev);
 +
        return ret;
  }
  
  static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
 -                              int pkt_type)
 +                         int pkt_type)
  {
        struct rndis_packet *rndis_pkt;
        struct rndis_per_packet_info *ppi;
        return ppi;
  }
  
 -/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
 - * hash for non-TCP traffic with only IP numbers.
 +/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
 + * packets. We can use ethtool to change UDP hash level when necessary.
   */
 -static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
 +static inline u32 netvsc_get_hash(
 +      struct sk_buff *skb,
 +      const struct net_device_context *ndc)
  {
        struct flow_keys flow;
        u32 hash;
        if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
                return 0;
  
 -      if (flow.basic.ip_proto == IPPROTO_TCP) {
 +      if (flow.basic.ip_proto == IPPROTO_TCP ||
 +          (flow.basic.ip_proto == IPPROTO_UDP &&
 +           ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
 +            (flow.basic.n_proto == htons(ETH_P_IPV6) &&
 +             ndc->udp6_l4_hash)))) {
                return skb_get_hash(skb);
        } else {
                if (flow.basic.n_proto == htons(ETH_P_IP))
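
Per the updated comment, netvsc_get_hash() now takes the device context and folds UDP ports into the hash only when the per-device udp4_l4_hash/udp6_l4_hash flags allow it; TCP keeps the full 4-tuple hash. A condensed sketch of that decision with a toy hash function and invented field names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct flow { bool is_ipv4, is_tcp, is_udp;
              uint32_t saddr, daddr; uint16_t sport, dport; };

struct dev_ctx { bool udp4_l4_hash, udp6_l4_hash; };

static uint32_t mix(uint32_t h, uint32_t v) { return (h ^ v) * 2654435761u; }

static uint32_t pick_hash(const struct flow *f, const struct dev_ctx *c)
{
        bool l4 = f->is_tcp ||
                  (f->is_udp && (f->is_ipv4 ? c->udp4_l4_hash
                                            : c->udp6_l4_hash));
        uint32_t h = mix(mix(1, f->saddr), f->daddr);

        if (l4)                         /* include ports only when allowed */
                h = mix(h, ((uint32_t)f->sport << 16) | f->dport);
        return h;
}

int main(void)
{
        struct flow f = { true, false, true, 1, 2, 1000, 53 };
        struct dev_ctx c = { .udp4_l4_hash = false };

        printf("%08x\n", pick_hash(&f, &c));    /* addresses only */
        c.udp4_l4_hash = true;
        printf("%08x\n", pick_hash(&f, &c));    /* addresses + ports */
        return 0;
}
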
@@@ -238,7 -200,7 +238,7 @@@ static inline int netvsc_get_tx_queue(s
        struct sock *sk = skb->sk;
        int q_idx;
  
 -      q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
 +      q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
                                   (VRSS_SEND_TAB_SIZE - 1)];
  
        /* If queue index changed record the new value */
   *
   * TODO support XPS - but get_xps_queue not exported
   */
 -static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 -                      void *accel_priv, select_queue_fallback_t fallback)
 +static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
  {
 -      unsigned int num_tx_queues = ndev->real_num_tx_queues;
        int q_idx = sk_tx_queue_get(skb->sk);
  
 -      if (q_idx < 0 || skb->ooo_okay) {
 +      if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
                /* If forwarding a packet, we use the recorded queue when
                 * available for better cache locality.
                 */
                        q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
        }
  
 -      while (unlikely(q_idx >= num_tx_queues))
 -              q_idx -= num_tx_queues;
 -
        return q_idx;
  }
  
 +static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 +                             void *accel_priv,
 +                             select_queue_fallback_t fallback)
 +{
 +      struct net_device_context *ndc = netdev_priv(ndev);
 +      struct net_device *vf_netdev;
 +      u16 txq;
 +
 +      rcu_read_lock();
 +      vf_netdev = rcu_dereference(ndc->vf_netdev);
 +      if (vf_netdev) {
 +              txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
 +              qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
 +      } else {
 +              txq = netvsc_pick_tx(ndev, skb);
 +      }
 +      rcu_read_unlock();
 +
 +      while (unlikely(txq >= ndev->real_num_tx_queues))
 +              txq -= ndev->real_num_tx_queues;
 +
 +      return txq;
 +}
 +
  static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
 -                      struct hv_page_buffer *pb)
 +                     struct hv_page_buffer *pb)
  {
        int j = 0;
  
  
  static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
                           struct hv_netvsc_packet *packet,
 -                         struct hv_page_buffer **page_buf)
 +                         struct hv_page_buffer *pb)
  {
 -      struct hv_page_buffer *pb = *page_buf;
        u32 slots_used = 0;
        char *data = skb->data;
        int frags = skb_shinfo(skb)->nr_frags;
         * 2. skb linear data
         * 3. skb fragment data
         */
 -      if (hdr != NULL)
 -              slots_used += fill_pg_buf(virt_to_page(hdr),
 -                                      offset_in_page(hdr),
 -                                      len, &pb[slots_used]);
 +      slots_used += fill_pg_buf(virt_to_page(hdr),
 +                                offset_in_page(hdr),
 +                                len, &pb[slots_used]);
  
        packet->rmsg_size = len;
        packet->rmsg_pgcnt = slots_used;
@@@ -414,40 -359,13 +414,40 @@@ static u32 net_checksum_info(struct sk_
  
                if (ip6->nexthdr == IPPROTO_TCP)
                        return TRANSPORT_INFO_IPV6_TCP;
 -              else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
 +              else if (ip6->nexthdr == IPPROTO_UDP)
                        return TRANSPORT_INFO_IPV6_UDP;
        }
  
        return TRANSPORT_INFO_NOT_IP;
  }
  
 +/* Send skb on the slave VF device. */
 +static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
 +                        struct sk_buff *skb)
 +{
 +      struct net_device_context *ndev_ctx = netdev_priv(net);
 +      unsigned int len = skb->len;
 +      int rc;
 +
 +      skb->dev = vf_netdev;
 +      skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
 +
 +      rc = dev_queue_xmit(skb);
 +      if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
 +              struct netvsc_vf_pcpu_stats *pcpu_stats
 +                      = this_cpu_ptr(ndev_ctx->vf_stats);
 +
 +              u64_stats_update_begin(&pcpu_stats->syncp);
 +              pcpu_stats->tx_packets++;
 +              pcpu_stats->tx_bytes += len;
 +              u64_stats_update_end(&pcpu_stats->syncp);
 +      } else {
 +              this_cpu_inc(ndev_ctx->vf_stats->tx_dropped);
 +      }
 +
 +      return rc;
 +}
 +
  static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
  {
        struct net_device_context *net_device_ctx = netdev_priv(net);
        unsigned int num_data_pgs;
        struct rndis_message *rndis_msg;
        struct rndis_packet *rndis_pkt;
 +      struct net_device *vf_netdev;
        u32 rndis_msg_size;
        struct rndis_per_packet_info *ppi;
        u32 hash;
 -      struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
 -      struct hv_page_buffer *pb = page_buf;
 +      struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
 +
 +      /* if VF is present and up then redirect packets
 +       * already called with rcu_read_lock_bh
 +       */
 +      vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
 +      if (vf_netdev && netif_running(vf_netdev) &&
 +          !netpoll_tx_running(net))
 +              return netvsc_vf_xmit(net, vf_netdev, skb);
  
        /* We will at most need two pages to describe the rndis
         * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
  
                rndis_msg_size += NDIS_VLAN_PPI_SIZE;
                ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
 -                                      IEEE_8021Q_INFO);
 -              vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
 -                                              ppi->ppi_offset);
 +                                  IEEE_8021Q_INFO);
 +
 +              vlan = (void *)ppi + ppi->ppi_offset;
                vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
                vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
                                VLAN_PRIO_SHIFT;
                ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
                                    TCP_LARGESEND_PKTINFO);
  
 -              lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
 -                                                      ppi->ppi_offset);
 +              lso_info = (void *)ppi + ppi->ppi_offset;
  
                lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
                if (skb->protocol == htons(ETH_P_IP)) {
        rndis_msg->msg_len += rndis_msg_size;
        packet->total_data_buflen = rndis_msg->msg_len;
        packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
 -                                             skb, packet, &pb);
 +                                             skb, packet, pb);
  
        /* timestamp packet in software */
        skb_tx_timestamp(skb);
 -      ret = netvsc_send(net_device_ctx->device_ctx, packet,
 -                        rndis_msg, &pb, skb);
 +
 +      ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
        if (likely(ret == 0))
                return NETDEV_TX_OK;
  
@@@ -640,7 -551,6 +640,7 @@@ no_memory
        ++net_device_ctx->eth_stats.tx_no_memory;
        goto drop;
  }
 +
  /*
   * netvsc_linkstatus_callback - Link up/down notification
   */
@@@ -664,8 -574,8 +664,8 @@@ void netvsc_linkstatus_callback(struct 
        if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
                u32 speed;
  
 -              speed = *(u32 *)((void *)indicate + indicate->
 -                               status_buf_offset) / 10000;
 +              speed = *(u32 *)((void *)indicate
 +                               + indicate->status_buf_offset) / 10000;
                ndev_ctx->speed = speed;
                return;
        }
@@@ -748,18 -658,29 +748,18 @@@ int netvsc_recv_callback(struct net_dev
        struct netvsc_device *net_device;
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        struct netvsc_channel *nvchan;
 -      struct net_device *vf_netdev;
        struct sk_buff *skb;
        struct netvsc_stats *rx_stats;
  
        if (net->reg_state != NETREG_REGISTERED)
                return NVSP_STAT_FAIL;
  
 -      /*
 -       * If necessary, inject this packet into the VF interface.
 -       * On Hyper-V, multicast and brodcast packets are only delivered
 -       * to the synthetic interface (after subjecting these to
 -       * policy filters on the host). Deliver these via the VF
 -       * interface in the guest.
 -       */
        rcu_read_lock();
        net_device = rcu_dereference(net_device_ctx->nvdev);
        if (unlikely(!net_device))
                goto drop;
  
        nvchan = &net_device->chan_table[q_idx];
 -      vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
 -      if (vf_netdev && (vf_netdev->flags & IFF_UP))
 -              net = vf_netdev;
  
        /* Allocate a skb - TODO direct I/O to pages? */
        skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
@@@ -771,7 -692,8 +771,7 @@@ drop
                return NVSP_STAT_FAIL;
        }
  
 -      if (net != vf_netdev)
 -              skb_record_rx_queue(skb, q_idx);
 +      skb_record_rx_queue(skb, q_idx);
  
        /*
         * Even if injecting the packet, record the statistics
@@@ -814,16 -736,39 +814,16 @@@ static void netvsc_get_channels(struct 
        }
  }
  
 -static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
 -                           u32 num_chn)
 -{
 -      struct netvsc_device_info device_info;
 -      int ret;
 -
 -      memset(&device_info, 0, sizeof(device_info));
 -      device_info.num_chn = num_chn;
 -      device_info.ring_size = ring_size;
 -      device_info.max_num_vrss_chns = num_chn;
 -
 -      ret = rndis_filter_device_add(dev, &device_info);
 -      if (ret)
 -              return ret;
 -
 -      ret = netif_set_real_num_tx_queues(net, num_chn);
 -      if (ret)
 -              return ret;
 -
 -      ret = netif_set_real_num_rx_queues(net, num_chn);
 -
 -      return ret;
 -}
 -
  static int netvsc_set_channels(struct net_device *net,
                               struct ethtool_channels *channels)
  {
        struct net_device_context *net_device_ctx = netdev_priv(net);
        struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 -      unsigned int count = channels->combined_count;
 -      bool was_running;
 -      int ret;
 +      unsigned int orig, count = channels->combined_count;
 +      struct netvsc_device_info device_info;
 +      bool was_opened;
 +      int ret = 0;
  
        /* We do not support separate count for rx, tx, or other */
        if (count == 0 ||
        if (count > nvdev->max_chn)
                return -EINVAL;
  
 -      was_running = netif_running(net);
 -      if (was_running) {
 -              ret = netvsc_close(net);
 -              if (ret)
 -                      return ret;
 -      }
 +      orig = nvdev->num_chn;
 +      was_opened = rndis_filter_opened(nvdev);
 +      if (was_opened)
 +              rndis_filter_close(nvdev);
 +
 +      memset(&device_info, 0, sizeof(device_info));
 +      device_info.num_chn = count;
 +      device_info.ring_size = ring_size;
 +      device_info.send_sections = nvdev->send_section_cnt;
 +      device_info.recv_sections = nvdev->recv_section_cnt;
  
        rndis_filter_device_remove(dev, nvdev);
  
 -      ret = netvsc_set_queues(net, dev, count);
 -      if (ret == 0)
 -              nvdev->num_chn = count;
 -      else
 -              netvsc_set_queues(net, dev, nvdev->num_chn);
 +      nvdev = rndis_filter_device_add(dev, &device_info);
 +      if (!IS_ERR(nvdev)) {
 +              netif_set_real_num_tx_queues(net, nvdev->num_chn);
 +              netif_set_real_num_rx_queues(net, nvdev->num_chn);
 +      } else {
 +              ret = PTR_ERR(nvdev);
 +              device_info.num_chn = orig;
 +              nvdev = rndis_filter_device_add(dev, &device_info);
  
 -      if (was_running)
 -              ret = netvsc_open(net);
 +              if (IS_ERR(nvdev)) {
 +                      netdev_err(net, "restoring channel setting failed: %ld\n",
 +                                 PTR_ERR(nvdev));
 +                      return ret;
 +              }
 +      }
 +
 +      if (was_opened)
 +              rndis_filter_open(nvdev);
  
        /* We may have missed link change notifications */
 +      net_device_ctx->last_reconfig = 0;
        schedule_delayed_work(&net_device_ctx->dwork, 0);
  
        return ret;
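
netvsc_set_channels() (and the MTU and ringparam paths further down) now follow a remove / re-add pattern: tear the RNDIS device down, try to add it back with the new parameters, and on failure re-add it with the original ones so the interface is never left without a device. A minimal sketch of that try-then-rollback shape with a hypothetical apply_config():

#include <stdio.h>

struct config { int channels; };

/* Pretend only configurations with at most 4 channels can be applied. */
static int apply_config(const struct config *c)
{
        return c->channels <= 4 ? 0 : -1;
}

static int change_channels(struct config *cur, int want)
{
        struct config next = { .channels = want };
        int err;

        err = apply_config(&next);
        if (!err) {
                *cur = next;            /* commit the new configuration */
                return 0;
        }

        /* Roll back: reapply the original configuration so the device
         * keeps working with its previous settings.
         */
        if (apply_config(cur))
                fprintf(stderr, "restoring channel setting failed\n");
        return err;
}

int main(void)
{
        struct config cfg = { .channels = 2 };

        printf("set 8 -> %d (still %d channels)\n",
               change_channels(&cfg, 8), cfg.channels);
        return 0;
}
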
@@@ -902,9 -832,6 +902,9 @@@ static void netvsc_init_settings(struc
  {
        struct net_device_context *ndc = netdev_priv(dev);
  
 +      ndc->udp4_l4_hash = true;
 +      ndc->udp6_l4_hash = true;
 +
        ndc->speed = SPEED_UNKNOWN;
        ndc->duplex = DUPLEX_FULL;
  }
@@@ -942,61 -869,41 +942,61 @@@ static int netvsc_set_link_ksettings(st
  static int netvsc_change_mtu(struct net_device *ndev, int mtu)
  {
        struct net_device_context *ndevctx = netdev_priv(ndev);
 +      struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
        struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
        struct hv_device *hdev = ndevctx->device_ctx;
 +      int orig_mtu = ndev->mtu;
        struct netvsc_device_info device_info;
 -      bool was_running;
 +      bool was_opened;
        int ret = 0;
  
        if (!nvdev || nvdev->destroy)
                return -ENODEV;
  
 -      was_running = netif_running(ndev);
 -      if (was_running) {
 -              ret = netvsc_close(ndev);
 +      /* Change MTU of underlying VF netdev first. */
 +      if (vf_netdev) {
 +              ret = dev_set_mtu(vf_netdev, mtu);
                if (ret)
                        return ret;
        }
  
 +      netif_device_detach(ndev);
 +      was_opened = rndis_filter_opened(nvdev);
 +      if (was_opened)
 +              rndis_filter_close(nvdev);
 +
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
        device_info.num_chn = nvdev->num_chn;
 -      device_info.max_num_vrss_chns = nvdev->num_chn;
 +      device_info.send_sections = nvdev->send_section_cnt;
 +      device_info.recv_sections = nvdev->recv_section_cnt;
  
        rndis_filter_device_remove(hdev, nvdev);
  
 -      /* 'nvdev' has been freed in rndis_filter_device_remove() ->
 -       * netvsc_device_remove () -> free_netvsc_device().
 -       * We mustn't access it before it's re-created in
 -       * rndis_filter_device_add() -> netvsc_device_add().
 -       */
 -
        ndev->mtu = mtu;
  
 -      rndis_filter_device_add(hdev, &device_info);
 +      nvdev = rndis_filter_device_add(hdev, &device_info);
 +      if (IS_ERR(nvdev)) {
 +              ret = PTR_ERR(nvdev);
 +
 +              /* Attempt rollback to original MTU */
 +              ndev->mtu = orig_mtu;
 +              nvdev = rndis_filter_device_add(hdev, &device_info);
 +
 +              if (vf_netdev)
 +                      dev_set_mtu(vf_netdev, orig_mtu);
 +
 +              if (IS_ERR(nvdev)) {
 +                      netdev_err(ndev, "restoring mtu failed: %ld\n",
 +                                 PTR_ERR(nvdev));
 +                      return ret;
 +              }
 +      }
 +
 +      if (was_opened)
 +              rndis_filter_open(nvdev);
  
 -      if (was_running)
 -              ret = netvsc_open(ndev);
 +      netif_device_attach(ndev);
  
        /* We may have missed link change notifications */
        schedule_delayed_work(&ndevctx->dwork, 0);
        return ret;
  }
  
 +static void netvsc_get_vf_stats(struct net_device *net,
 +                              struct netvsc_vf_pcpu_stats *tot)
 +{
 +      struct net_device_context *ndev_ctx = netdev_priv(net);
 +      int i;
 +
 +      memset(tot, 0, sizeof(*tot));
 +
 +      for_each_possible_cpu(i) {
 +              const struct netvsc_vf_pcpu_stats *stats
 +                      = per_cpu_ptr(ndev_ctx->vf_stats, i);
 +              u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 +              unsigned int start;
 +
 +              do {
 +                      start = u64_stats_fetch_begin_irq(&stats->syncp);
 +                      rx_packets = stats->rx_packets;
 +                      tx_packets = stats->tx_packets;
 +                      rx_bytes = stats->rx_bytes;
 +                      tx_bytes = stats->tx_bytes;
 +              } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
 +
 +              tot->rx_packets += rx_packets;
 +              tot->tx_packets += tx_packets;
 +              tot->rx_bytes   += rx_bytes;
 +              tot->tx_bytes   += tx_bytes;
 +              tot->tx_dropped += stats->tx_dropped;
 +      }
 +}
 +
  static void netvsc_get_stats64(struct net_device *net,
                               struct rtnl_link_stats64 *t)
  {
        struct net_device_context *ndev_ctx = netdev_priv(net);
        struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
 +      struct netvsc_vf_pcpu_stats vf_tot;
        int i;
  
        if (!nvdev)
                return;
  
 +      netdev_stats_to_stats64(t, &net->stats);
 +
 +      netvsc_get_vf_stats(net, &vf_tot);
 +      t->rx_packets += vf_tot.rx_packets;
 +      t->tx_packets += vf_tot.tx_packets;
 +      t->rx_bytes   += vf_tot.rx_bytes;
 +      t->tx_bytes   += vf_tot.tx_bytes;
 +      t->tx_dropped += vf_tot.tx_dropped;
 +
        for (i = 0; i < nvdev->num_chn; i++) {
                const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
                const struct netvsc_stats *stats;
                t->rx_packets   += packets;
                t->multicast    += multicast;
        }
 -
 -      t->tx_dropped   = net->stats.tx_dropped;
 -      t->tx_errors    = net->stats.tx_errors;
 -
 -      t->rx_dropped   = net->stats.rx_dropped;
 -      t->rx_errors    = net->stats.rx_errors;
  }
  
  static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
  {
 +      struct net_device_context *ndc = netdev_priv(ndev);
 +      struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
        struct sockaddr *addr = p;
 -      char save_adr[ETH_ALEN];
 -      unsigned char save_aatype;
        int err;
  
 -      memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
 -      save_aatype = ndev->addr_assign_type;
 -
 -      err = eth_mac_addr(ndev, p);
 -      if (err != 0)
 +      err = eth_prepare_mac_addr_change(ndev, p);
 +      if (err)
                return err;
  
 -      err = rndis_filter_set_device_mac(ndev, addr->sa_data);
 -      if (err != 0) {
 -              /* roll back to saved MAC */
 -              memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
 -              ndev->addr_assign_type = save_aatype;
 +      if (!nvdev)
 +              return -ENODEV;
 +
 +      if (vf_netdev) {
 +              err = dev_set_mac_address(vf_netdev, addr);
 +              if (err)
 +                      return err;
 +      }
 +
 +      err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
 +      if (!err) {
 +              eth_commit_mac_addr_change(ndev, p);
 +      } else if (vf_netdev) {
 +              /* rollback change on VF */
 +              memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
 +              dev_set_mac_address(vf_netdev, addr);
        }
  
        return err;
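
netvsc_get_vf_stats() above sums the per-CPU VF counters into one structure; the u64_stats_fetch_begin_irq()/retry() pair protects against torn 64-bit reads on 32-bit hosts while a CPU is updating its counters. A simplified user-space sketch of the aggregation step (without the seqcount retry, which needs the kernel helpers):

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct vf_stats { uint64_t rx_packets, rx_bytes, tx_packets, tx_bytes; };

static void sum_stats(const struct vf_stats per_cpu[NCPUS],
                      struct vf_stats *tot)
{
        *tot = (struct vf_stats){ 0 };

        for (int i = 0; i < NCPUS; i++) {
                /* In the kernel this block is wrapped in
                 * u64_stats_fetch_begin_irq()/_retry() so a concurrent
                 * writer is never observed halfway through an update.
                 */
                tot->rx_packets += per_cpu[i].rx_packets;
                tot->rx_bytes   += per_cpu[i].rx_bytes;
                tot->tx_packets += per_cpu[i].tx_packets;
                tot->tx_bytes   += per_cpu[i].tx_bytes;
        }
}

int main(void)
{
        struct vf_stats cpus[NCPUS] = { { 1, 100, 2, 200 }, { 3, 300, 4, 400 } };
        struct vf_stats tot;

        sum_stats(cpus, &tot);
        printf("rx %llu/%llu tx %llu/%llu\n",
               (unsigned long long)tot.rx_packets,
               (unsigned long long)tot.rx_bytes,
               (unsigned long long)tot.tx_packets,
               (unsigned long long)tot.tx_bytes);
        return 0;
}
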
@@@ -1126,18 -990,9 +1126,18 @@@ static const struct 
        { "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
        { "tx_too_big",   offsetof(struct netvsc_ethtool_stats, tx_too_big) },
        { "tx_busy",      offsetof(struct netvsc_ethtool_stats, tx_busy) },
 +      { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
 +      { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
 +}, vf_stats[] = {
 +      { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 +      { "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
 +      { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) },
 +      { "vf_tx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) },
 +      { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) },
  };
  
  #define NETVSC_GLOBAL_STATS_LEN       ARRAY_SIZE(netvsc_stats)
 +#define NETVSC_VF_STATS_LEN   ARRAY_SIZE(vf_stats)
  
  /* 4 statistics per queue (rx/tx packets/bytes) */
  #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
@@@ -1152,9 -1007,7 +1152,9 @@@ static int netvsc_get_sset_count(struc
  
        switch (string_set) {
        case ETH_SS_STATS:
 -              return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
 +              return NETVSC_GLOBAL_STATS_LEN
 +                      + NETVSC_VF_STATS_LEN
 +                      + NETVSC_QUEUE_STATS_LEN(nvdev);
        default:
                return -EINVAL;
        }
@@@ -1164,10 -1017,9 +1164,10 @@@ static void netvsc_get_ethtool_stats(st
                                     struct ethtool_stats *stats, u64 *data)
  {
        struct net_device_context *ndc = netdev_priv(dev);
 -      struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
        const void *nds = &ndc->eth_stats;
        const struct netvsc_stats *qstats;
 +      struct netvsc_vf_pcpu_stats sum;
        unsigned int start;
        u64 packets, bytes;
        int i, j;
        for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
                data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
  
 +      netvsc_get_vf_stats(dev, &sum);
 +      for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
 +              data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
 +
        for (j = 0; j < nvdev->num_chn; j++) {
                qstats = &nvdev->chan_table[j].tx_stats;
  
  static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  {
        struct net_device_context *ndc = netdev_priv(dev);
 -      struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
        u8 *p = data;
        int i;
  
  
        switch (stringset) {
        case ETH_SS_STATS:
 -              for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
 -                      memcpy(p + i * ETH_GSTRING_LEN,
 -                             netvsc_stats[i].name, ETH_GSTRING_LEN);
 +              for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
 +                      memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
 +                      p += ETH_GSTRING_LEN;
 +              }
 +
 +              for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
 +                      memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
 +                      p += ETH_GSTRING_LEN;
 +              }
  
 -              p += i * ETH_GSTRING_LEN;
                for (i = 0; i < nvdev->num_chn; i++) {
                        sprintf(p, "tx_queue_%u_packets", i);
                        p += ETH_GSTRING_LEN;
  }
  
  static int
 -netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
 +netvsc_get_rss_hash_opts(struct net_device_context *ndc,
                         struct ethtool_rxnfc *info)
  {
        info->data = RXH_IP_SRC | RXH_IP_DST;
        case TCP_V4_FLOW:
        case TCP_V6_FLOW:
                info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 -              /* fallthrough */
 +              break;
 +
        case UDP_V4_FLOW:
 +              if (ndc->udp4_l4_hash)
 +                      info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 +
 +              break;
 +
        case UDP_V6_FLOW:
 +              if (ndc->udp6_l4_hash)
 +                      info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 +
 +              break;
 +
        case IPV4_FLOW:
        case IPV6_FLOW:
                break;
@@@ -1281,7 -1113,7 +1281,7 @@@ netvsc_get_rxnfc(struct net_device *dev
                 u32 *rules)
  {
        struct net_device_context *ndc = netdev_priv(dev);
 -      struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
  
        if (!nvdev)
                return -ENODEV;
                return 0;
  
        case ETHTOOL_GRXFH:
 -              return netvsc_get_rss_hash_opts(nvdev, info);
 +              return netvsc_get_rss_hash_opts(ndc, info);
 +      }
 +      return -EOPNOTSUPP;
 +}
 +
 +static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
 +                                  struct ethtool_rxnfc *info)
 +{
 +      if (info->data == (RXH_IP_SRC | RXH_IP_DST |
 +                         RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
 +              if (info->flow_type == UDP_V4_FLOW)
 +                      ndc->udp4_l4_hash = true;
 +              else if (info->flow_type == UDP_V6_FLOW)
 +                      ndc->udp6_l4_hash = true;
 +              else
 +                      return -EOPNOTSUPP;
 +
 +              return 0;
 +      }
 +
 +      if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
 +              if (info->flow_type == UDP_V4_FLOW)
 +                      ndc->udp4_l4_hash = false;
 +              else if (info->flow_type == UDP_V6_FLOW)
 +                      ndc->udp6_l4_hash = false;
 +              else
 +                      return -EOPNOTSUPP;
 +
 +              return 0;
        }
 +
 +      return -EOPNOTSUPP;
 +}
 +
 +static int
 +netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
 +{
 +      struct net_device_context *ndc = netdev_priv(ndev);
 +
 +      if (info->cmd == ETHTOOL_SRXFH)
 +              return netvsc_set_rss_hash_opts(ndc, info);
 +
        return -EOPNOTSUPP;
  }
  
@@@ -1371,7 -1163,7 +1371,7 @@@ static int netvsc_get_rxfh(struct net_d
                           u8 *hfunc)
  {
        struct net_device_context *ndc = netdev_priv(dev);
 -      struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
 +      struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
        struct rndis_device *rndis_dev;
        int i;
  
@@@ -1427,104 -1219,6 +1427,104 @@@ static int netvsc_set_rxfh(struct net_d
        return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn);
  }
  
 +/* Hyper-V RNDIS protocol does not have ring in the HW sense.
 + * It does have pre-allocated receive area which is divided into sections.
 + */
 +static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
 +                                 struct ethtool_ringparam *ring)
 +{
 +      u32 max_buf_size;
 +
 +      ring->rx_pending = nvdev->recv_section_cnt;
 +      ring->tx_pending = nvdev->send_section_cnt;
 +
 +      if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
 +              max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
 +      else
 +              max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
 +
 +      ring->rx_max_pending = max_buf_size / nvdev->recv_section_size;
 +      ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE
 +              / nvdev->send_section_size;
 +}
 +
 +static void netvsc_get_ringparam(struct net_device *ndev,
 +                               struct ethtool_ringparam *ring)
 +{
 +      struct net_device_context *ndevctx = netdev_priv(ndev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
 +
 +      if (!nvdev)
 +              return;
 +
 +      __netvsc_get_ringparam(nvdev, ring);
 +}
 +
 +static int netvsc_set_ringparam(struct net_device *ndev,
 +                              struct ethtool_ringparam *ring)
 +{
 +      struct net_device_context *ndevctx = netdev_priv(ndev);
 +      struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
 +      struct hv_device *hdev = ndevctx->device_ctx;
 +      struct netvsc_device_info device_info;
 +      struct ethtool_ringparam orig;
 +      u32 new_tx, new_rx;
 +      bool was_opened;
 +      int ret = 0;
 +
 +      if (!nvdev || nvdev->destroy)
 +              return -ENODEV;
 +
 +      memset(&orig, 0, sizeof(orig));
 +      __netvsc_get_ringparam(nvdev, &orig);
 +
 +      new_tx = clamp_t(u32, ring->tx_pending,
 +                       NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending);
 +      new_rx = clamp_t(u32, ring->rx_pending,
 +                       NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending);
 +
 +      if (new_tx == orig.tx_pending &&
 +          new_rx == orig.rx_pending)
 +              return 0;        /* no change */
 +
 +      memset(&device_info, 0, sizeof(device_info));
 +      device_info.num_chn = nvdev->num_chn;
 +      device_info.ring_size = ring_size;
 +      device_info.send_sections = new_tx;
 +      device_info.recv_sections = new_rx;
 +
 +      netif_device_detach(ndev);
 +      was_opened = rndis_filter_opened(nvdev);
 +      if (was_opened)
 +              rndis_filter_close(nvdev);
 +
 +      rndis_filter_device_remove(hdev, nvdev);
 +
 +      nvdev = rndis_filter_device_add(hdev, &device_info);
 +      if (IS_ERR(nvdev)) {
 +              ret = PTR_ERR(nvdev);
 +
 +              device_info.send_sections = orig.tx_pending;
 +              device_info.recv_sections = orig.rx_pending;
 +              nvdev = rndis_filter_device_add(hdev, &device_info);
 +              if (IS_ERR(nvdev)) {
 +                      netdev_err(ndev, "restoring ringparam failed: %ld\n",
 +                                 PTR_ERR(nvdev));
 +                      return ret;
 +              }
 +      }
 +
 +      if (was_opened)
 +              rndis_filter_open(nvdev);
 +      netif_device_attach(ndev);
 +
 +      /* We may have missed link change notifications */
 +      ndevctx->last_reconfig = 0;
 +      schedule_delayed_work(&ndevctx->dwork, 0);
 +
 +      return ret;
 +}
 +
  static const struct ethtool_ops ethtool_ops = {
        .get_drvinfo    = netvsc_get_drvinfo,
        .get_link       = ethtool_op_get_link,
        .set_channels   = netvsc_set_channels,
        .get_ts_info    = ethtool_op_get_ts_info,
        .get_rxnfc      = netvsc_get_rxnfc,
 +      .set_rxnfc      = netvsc_set_rxnfc,
        .get_rxfh_key_size = netvsc_get_rxfh_key_size,
        .get_rxfh_indir_size = netvsc_rss_indir_size,
        .get_rxfh       = netvsc_get_rxfh,
        .set_rxfh       = netvsc_set_rxfh,
        .get_link_ksettings = netvsc_get_link_ksettings,
        .set_link_ksettings = netvsc_set_link_ksettings,
 +      .get_ringparam  = netvsc_get_ringparam,
 +      .set_ringparam  = netvsc_set_ringparam,
  };
  
  static const struct net_device_ops device_ops = {
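
The new netvsc_set_ringparam() clamps the requested section counts to the limits reported by __netvsc_get_ringparam() and returns early when nothing changes, before doing the same remove/re-add dance as the channel path. A small sketch of the clamp-and-short-circuit step, with made-up limits:

#include <stdint.h>
#include <stdio.h>

static uint32_t clamp_u32(uint32_t v, uint32_t lo, uint32_t hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
        const uint32_t min_tx = 10, max_tx = 2560;      /* hypothetical limits */
        uint32_t cur_tx = 192;
        uint32_t req = 1000000;                         /* user asks for too much */

        uint32_t new_tx = clamp_u32(req, min_tx, max_tx);

        if (new_tx == cur_tx) {
                puts("no change, nothing to do");
                return 0;
        }
        printf("reconfigure tx sections: %u -> %u\n", cur_tx, new_tx);
        return 0;
}
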
@@@ -1578,7 -1269,12 +1578,12 @@@ static void netvsc_link_change(struct w
        bool notify = false, reschedule = false;
        unsigned long flags, next_reconfig, delay;
  
-       rtnl_lock();
+       /* if changes are happening, comeback later */
+       if (!rtnl_trylock()) {
+               schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
+               return;
+       }
        net_device = rtnl_dereference(ndev_ctx->nvdev);
        if (!net_device)
                goto out_unlock;
        case RNDIS_STATUS_MEDIA_CONNECT:
                if (rdev->link_state) {
                        rdev->link_state = false;
 -                      if (!ndev_ctx->datapath)
 -                              netif_carrier_on(net);
 +                      netif_carrier_on(net);
                        netif_tx_wake_all_queues(net);
                } else {
                        notify = true;
@@@ -1694,7 -1391,7 +1699,7 @@@ static struct net_device *get_netvsc_by
                        continue;       /* not a netvsc device */
  
                net_device_ctx = netdev_priv(dev);
 -              if (net_device_ctx->nvdev == NULL)
 +              if (!rtnl_dereference(net_device_ctx->nvdev))
                        continue;       /* device is removed */
  
                if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
        return NULL;
  }
  
 +/* Called when VF is injecting data into network stack.
 + * Change the associated network device from VF to netvsc.
 + * note: already called with rcu_read_lock
 + */
 +static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
 +{
 +      struct sk_buff *skb = *pskb;
 +      struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
 +      struct net_device_context *ndev_ctx = netdev_priv(ndev);
 +      struct netvsc_vf_pcpu_stats *pcpu_stats
 +               = this_cpu_ptr(ndev_ctx->vf_stats);
 +
 +      skb->dev = ndev;
 +
 +      u64_stats_update_begin(&pcpu_stats->syncp);
 +      pcpu_stats->rx_packets++;
 +      pcpu_stats->rx_bytes += skb->len;
 +      u64_stats_update_end(&pcpu_stats->syncp);
 +
 +      return RX_HANDLER_ANOTHER;
 +}
 +
 +static int netvsc_vf_join(struct net_device *vf_netdev,
 +                        struct net_device *ndev)
 +{
 +      struct net_device_context *ndev_ctx = netdev_priv(ndev);
 +      int ret;
 +
 +      ret = netdev_rx_handler_register(vf_netdev,
 +                                       netvsc_vf_handle_frame, ndev);
 +      if (ret != 0) {
 +              netdev_err(vf_netdev,
 +                         "can not register netvsc VF receive handler (err = %d)\n",
 +                         ret);
 +              goto rx_handler_failed;
 +      }
 +
 +      ret = netdev_upper_dev_link(vf_netdev, ndev);
 +      if (ret != 0) {
 +              netdev_err(vf_netdev,
 +                         "can not set master device %s (err = %d)\n",
 +                         ndev->name, ret);
 +              goto upper_link_failed;
 +      }
 +
 +      /* set slave flag before open to prevent IPv6 addrconf */
 +      vf_netdev->flags |= IFF_SLAVE;
 +
 +      schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
 +
 +      call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
 +
 +      netdev_info(vf_netdev, "joined to %s\n", ndev->name);
 +      return 0;
 +
 +upper_link_failed:
 +      netdev_rx_handler_unregister(vf_netdev);
 +rx_handler_failed:
 +      return ret;
 +}
 +
 +static void __netvsc_vf_setup(struct net_device *ndev,
 +                            struct net_device *vf_netdev)
 +{
 +      int ret;
 +
 +      /* Align MTU of VF with master */
 +      ret = dev_set_mtu(vf_netdev, ndev->mtu);
 +      if (ret)
 +              netdev_warn(vf_netdev,
 +                          "unable to change mtu to %u\n", ndev->mtu);
 +
 +      if (netif_running(ndev)) {
 +              ret = dev_open(vf_netdev);
 +              if (ret)
 +                      netdev_warn(vf_netdev,
 +                                  "unable to open: %d\n", ret);
 +      }
 +}
 +
 +/* Setup VF as slave of the synthetic device.
 + * Runs in workqueue to avoid recursion in netlink callbacks.
 + */
 +static void netvsc_vf_setup(struct work_struct *w)
 +{
 +      struct net_device_context *ndev_ctx
 +              = container_of(w, struct net_device_context, vf_takeover.work);
 +      struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
 +      struct net_device *vf_netdev;
 +
 +      if (!rtnl_trylock()) {
 +              schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
 +              return;
 +      }
 +
 +      vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
 +      if (vf_netdev)
 +              __netvsc_vf_setup(ndev, vf_netdev);
 +
 +      rtnl_unlock();
 +}
 +
  static int netvsc_register_vf(struct net_device *vf_netdev)
  {
        struct net_device *ndev;
        if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
                return NOTIFY_DONE;
  
 +      if (netvsc_vf_join(vf_netdev, ndev) != 0)
 +              return NOTIFY_DONE;
 +
        netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
 -      /*
 -       * Take a reference on the module.
 -       */
 +
 +      /* Prevent this module from being unloaded while VF is registered */
        try_module_get(THIS_MODULE);
  
        dev_hold(vf_netdev);
  
  static int netvsc_vf_up(struct net_device *vf_netdev)
  {
 -      struct net_device *ndev;
 -      struct netvsc_device *netvsc_dev;
        struct net_device_context *net_device_ctx;
 +      struct netvsc_device *netvsc_dev;
 +      struct net_device *ndev;
  
        ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
  
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
 +      if (!netvsc_dev)
 +              return NOTIFY_DONE;
  
 -      netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
 -
 -      /*
 -       * Open the device before switching data path.
 -       */
 +      /* Bump refcount when datapath is active - Why? */
        rndis_filter_open(netvsc_dev);
  
 -      /*
 -       * notify the host to switch the data path.
 -       */
 +      /* notify the host to switch the data path. */
        netvsc_switch_datapath(ndev, true);
        netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
  
 -      netif_carrier_off(ndev);
 -
 -      /* Now notify peers through VF device. */
 -      call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
 -
        return NOTIFY_OK;
  }
  
  static int netvsc_vf_down(struct net_device *vf_netdev)
  {
 -      struct net_device *ndev;
 -      struct netvsc_device *netvsc_dev;
        struct net_device_context *net_device_ctx;
 +      struct netvsc_device *netvsc_dev;
 +      struct net_device *ndev;
  
        ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
  
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
 +      if (!netvsc_dev)
 +              return NOTIFY_DONE;
  
 -      netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
        netvsc_switch_datapath(ndev, false);
        netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
        rndis_filter_close(netvsc_dev);
 -      netif_carrier_on(ndev);
 -
 -      /* Now notify peers through netvsc device. */
 -      call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
  
        return NOTIFY_OK;
  }
@@@ -1899,11 -1504,9 +1904,11 @@@ static int netvsc_unregister_vf(struct 
                return NOTIFY_DONE;
  
        net_device_ctx = netdev_priv(ndev);
 +      cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
  
        netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
  
 +      netdev_upper_dev_unlink(vf_netdev, ndev);
        RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
        dev_put(vf_netdev);
        module_put(THIS_MODULE);
@@@ -1917,12 -1520,12 +1922,12 @@@ static int netvsc_probe(struct hv_devic
        struct net_device_context *net_device_ctx;
        struct netvsc_device_info device_info;
        struct netvsc_device *nvdev;
 -      int ret;
 +      int ret = -ENOMEM;
  
        net = alloc_etherdev_mq(sizeof(struct net_device_context),
                                VRSS_CHANNEL_MAX);
        if (!net)
 -              return -ENOMEM;
 +              goto no_net;
  
        netif_carrier_off(net);
  
  
        spin_lock_init(&net_device_ctx->lock);
        INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 +      INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
 +
 +      net_device_ctx->vf_stats
 +              = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
 +      if (!net_device_ctx->vf_stats)
 +              goto no_stats;
  
        net->netdev_ops = &device_ops;
        net->ethtool_ops = &ethtool_ops;
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
        device_info.num_chn = VRSS_CHANNEL_DEFAULT;
 -      ret = rndis_filter_device_add(dev, &device_info);
 -      if (ret != 0) {
 +      device_info.send_sections = NETVSC_DEFAULT_TX;
 +      device_info.recv_sections = NETVSC_DEFAULT_RX;
 +
 +      nvdev = rndis_filter_device_add(dev, &device_info);
 +      if (IS_ERR(nvdev)) {
 +              ret = PTR_ERR(nvdev);
                netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
 -              free_netdev(net);
 -              hv_set_drvdata(dev, NULL);
 -              return ret;
 +              goto rndis_failed;
        }
 +
        memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
  
        /* hw_features computed in rndis_filter_device_add */
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
        net->vlan_features = net->features;
  
 -      /* RCU not necessary here, device not registered */
 -      nvdev = net_device_ctx->nvdev;
        netif_set_real_num_tx_queues(net, nvdev->num_chn);
        netif_set_real_num_rx_queues(net, nvdev->num_chn);
  
 +      netdev_lockdep_set_classes(net);
 +
        /* MTU range: 68 - 1500 or 65521 */
        net->min_mtu = NETVSC_MTU_MIN;
        if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
        ret = register_netdev(net);
        if (ret != 0) {
                pr_err("Unable to register netdev.\n");
 -              rndis_filter_device_remove(dev, nvdev);
 -              free_netdev(net);
 +              goto register_failed;
        }
  
        return ret;
 +
 +register_failed:
 +      rndis_filter_device_remove(dev, nvdev);
 +rndis_failed:
 +      free_percpu(net_device_ctx->vf_stats);
 +no_stats:
 +      hv_set_drvdata(dev, NULL);
 +      free_netdev(net);
 +no_net:
 +      return ret;
  }
  
  static int netvsc_remove(struct hv_device *dev)
         * removed. Also blocks mtu and channel changes.
         */
        rtnl_lock();
 -      rndis_filter_device_remove(dev, ndev_ctx->nvdev);
 +      rndis_filter_device_remove(dev,
 +                                 rtnl_dereference(ndev_ctx->nvdev));
        rtnl_unlock();
  
        unregister_netdev(net);
  
        hv_set_drvdata(dev, NULL);
  
 +      free_percpu(ndev_ctx->vf_stats);
        free_netdev(net);
        return 0;
  }
diff --combined drivers/net/phy/phy.c
index dae13f028c84ee177800a138dd80e3a5228a2d60,d0626bf5c540911b0d15bdbab1b960145b6d124c..e842d2cd1ee750f8930028370c1c7fac5a52dc77
@@@ -30,6 -30,7 +30,6 @@@
  #include <linux/ethtool.h>
  #include <linux/phy.h>
  #include <linux/phy_led_triggers.h>
 -#include <linux/timer.h>
  #include <linux/workqueue.h>
  #include <linux/mdio.h>
  #include <linux/io.h>
  
  #include <asm/irq.h>
  
 -static const char *phy_speed_to_str(int speed)
 -{
 -      switch (speed) {
 -      case SPEED_10:
 -              return "10Mbps";
 -      case SPEED_100:
 -              return "100Mbps";
 -      case SPEED_1000:
 -              return "1Gbps";
 -      case SPEED_2500:
 -              return "2.5Gbps";
 -      case SPEED_5000:
 -              return "5Gbps";
 -      case SPEED_10000:
 -              return "10Gbps";
 -      case SPEED_14000:
 -              return "14Gbps";
 -      case SPEED_20000:
 -              return "20Gbps";
 -      case SPEED_25000:
 -              return "25Gbps";
 -      case SPEED_40000:
 -              return "40Gbps";
 -      case SPEED_50000:
 -              return "50Gbps";
 -      case SPEED_56000:
 -              return "56Gbps";
 -      case SPEED_100000:
 -              return "100Gbps";
 -      case SPEED_UNKNOWN:
 -              return "Unknown";
 -      default:
 -              return "Unsupported (update phy.c)";
 -      }
 -}
 -
  #define PHY_STATE_STR(_state)                 \
        case PHY_##_state:                      \
                return __stringify(_state);     \
@@@ -73,7 -110,7 +73,7 @@@ void phy_print_status(struct phy_devic
                netdev_info(phydev->attached_dev,
                        "Link is Up - %s/%s - flow control %s\n",
                        phy_speed_to_str(phydev->speed),
 -                      DUPLEX_FULL == phydev->duplex ? "Full" : "Half",
 +                      phy_duplex_to_str(phydev->duplex),
                        phydev->pause ? "rx/tx" : "off");
        } else  {
                netdev_info(phydev->attached_dev, "Link is Down\n");
@@@ -157,6 -194,123 +157,6 @@@ int phy_aneg_done(struct phy_device *ph
  }
  EXPORT_SYMBOL(phy_aneg_done);
  
 -/* A structure for mapping a particular speed and duplex
 - * combination to a particular SUPPORTED and ADVERTISED value
 - */
 -struct phy_setting {
 -      int speed;
 -      int duplex;
 -      u32 setting;
 -};
 -
 -/* A mapping of all SUPPORTED settings to speed/duplex.  This table
 - * must be grouped by speed and sorted in descending match priority
 - * - iow, descending speed. */
 -static const struct phy_setting settings[] = {
 -      {
 -              .speed = SPEED_10000,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_10000baseKR_Full,
 -      },
 -      {
 -              .speed = SPEED_10000,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_10000baseKX4_Full,
 -      },
 -      {
 -              .speed = SPEED_10000,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_10000baseT_Full,
 -      },
 -      {
 -              .speed = SPEED_2500,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_2500baseX_Full,
 -      },
 -      {
 -              .speed = SPEED_1000,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_1000baseKX_Full,
 -      },
 -      {
 -              .speed = SPEED_1000,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_1000baseT_Full,
 -      },
 -      {
 -              .speed = SPEED_1000,
 -              .duplex = DUPLEX_HALF,
 -              .setting = SUPPORTED_1000baseT_Half,
 -      },
 -      {
 -              .speed = SPEED_100,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_100baseT_Full,
 -      },
 -      {
 -              .speed = SPEED_100,
 -              .duplex = DUPLEX_HALF,
 -              .setting = SUPPORTED_100baseT_Half,
 -      },
 -      {
 -              .speed = SPEED_10,
 -              .duplex = DUPLEX_FULL,
 -              .setting = SUPPORTED_10baseT_Full,
 -      },
 -      {
 -              .speed = SPEED_10,
 -              .duplex = DUPLEX_HALF,
 -              .setting = SUPPORTED_10baseT_Half,
 -      },
 -};
 -
 -/**
 - * phy_lookup_setting - lookup a PHY setting
 - * @speed: speed to match
 - * @duplex: duplex to match
 - * @features: allowed link modes
 - * @exact: an exact match is required
 - *
 - * Search the settings array for a setting that matches the speed and
 - * duplex, and which is supported.
 - *
 - * If @exact is unset, either an exact match or %NULL for no match will
 - * be returned.
 - *
 - * If @exact is set, an exact match, the fastest supported setting at
 - * or below the specified speed, the slowest supported setting, or if
 - * they all fail, %NULL will be returned.
 - */
 -static const struct phy_setting *
 -phy_lookup_setting(int speed, int duplex, u32 features, bool exact)
 -{
 -      const struct phy_setting *p, *match = NULL, *last = NULL;
 -      int i;
 -
 -      for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
 -              if (p->setting & features) {
 -                      last = p;
 -                      if (p->speed == speed && p->duplex == duplex) {
 -                              /* Exact match for speed and duplex */
 -                              match = p;
 -                              break;
 -                      } else if (!exact) {
 -                              if (!match && p->speed <= speed)
 -                                      /* Candidate */
 -                                      match = p;
 -
 -                              if (p->speed < speed)
 -                                      break;
 -                      }
 -              }
 -      }
 -
 -      if (!match && !exact)
 -              match = last;
 -
 -      return match;
 -}
 -
  /**
   * phy_find_valid - find a PHY setting that matches the requested parameters
   * @speed: desired speed
  static const struct phy_setting *
  phy_find_valid(int speed, int duplex, u32 supported)
  {
 -      return phy_lookup_setting(speed, duplex, supported, false);
 +      unsigned long mask = supported;
 +
 +      return phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, false);
  }
  
  /**
@@@ -192,9 -344,16 +192,9 @@@ unsigned int phy_supported_speeds(struc
                                  unsigned int *speeds,
                                  unsigned int size)
  {
 -      unsigned int count = 0;
 -      unsigned int idx = 0;
 +      unsigned long supported = phy->supported;
  
 -      for (idx = 0; idx < ARRAY_SIZE(settings) && count < size; idx++)
 -              /* Assumes settings are grouped by speed */
 -              if ((settings[idx].setting & phy->supported) &&
 -                  (count == 0 || speeds[count - 1] != settings[idx].speed))
 -                      speeds[count++] = settings[idx].speed;
 -
 -      return count;
 +      return phy_speeds(speeds, size, &supported, BITS_PER_LONG);
  }
  
  /**
   */
  static inline bool phy_check_valid(int speed, int duplex, u32 features)
  {
 -      return !!phy_lookup_setting(speed, duplex, features, true);
 +      unsigned long mask = features;
 +
 +      return !!phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, true);
  }
  
  /**
@@@ -548,15 -705,14 +548,15 @@@ EXPORT_SYMBOL(phy_start_aneg)
   *
   * Description: The PHY infrastructure can run a state machine
   *   which tracks whether the PHY is starting up, negotiating,
 - *   etc.  This function starts the timer which tracks the state
 - *   of the PHY.  If you want to maintain your own state machine,
 + *   etc.  This function starts the delayed workqueue which tracks
 + *   the state of the PHY. If you want to maintain your own state machine,
   *   do not call this function.
   */
  void phy_start_machine(struct phy_device *phydev)
  {
        queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
  }
 +EXPORT_SYMBOL_GPL(phy_start_machine);
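  (Not part of the patch: a rough sketch of a MAC driver that lets phylib's delayed-work state machine do the tracking, as the kernel-doc above describes. The mydrv_* names and the "mdio0:01" bus id are invented.)

  #include <linux/netdevice.h>
  #include <linux/phy.h>

  static void mydrv_adjust_link(struct net_device *ndev)
  {
  	/* called back by the state machine on link/speed/duplex changes */
  }

  static int mydrv_open(struct net_device *ndev)
  {
  	struct phy_device *phydev;

  	phydev = phy_connect(ndev, "mdio0:01", mydrv_adjust_link,
  			     PHY_INTERFACE_MODE_RGMII);
  	if (IS_ERR(phydev))
  		return PTR_ERR(phydev);

  	phy_start(phydev);	/* phy_connect() already queued the state machine work */
  	return 0;
  }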
  
  /**
   * phy_trigger_machine - trigger the state machine to run
@@@ -581,9 -737,9 +581,9 @@@ void phy_trigger_machine(struct phy_dev
   * phy_stop_machine - stop the PHY state machine tracking
   * @phydev: target phy_device struct
   *
 - * Description: Stops the state machine timer, sets the state to UP
 - *   (unless it wasn't up yet). This function must be called BEFORE
 - *   phy_detach.
 + * Description: Stops the state machine delayed workqueue, sets the
 + *   state to UP (unless it wasn't up yet). This function must be
 + *   called BEFORE phy_detach.
   */
  void phy_stop_machine(struct phy_device *phydev)
  {
        if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
                phydev->state = PHY_UP;
        mutex_unlock(&phydev->lock);
-       /* Now we can run the state machine synchronously */
-       phy_state_machine(&phydev->state_queue.work);
  }
  
  /**
@@@ -866,15 -1019,9 +863,15 @@@ void phy_start(struct phy_device *phyde
  }
  EXPORT_SYMBOL(phy_start);
  
 -static void phy_adjust_link(struct phy_device *phydev)
 +static void phy_link_up(struct phy_device *phydev)
  {
 -      phydev->adjust_link(phydev->attached_dev);
 +      phydev->phy_link_change(phydev, true, true);
 +      phy_led_trigger_change_speed(phydev);
 +}
 +
 +static void phy_link_down(struct phy_device *phydev, bool do_carrier)
 +{
 +      phydev->phy_link_change(phydev, false, do_carrier);
        phy_led_trigger_change_speed(phydev);
  }
  
@@@ -919,7 -1066,8 +916,7 @@@ void phy_state_machine(struct work_stru
                /* If the link is down, give up on negotiation for now */
                if (!phydev->link) {
                        phydev->state = PHY_NOLINK;
 -                      netif_carrier_off(phydev->attached_dev);
 -                      phy_adjust_link(phydev);
 +                      phy_link_down(phydev, true);
                        break;
                }
  
                /* If AN is done, we're running */
                if (err > 0) {
                        phydev->state = PHY_RUNNING;
 -                      netif_carrier_on(phydev->attached_dev);
 -                      phy_adjust_link(phydev);
 -
 +                      phy_link_up(phydev);
                } else if (0 == phydev->link_timeout--)
                        needs_aneg = true;
                break;
                                }
                        }
                        phydev->state = PHY_RUNNING;
 -                      netif_carrier_on(phydev->attached_dev);
 -                      phy_adjust_link(phydev);
 +                      phy_link_up(phydev);
                }
                break;
        case PHY_FORCING:
  
                if (phydev->link) {
                        phydev->state = PHY_RUNNING;
 -                      netif_carrier_on(phydev->attached_dev);
 +                      phy_link_up(phydev);
                } else {
                        if (0 == phydev->link_timeout--)
                                needs_aneg = true;
 +                      phy_link_down(phydev, false);
                }
 -
 -              phy_adjust_link(phydev);
                break;
        case PHY_RUNNING:
                /* Only register a CHANGE if we are polling and link changed
  
                if (phydev->link) {
                        phydev->state = PHY_RUNNING;
 -                      netif_carrier_on(phydev->attached_dev);
 +                      phy_link_up(phydev);
                } else {
                        phydev->state = PHY_NOLINK;
 -                      netif_carrier_off(phydev->attached_dev);
 +                      phy_link_down(phydev, true);
                }
  
 -              phy_adjust_link(phydev);
 -
                if (phy_interrupt_is_valid(phydev))
                        err = phy_config_interrupt(phydev,
                                                   PHY_INTERRUPT_ENABLED);
        case PHY_HALTED:
                if (phydev->link) {
                        phydev->link = 0;
 -                      netif_carrier_off(phydev->attached_dev);
 -                      phy_adjust_link(phydev);
 +                      phy_link_down(phydev, true);
                        do_suspend = true;
                }
                break;
  
                                if (phydev->link) {
                                        phydev->state = PHY_RUNNING;
 -                                      netif_carrier_on(phydev->attached_dev);
 +                                      phy_link_up(phydev);
                                } else  {
                                        phydev->state = PHY_NOLINK;
 +                                      phy_link_down(phydev, false);
                                }
 -                              phy_adjust_link(phydev);
                        } else {
                                phydev->state = PHY_AN;
                                phydev->link_timeout = PHY_AN_TIMEOUT;
  
                        if (phydev->link) {
                                phydev->state = PHY_RUNNING;
 -                              netif_carrier_on(phydev->attached_dev);
 +                              phy_link_up(phydev);
                        } else  {
                                phydev->state = PHY_NOLINK;
 +                              phy_link_down(phydev, false);
                        }
 -                      phy_adjust_link(phydev);
                }
                break;
        }
        if (err < 0)
                phy_error(phydev);
  
 -      phydev_dbg(phydev, "PHY state change %s -> %s\n",
 -                 phy_state_to_str(old_state),
 -                 phy_state_to_str(phydev->state));
 +      if (old_state != phydev->state)
 +              phydev_dbg(phydev, "PHY state change %s -> %s\n",
 +                         phy_state_to_str(old_state),
 +                         phy_state_to_str(phydev->state));
  
        /* Only re-schedule a PHY state machine change if we are polling the
         * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
diff --combined drivers/net/phy/phy_device.c
index 9493fb369682eba2f5a6b80e3517ec9f31598746,2f742ae5b92ee7d7be080ec60fca7958f722e576..810f6fd2f6391e508432091d171b42664869013c
@@@ -688,19 -688,6 +688,19 @@@ struct phy_device *phy_find_first(struc
  }
  EXPORT_SYMBOL(phy_find_first);
  
 +static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier)
 +{
 +      struct net_device *netdev = phydev->attached_dev;
 +
 +      if (do_carrier) {
 +              if (up)
 +                      netif_carrier_on(netdev);
 +              else
 +                      netif_carrier_off(netdev);
 +      }
 +      phydev->adjust_link(netdev);
 +}
 +
  /**
   * phy_prepare_link - prepares the PHY layer to monitor link status
   * @phydev: target phy_device struct
@@@ -877,15 -864,17 +877,17 @@@ EXPORT_SYMBOL(phy_attached_info)
  #define ATTACHED_FMT "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)"
  void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
  {
+       const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
        if (!fmt) {
                dev_info(&phydev->mdio.dev, ATTACHED_FMT "\n",
-                        phydev->drv->name, phydev_name(phydev),
+                        drv_name, phydev_name(phydev),
                         phydev->irq);
        } else {
                va_list ap;
  
                dev_info(&phydev->mdio.dev, ATTACHED_FMT,
-                        phydev->drv->name, phydev_name(phydev),
+                        drv_name, phydev_name(phydev),
                         phydev->irq);
  
                va_start(ap, fmt);
@@@ -964,7 -953,6 +966,7 @@@ int phy_attach_direct(struct net_devic
                goto error;
        }
  
 +      phydev->phy_link_change = phy_link_change;
        phydev->attached_dev = dev;
        dev->phydev = phydev;
  
@@@ -1084,7 -1072,6 +1086,7 @@@ void phy_detach(struct phy_device *phyd
        phydev->attached_dev->phydev = NULL;
        phydev->attached_dev = NULL;
        phy_suspend(phydev);
 +      phydev->phylink = NULL;
  
        phy_led_triggers_unregister(phydev);
  
diff --combined drivers/net/usb/cdc_ncm.c
index 811b18215cae1f3f6636d15050d5e76ea5be441f,9c80e80c5493b4f5a5d56c06f0d188debe53eca1..47cab1bde0659a518cea55e1ae54d9dccccdaa25
@@@ -367,7 -367,7 +367,7 @@@ static struct attribute *cdc_ncm_sysfs_
        NULL,
  };
  
 -static struct attribute_group cdc_ncm_sysfs_attr_group = {
 +static const struct attribute_group cdc_ncm_sysfs_attr_group = {
        .name = "cdc_ncm",
        .attrs = cdc_ncm_sysfs_attrs,
  };
@@@ -1758,6 -1758,13 +1758,13 @@@ static const struct usb_device_id cdc_d
          .driver_info = (unsigned long)&wwan_noarp_info,
        },
  
+       /* u-blox TOBY-L4 */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x1546, 0x1010,
+               USB_CLASS_COMM,
+               USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
+         .driver_info = (unsigned long)&wwan_info,
+       },
        /* Generic CDC-NCM devices */
        { USB_INTERFACE_INFO(USB_CLASS_COMM,
                USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
diff --combined drivers/net/virtio_net.c
index 52ae78ca3d3865e8ce589de047ac3268a133fadf,b06169ea60dc9d519f9a7673e9aea809539dbd5c..511f8339fa963c300d41cfd2e27480ea43c6ed73
@@@ -57,13 -57,6 +57,13 @@@ DECLARE_EWMA(pkt_len, 0, 64
  
  #define VIRTNET_DRIVER_VERSION "1.0.0"
  
 +static const unsigned long guest_offloads[] = {
 +      VIRTIO_NET_F_GUEST_TSO4,
 +      VIRTIO_NET_F_GUEST_TSO6,
 +      VIRTIO_NET_F_GUEST_ECN,
 +      VIRTIO_NET_F_GUEST_UFO
 +};
 +
  struct virtnet_stats {
        struct u64_stats_sync tx_syncp;
        struct u64_stats_sync rx_syncp;
@@@ -171,13 -164,10 +171,13 @@@ struct virtnet_info 
        u8 ctrl_promisc;
        u8 ctrl_allmulti;
        u16 ctrl_vid;
 +      u64 ctrl_offloads;
  
        /* Ethtool settings */
        u8 duplex;
        u32 speed;
 +
 +      unsigned long guest_offloads;
  };
  
  struct padded_vnet_hdr {
@@@ -280,23 -270,6 +280,23 @@@ static void skb_xmit_done(struct virtqu
                netif_wake_subqueue(vi->dev, vq2txq(vq));
  }
  
 +#define MRG_CTX_HEADER_SHIFT 22
 +static void *mergeable_len_to_ctx(unsigned int truesize,
 +                                unsigned int headroom)
 +{
 +      return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
 +}
 +
 +static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
 +{
 +      return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
 +}
 +
 +static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
 +{
 +      return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
 +}
 +
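  (Not from the driver itself: a tiny userspace-style check of the packing scheme above, with made-up headroom/truesize values, showing that the decode helpers invert mergeable_len_to_ctx(). The context is treated here as a plain unsigned long rather than a void pointer.)

  #include <assert.h>

  #define MRG_CTX_HEADER_SHIFT 22

  int main(void)
  {
  	unsigned long truesize = 1536;	/* hypothetical; must fit below bit 22 */
  	unsigned long headroom = 256;	/* hypothetical XDP headroom */
  	unsigned long ctx = (headroom << MRG_CTX_HEADER_SHIFT) | truesize;

  	/* mirrors mergeable_ctx_to_headroom()/mergeable_ctx_to_truesize() above */
  	assert((ctx >> MRG_CTX_HEADER_SHIFT) == headroom);
  	assert((ctx & ((1UL << MRG_CTX_HEADER_SHIFT) - 1)) == truesize);
  	return 0;
  }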
  /* Called from bottom half context */
  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct receive_queue *rq,
  
        hdr_len = vi->hdr_len;
        if (vi->mergeable_rx_bufs)
 -              hdr_padded_len = sizeof *hdr;
 +              hdr_padded_len = sizeof(*hdr);
        else
                hdr_padded_len = sizeof(struct padded_vnet_hdr);
  
@@@ -417,85 -390,19 +417,85 @@@ static unsigned int virtnet_get_headroo
        return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
  }
  
 +/* We copy the packet for XDP in the following cases:
 + *
 + * 1) Packet is scattered across multiple rx buffers.
 + * 2) Headroom space is insufficient.
 + *
 + * This is inefficient but it's a temporary condition that
 + * we hit right after XDP is enabled and until queue is refilled
 + * with large buffers with sufficient headroom - so it should affect
 + * at most queue size packets.
 + * Afterwards, the conditions to enable
 + * XDP should preclude the underlying device from sending packets
 + * across multiple buffers (num_buf > 1), and we make sure buffers
 + * have enough headroom.
 + */
 +static struct page *xdp_linearize_page(struct receive_queue *rq,
 +                                     u16 *num_buf,
 +                                     struct page *p,
 +                                     int offset,
 +                                     int page_off,
 +                                     unsigned int *len)
 +{
 +      struct page *page = alloc_page(GFP_ATOMIC);
 +
 +      if (!page)
 +              return NULL;
 +
 +      memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
 +      page_off += *len;
 +
 +      while (--*num_buf) {
 +              unsigned int buflen;
 +              void *buf;
 +              int off;
 +
 +              buf = virtqueue_get_buf(rq->vq, &buflen);
 +              if (unlikely(!buf))
 +                      goto err_buf;
 +
 +              p = virt_to_head_page(buf);
 +              off = buf - page_address(p);
 +
 +              /* guard against a misconfigured or uncooperative backend that
 +               * is sending packet larger than the MTU.
 +               */
 +              if ((page_off + buflen) > PAGE_SIZE) {
 +                      put_page(p);
 +                      goto err_buf;
 +              }
 +
 +              memcpy(page_address(page) + page_off,
 +                     page_address(p) + off, buflen);
 +              page_off += buflen;
 +              put_page(p);
 +      }
 +
 +      /* Headroom does not contribute to packet length */
 +      *len = page_off - VIRTIO_XDP_HEADROOM;
 +      return page;
 +err_buf:
 +      __free_pages(page, 0);
 +      return NULL;
 +}
 +
  static struct sk_buff *receive_small(struct net_device *dev,
                                     struct virtnet_info *vi,
                                     struct receive_queue *rq,
 -                                   void *buf, unsigned int len)
 +                                   void *buf, void *ctx,
 +                                   unsigned int len)
  {
        struct sk_buff *skb;
        struct bpf_prog *xdp_prog;
 -      unsigned int xdp_headroom = virtnet_get_headroom(vi);
 +      unsigned int xdp_headroom = (unsigned long)ctx;
        unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
        unsigned int headroom = vi->hdr_len + header_offset;
        unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 +      struct page *page = virt_to_head_page(buf);
        unsigned int delta = 0;
 +      struct page *xdp_page;
        len -= vi->hdr_len;
  
        rcu_read_lock();
                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
                        goto err_xdp;
  
 +              if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
 +                      int offset = buf - page_address(page) + header_offset;
 +                      unsigned int tlen = len + vi->hdr_len;
 +                      u16 num_buf = 1;
 +
 +                      xdp_headroom = virtnet_get_headroom(vi);
 +                      header_offset = VIRTNET_RX_PAD + xdp_headroom;
 +                      headroom = vi->hdr_len + header_offset;
 +                      buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
 +                               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 +                      xdp_page = xdp_linearize_page(rq, &num_buf, page,
 +                                                    offset, header_offset,
 +                                                    &tlen);
 +                      if (!xdp_page)
 +                              goto err_xdp;
 +
 +                      buf = page_address(xdp_page);
 +                      put_page(page);
 +                      page = xdp_page;
 +              }
 +
                xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
                xdp.data = xdp.data_hard_start + xdp_headroom;
                xdp.data_end = xdp.data + len;
  
        skb = build_skb(buf, buflen);
        if (!skb) {
 -              put_page(virt_to_head_page(buf));
 +              put_page(page);
                goto err;
        }
        skb_reserve(skb, headroom - delta);
@@@ -574,7 -460,7 +574,7 @@@ err
  err_xdp:
        rcu_read_unlock();
        dev->stats.rx_dropped++;
 -      put_page(virt_to_head_page(buf));
 +      put_page(page);
  xdp_xmit:
        return NULL;
  }
@@@ -599,6 -485,66 +599,6 @@@ err
        return NULL;
  }
  
 -/* The conditions to enable XDP should preclude the underlying device from
 - * sending packets across multiple buffers (num_buf > 1). However per spec
 - * it does not appear to be illegal to do so but rather just against convention.
 - * So in order to avoid making a system unresponsive the packets are pushed
 - * into a page and the XDP program is run. This will be extremely slow and we
 - * push a warning to the user to fix this as soon as possible. Fixing this may
 - * require resolving the underlying hardware to determine why multiple buffers
 - * are being received or simply loading the XDP program in the ingress stack
 - * after the skb is built because there is no advantage to running it here
 - * anymore.
 - */
 -static struct page *xdp_linearize_page(struct receive_queue *rq,
 -                                     u16 *num_buf,
 -                                     struct page *p,
 -                                     int offset,
 -                                     unsigned int *len)
 -{
 -      struct page *page = alloc_page(GFP_ATOMIC);
 -      unsigned int page_off = VIRTIO_XDP_HEADROOM;
 -
 -      if (!page)
 -              return NULL;
 -
 -      memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
 -      page_off += *len;
 -
 -      while (--*num_buf) {
 -              unsigned int buflen;
 -              void *buf;
 -              int off;
 -
 -              buf = virtqueue_get_buf(rq->vq, &buflen);
 -              if (unlikely(!buf))
 -                      goto err_buf;
 -
 -              p = virt_to_head_page(buf);
 -              off = buf - page_address(p);
 -
 -              /* guard against a misconfigured or uncooperative backend that
 -               * is sending packet larger than the MTU.
 -               */
 -              if ((page_off + buflen) > PAGE_SIZE) {
 -                      put_page(p);
 -                      goto err_buf;
 -              }
 -
 -              memcpy(page_address(page) + page_off,
 -                     page_address(p) + off, buflen);
 -              page_off += buflen;
 -              put_page(p);
 -      }
 -
 -      /* Headroom does not contribute to packet length */
 -      *len = page_off - VIRTIO_XDP_HEADROOM;
 -      return page;
 -err_buf:
 -      __free_pages(page, 0);
 -      return NULL;
 -}
 -
  static struct sk_buff *receive_mergeable(struct net_device *dev,
                                         struct virtnet_info *vi,
                                         struct receive_queue *rq,
        struct sk_buff *head_skb, *curr_skb;
        struct bpf_prog *xdp_prog;
        unsigned int truesize;
 +      unsigned int headroom = mergeable_ctx_to_headroom(ctx);
  
        head_skb = NULL;
  
                u32 act;
  
                /* This happens when rx buffer size is underestimated */
 -              if (unlikely(num_buf > 1)) {
 +              if (unlikely(num_buf > 1 ||
 +                           headroom < virtnet_get_headroom(vi))) {
                        /* linearize data for XDP */
                        xdp_page = xdp_linearize_page(rq, &num_buf,
 -                                                    page, offset, &len);
 +                                                    page, offset,
 +                                                    VIRTIO_XDP_HEADROOM,
 +                                                    &len);
                        if (!xdp_page)
                                goto err_xdp;
                        offset = VIRTIO_XDP_HEADROOM;
        }
        rcu_read_unlock();
  
 -      if (unlikely(len > (unsigned long)ctx)) {
 +      truesize = mergeable_ctx_to_truesize(ctx);
 +      if (unlikely(len > truesize)) {
                pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                         dev->name, len, (unsigned long)ctx);
                dev->stats.rx_length_errors++;
                goto err_skb;
        }
 -      truesize = (unsigned long)ctx;
 +
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
        curr_skb = head_skb;
  
                }
  
                page = virt_to_head_page(buf);
 -              if (unlikely(len > (unsigned long)ctx)) {
 +
 +              truesize = mergeable_ctx_to_truesize(ctx);
 +              if (unlikely(len > truesize)) {
                        pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                                 dev->name, len, (unsigned long)ctx);
                        dev->stats.rx_length_errors++;
                        goto err_skb;
                }
 -              truesize = (unsigned long)ctx;
  
                num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
                if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@@ -814,7 -754,7 +814,7 @@@ static int receive_buf(struct virtnet_i
        else if (vi->big_packets)
                skb = receive_big(dev, vi, rq, buf, len);
        else
 -              skb = receive_small(dev, vi, rq, buf, len);
 +              skb = receive_small(dev, vi, rq, buf, ctx, len);
  
        if (unlikely(!skb))
                return 0;
@@@ -847,18 -787,12 +847,18 @@@ frame_err
        return 0;
  }
  
 +/* Unlike mergeable buffers, all buffers are allocated to the
 + * same size, except for the headroom. For this reason we do
 + * not need to use  mergeable_len_to_ctx here - it is enough
 + * to store the headroom as the context ignoring the truesize.
 + */
  static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
                             gfp_t gfp)
  {
        struct page_frag *alloc_frag = &rq->alloc_frag;
        char *buf;
        unsigned int xdp_headroom = virtnet_get_headroom(vi);
 +      void *ctx = (void *)(unsigned long)xdp_headroom;
        int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
        int err;
  
        alloc_frag->offset += len;
        sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
                    vi->hdr_len + GOOD_PACKET_LEN);
 -      err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
 +      err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));
 -
        return err;
  }
  
@@@ -967,7 -902,7 +967,7 @@@ static int add_recvbuf_mergeable(struc
        }
  
        sg_init_one(rq->sg, buf, len);
 -      ctx = (void *)(unsigned long)len;
 +      ctx = mergeable_len_to_ctx(len, headroom);
        err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));
@@@ -1079,7 -1014,7 +1079,7 @@@ static int virtnet_receive(struct recei
        void *buf;
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
  
 -      if (vi->mergeable_rx_bufs) {
 +      if (!vi->big_packets || vi->mergeable_rx_bufs) {
                void *ctx;
  
                while (received < budget &&
@@@ -1123,7 -1058,7 +1123,7 @@@ static void free_old_xmit_skbs(struct s
                bytes += skb->len;
                packets++;
  
-               dev_kfree_skb_any(skb);
+               dev_consume_skb_any(skb);
        }
  
        /* Avoid overhead when no packets have been processed
@@@ -1878,6 -1813,7 +1878,6 @@@ static void virtnet_freeze_down(struct 
  }
  
  static int init_vqs(struct virtnet_info *vi);
 -static void _remove_vq_common(struct virtnet_info *vi);
  
  static int virtnet_restore_up(struct virtio_device *vdev)
  {
        return err;
  }
  
 -static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
 +static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
  {
 -      struct virtio_device *dev = vi->vdev;
 -      int ret;
 +      struct scatterlist sg;
 +      vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads);
  
 -      virtio_config_disable(dev);
 -      dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
 -      virtnet_freeze_down(dev);
 -      _remove_vq_common(vi);
 +      sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads));
  
 -      virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 -      virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 +      if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
 +                                VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
 +              dev_warn(&vi->dev->dev, "Fail to set guest offload. \n");
 +              return -EINVAL;
 +      }
  
 -      ret = virtio_finalize_features(dev);
 -      if (ret)
 -              goto err;
 +      return 0;
 +}
  
 -      vi->xdp_queue_pairs = xdp_qp;
 -      ret = virtnet_restore_up(dev);
 -      if (ret)
 -              goto err;
 -      ret = _virtnet_set_queues(vi, curr_qp);
 -      if (ret)
 -              goto err;
 +static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
 +{
 +      u64 offloads = 0;
  
 -      virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 -      virtio_config_enable(dev);
 -      return 0;
 -err:
 -      virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 -      return ret;
 +      if (!vi->guest_offloads)
 +              return 0;
 +
 +      if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
 +              offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
 +
 +      return virtnet_set_guest_offloads(vi, offloads);
 +}
 +
 +static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
 +{
 +      u64 offloads = vi->guest_offloads;
 +
 +      if (!vi->guest_offloads)
 +              return 0;
 +      if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
 +              offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
 +
 +      return virtnet_set_guest_offloads(vi, offloads);
  }
  
  static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
        u16 xdp_qp = 0, curr_qp;
        int i, err;
  
 -      if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 -          virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
 -          virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
 -          virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
 +      if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
 +          && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 +              virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
 +              virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
 +              virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
                NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
                return -EOPNOTSUPP;
        }
                        return PTR_ERR(prog);
        }
  
 -      /* Changing the headroom in buffers is a disruptive operation because
 -       * existing buffers must be flushed and reallocated. This will happen
 -       * when a xdp program is initially added or xdp is disabled by removing
 -       * the xdp program resulting in number of XDP queues changing.
 -       */
 -      if (vi->xdp_queue_pairs != xdp_qp) {
 -              err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
 -              if (err) {
 -                      dev_warn(&dev->dev, "XDP reset failure.\n");
 -                      goto virtio_reset_err;
 -              }
 -      }
 +      /* Make sure NAPI is not using any XDP TX queues for RX. */
 +      for (i = 0; i < vi->max_queue_pairs; i++)
 +              napi_disable(&vi->rq[i].napi);
  
        netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
 +      err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
 +      if (err)
 +              goto err;
 +      vi->xdp_queue_pairs = xdp_qp;
  
        for (i = 0; i < vi->max_queue_pairs; i++) {
                old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
                rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 +              if (i == 0) {
 +                      if (!old_prog)
 +                              virtnet_clear_guest_offloads(vi);
 +                      if (!prog)
 +                              virtnet_restore_guest_offloads(vi);
 +              }
                if (old_prog)
                        bpf_prog_put(old_prog);
 +              virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
        }
  
        return 0;
  
 -virtio_reset_err:
 -      /* On reset error do our best to unwind XDP changes inflight and return
 -       * error up to user space for resolution. The underlying reset hung on
 -       * us so not much we can do here.
 -       */
 +err:
 +      for (i = 0; i < vi->max_queue_pairs; i++)
 +              virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
        if (prog)
                bpf_prog_sub(prog, vi->max_queue_pairs - 1);
        return err;
@@@ -2255,7 -2182,7 +2255,7 @@@ static int virtnet_find_vqs(struct virt
        names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
        if (!names)
                goto err_names;
 -      if (vi->mergeable_rx_bufs) {
 +      if (!vi->big_packets || vi->mergeable_rx_bufs) {
                ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
                if (!ctx)
                        goto err_ctx;
@@@ -2376,7 -2303,7 +2376,7 @@@ err
  
  #ifdef CONFIG_SYSFS
  static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 -              struct rx_queue_attribute *attribute, char *buf)
 +              char *buf)
  {
        struct virtnet_info *vi = netdev_priv(queue->dev);
        unsigned int queue_index = get_netdev_rx_queue_index(queue);
@@@ -2501,7 -2428,7 +2501,7 @@@ static int virtnet_probe(struct virtio_
                        dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
  
                if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
 -                      dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
 +                      dev->hw_features |= NETIF_F_TSO
                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
                }
                /* Individual feature bits: what can host handle? */
                        dev->hw_features |= NETIF_F_TSO6;
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
                        dev->hw_features |= NETIF_F_TSO_ECN;
 -              if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
 -                      dev->hw_features |= NETIF_F_UFO;
  
                dev->features |= NETIF_F_GSO_ROBUST;
  
                if (gso)
 -                      dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
 +                      dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
                /* (!csum && gso) case will be fixed by register_netdev() */
        }
        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
                netif_carrier_on(dev);
        }
  
 +      for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
 +              if (virtio_has_feature(vi->vdev, guest_offloads[i]))
 +                      set_bit(guest_offloads[i], &vi->guest_offloads);
 +
        pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
                 dev->name, max_queue_pairs);
  
@@@ -2672,6 -2597,15 +2672,6 @@@ free
        return err;
  }
  
 -static void _remove_vq_common(struct virtnet_info *vi)
 -{
 -      vi->vdev->config->reset(vi->vdev);
 -      free_unused_bufs(vi);
 -      _free_receive_bufs(vi);
 -      free_receive_page_frags(vi);
 -      virtnet_del_vqs(vi);
 -}
 -
  static void remove_vq_common(struct virtnet_info *vi)
  {
        vi->vdev->config->reset(vi->vdev);
@@@ -2703,7 -2637,8 +2703,7 @@@ static void virtnet_remove(struct virti
        free_netdev(vi->dev);
  }
  
 -#ifdef CONFIG_PM_SLEEP
 -static int virtnet_freeze(struct virtio_device *vdev)
 +static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
  {
        struct virtnet_info *vi = vdev->priv;
  
        return 0;
  }
  
 -static int virtnet_restore(struct virtio_device *vdev)
 +static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
  {
        struct virtnet_info *vi = vdev->priv;
        int err;
  
        return 0;
  }
 -#endif
  
  static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
        VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
        VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
        VIRTIO_NET_F_CTRL_MAC_ADDR, \
 -      VIRTIO_NET_F_MTU
 +      VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
  
  static unsigned int features[] = {
        VIRTNET_FEATURES,
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 79020cf8c79cfd7b09a4310dd4f94d31c5254fdc,a1ea9ef97ed97adc31e7c399b66123cbab8058e1..4fb7647995c3964590e3d4b984da054e887016cb
@@@ -661,16 -661,10 +661,16 @@@ static inline void iwl_pcie_sw_reset(st
        usleep_range(5000, 6000);
  }
  
 +static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
 +{
 +      return index & (q->n_window - 1);
 +}
 +
  static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
                                     struct iwl_txq *txq, int idx)
  {
 -      return txq->tfds + trans_pcie->tfd_size * idx;
 +      return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
 +                                                                       idx);
  }
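  (One small note on the helper added above: it assumes the queue window n_window is a power of two, so the bitwise AND is a cheap modulo. A throwaway standalone check with invented numbers:)

  #include <assert.h>

  int main(void)
  {
  	unsigned int n_window = 64;	/* hypothetical TFD window size, power of two */
  	unsigned int index = 70;	/* pointer that has wrapped past the window */

  	/* same masking as iwl_pcie_get_cmd_index(): both expressions give 6 */
  	assert((index & (n_window - 1)) == index % n_window);
  	return 0;
  }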
  
  static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
@@@ -732,6 -726,11 +732,6 @@@ static inline bool iwl_queue_used(cons
                !(i < q->read_ptr && i >= q->write_ptr);
  }
  
 -static inline u8 get_cmd_index(struct iwl_txq *q, u32 index)
 -{
 -      return index & (q->n_window - 1);
 -}
 -
  static inline bool iwl_is_rfkill_set(struct iwl_trans *trans)
  {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@@ -788,11 -787,13 +788,13 @@@ int iwl_pci_fw_enter_d0i3(struct iwl_tr
  
  void iwl_pcie_enable_rx_wake(struct iwl_trans *trans, bool enable);
  
+ void iwl_pcie_rx_allocator_work(struct work_struct *data);
  /* common functions that are used by gen2 transport */
  void iwl_pcie_apm_config(struct iwl_trans *trans);
  int iwl_pcie_prepare_card_hw(struct iwl_trans *trans);
  void iwl_pcie_synchronize_irqs(struct iwl_trans *trans);
 -bool iwl_trans_check_hw_rf_kill(struct iwl_trans *trans);
 +bool iwl_pcie_check_hw_rf_kill(struct iwl_trans *trans);
  void iwl_trans_pcie_handle_stop_rfkill(struct iwl_trans *trans,
                                       bool was_in_rfkill);
  void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq);
@@@ -807,8 -808,6 +809,8 @@@ int iwl_pcie_alloc_dma_ptr(struct iwl_t
                           struct iwl_dma_ptr *ptr, size_t size);
  void iwl_pcie_free_dma_ptr(struct iwl_trans *trans, struct iwl_dma_ptr *ptr);
  void iwl_pcie_apply_destination(struct iwl_trans *trans);
 +void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
 +                          struct sk_buff *skb);
  #ifdef CONFIG_INET
  struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len);
  #endif
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index e5d2bf0bde3738b898398a3f4483be95a610ae86,942736d3fa75521018580aed3518dd2fea251fc6..a06b6612b6583d6b5efa1d2396bc2060a2ffa37f
@@@ -597,7 -597,7 +597,7 @@@ static void iwl_pcie_rx_allocator_get(s
        rxq->free_count += RX_CLAIM_REQ_ALLOC;
  }
  
- static void iwl_pcie_rx_allocator_work(struct work_struct *data)
+ void iwl_pcie_rx_allocator_work(struct work_struct *data)
  {
        struct iwl_rb_allocator *rba_p =
                container_of(data, struct iwl_rb_allocator, rx_alloc);
@@@ -900,10 -900,6 +900,6 @@@ static int _iwl_pcie_rx_init(struct iwl
                        return err;
        }
        def_rxq = trans_pcie->rxq;
-       if (!rba->alloc_wq)
-               rba->alloc_wq = alloc_workqueue("rb_allocator",
-                                               WQ_HIGHPRI | WQ_UNBOUND, 1);
-       INIT_WORK(&rba->rx_alloc, iwl_pcie_rx_allocator_work);
  
        spin_lock(&rba->lock);
        atomic_set(&rba->req_pending, 0);
@@@ -1017,10 -1013,6 +1013,6 @@@ void iwl_pcie_rx_free(struct iwl_trans 
        }
  
        cancel_work_sync(&rba->rx_alloc);
-       if (rba->alloc_wq) {
-               destroy_workqueue(rba->alloc_wq);
-               rba->alloc_wq = NULL;
-       }
  
        iwl_pcie_free_rbs_pool(trans);
  
@@@ -1176,7 -1168,7 +1168,7 @@@ static void iwl_pcie_rx_handle_rb(struc
  
                sequence = le16_to_cpu(pkt->hdr.sequence);
                index = SEQ_TO_INDEX(sequence);
 -              cmd_index = get_cmd_index(txq, index);
 +              cmd_index = iwl_pcie_get_cmd_index(txq, index);
  
                if (rxq->id == 0)
                        iwl_op_mode_rx(trans->op_mode, &rxq->napi,
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 58873cc27396f038812b33658a916cff1e408000,3927bbf04f727d5e0b9a9c0a9d1c78cee42e3d94..2e3e013ec95acf94eecb843edef305864cf6939b
@@@ -986,7 -986,7 +986,7 @@@ static int iwl_pcie_load_given_ucode_80
                                               &first_ucode_section);
  }
  
 -bool iwl_trans_check_hw_rf_kill(struct iwl_trans *trans)
 +bool iwl_pcie_check_hw_rf_kill(struct iwl_trans *trans)
  {
        struct iwl_trans_pcie *trans_pcie =  IWL_TRANS_GET_PCIE_TRANS(trans);
        bool hw_rfkill = iwl_is_rfkill_set(trans);
@@@ -1252,7 -1252,7 +1252,7 @@@ static int iwl_trans_pcie_start_fw(stru
        mutex_lock(&trans_pcie->mutex);
  
        /* If platform's RF_KILL switch is NOT set to KILL */
 -      hw_rfkill = iwl_trans_check_hw_rf_kill(trans);
 +      hw_rfkill = iwl_pcie_check_hw_rf_kill(trans);
        if (hw_rfkill && !run_in_rfkill) {
                ret = -ERFKILL;
                goto out;
                ret = iwl_pcie_load_given_ucode(trans, fw);
  
        /* re-check RF-Kill state since we may have missed the interrupt */
 -      hw_rfkill = iwl_trans_check_hw_rf_kill(trans);
 +      hw_rfkill = iwl_pcie_check_hw_rf_kill(trans);
        if (hw_rfkill && !run_in_rfkill)
                ret = -ERFKILL;
  
@@@ -1663,7 -1663,7 +1663,7 @@@ static int _iwl_trans_pcie_start_hw(str
        trans_pcie->is_down = false;
  
        /* ...rfkill can call stop_device and set it false if needed */
 -      iwl_trans_check_hw_rf_kill(trans);
 +      iwl_pcie_check_hw_rf_kill(trans);
  
        /* Make sure we sync here, because we'll need full access later */
        if (low_power)
@@@ -1786,6 -1786,11 +1786,11 @@@ void iwl_trans_pcie_free(struct iwl_tra
                iwl_pcie_tx_free(trans);
        iwl_pcie_rx_free(trans);
  
+       if (trans_pcie->rba.alloc_wq) {
+               destroy_workqueue(trans_pcie->rba.alloc_wq);
+               trans_pcie->rba.alloc_wq = NULL;
+       }
        if (trans_pcie->msix_enabled) {
                for (i = 0; i < trans_pcie->alloc_vecs; i++) {
                        irq_set_affinity_hint(
@@@ -1842,8 -1847,8 +1847,8 @@@ static bool iwl_trans_pcie_grab_nic_acc
         * These bits say the device is running, and should keep running for
         * at least a short while (at least as long as MAC_ACCESS_REQ stays 1),
         * but they do not indicate that embedded SRAM is restored yet;
 -       * 3945 and 4965 have volatile SRAM, and must save/restore contents
 -       * to/from host DRAM when sleeping/waking for power-saving.
 +       * HW with volatile SRAM must save/restore contents to/from
 +       * host DRAM when sleeping/waking for power-saving.
         * Each direction takes approximately 1/4 millisecond; with this
         * overhead, it's a good idea to grab and hold MAC_ACCESS_REQUEST if a
         * series of register accesses are expected (e.g. reading Event Log),
         *
         * CSR_UCODE_DRV_GP1 register bit MAC_SLEEP == 0 indicates that
         * SRAM is okay/restored.  We don't check that here because this call
 -       * is just for hardware register access; but GP1 MAC_SLEEP check is a
 -       * good idea before accessing 3945/4965 SRAM (e.g. reading Event Log).
 +       * is just for hardware register access; but GP1 MAC_SLEEP
 +       * check is a good idea before accessing the SRAM of HW with
 +       * volatile SRAM (e.g. reading Event Log).
         *
         * 5000 series and later (including 1000 series) have non-volatile SRAM,
         * and do not save/restore SRAM when power cycling.
@@@ -2835,7 -2839,7 +2840,7 @@@ static struct iwl_trans_dump_dat
        spin_lock_bh(&cmdq->lock);
        ptr = cmdq->write_ptr;
        for (i = 0; i < cmdq->n_window; i++) {
 -              u8 idx = get_cmd_index(cmdq, ptr);
 +              u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr);
                u32 caplen, cmdlen;
  
                cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds +
@@@ -3138,18 -3142,7 +3143,18 @@@ struct iwl_trans *iwl_trans_pcie_alloc(
                iwl_set_bit(trans, CSR_HOST_CHICKEN,
                            CSR_HOST_CHICKEN_PM_IDLE_SRC_DIS_SB_PME);
  
 +#if IS_ENABLED(CONFIG_IWLMVM)
        trans->hw_rf_id = iwl_read32(trans, CSR_HW_RF_ID);
 +      if (trans->hw_rf_id == CSR_HW_RF_ID_TYPE_HR) {
 +              u32 hw_status;
 +
 +              hw_status = iwl_read_prph(trans, UMAG_GEN_HW_STATUS);
 +              if (hw_status & UMAG_GEN_HW_IS_FPGA)
 +                      trans->cfg = &iwla000_2ax_cfg_qnj_hr_f0;
 +              else
 +                      trans->cfg = &iwla000_2ac_cfg_hr;
 +      }
 +#endif
  
        iwl_pcie_set_interrupt_capa(pdev, trans);
        trans->hw_id = (pdev->device << 16) + pdev->subsystem_device;
                trans_pcie->inta_mask = CSR_INI_SET_MASK;
         }
  
+       trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
+                                                  WQ_HIGHPRI | WQ_UNBOUND, 1);
+       INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
  #ifdef CONFIG_IWLWIFI_PCIE_RTPM
        trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3;
  #else
diff --combined include/linux/mlx5/driver.h
index d5b6f6a9fcc508bab79c642239e103f4ef0502eb,205d82d4c468717ac26050358acb65a968481097..023b29d973e6f16310e0a2d6e909d40f208cc228
@@@ -550,7 -550,6 +550,7 @@@ struct mlx5_fc_stats 
        unsigned long sampling_interval; /* jiffies */
  };
  
 +struct mlx5_mpfs;
  struct mlx5_eswitch;
  struct mlx5_lag;
  struct mlx5_pagefault;
@@@ -647,11 -646,7 +647,11 @@@ struct mlx5_priv 
        struct list_head        ctx_list;
        spinlock_t              ctx_lock;
  
 +      struct list_head        waiting_events_list;
 +      bool                    is_accum_events;
 +
        struct mlx5_flow_steering *steering;
 +      struct mlx5_mpfs        *mpfs;
        struct mlx5_eswitch     *eswitch;
        struct mlx5_core_sriov  sriov;
        struct mlx5_lag         *lag;
@@@ -678,9 -673,7 +678,7 @@@ enum mlx5_device_state 
  };
  
  enum mlx5_interface_state {
-       MLX5_INTERFACE_STATE_DOWN = BIT(0),
-       MLX5_INTERFACE_STATE_UP = BIT(1),
-       MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2),
+       MLX5_INTERFACE_STATE_UP = BIT(0),
  };
  
  enum mlx5_pci_status {
@@@ -890,6 -883,8 +888,6 @@@ static inline void *mlx5_buf_offset(str
                return buf->direct.buf + offset;
  }
  
 -extern struct workqueue_struct *mlx5_core_wq;
 -
  #define STRUCT_FIELD(header, field) \
        .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field),      \
        .struct_size_bytes   = sizeof((struct ib_unpacked_ ## header *)0)->field
diff --combined include/linux/netdevice.h
index 35de8312e0b5a53b2851905a158995611d1709e2,c99ba7914c0a41d6d829e6018df16f4a229412b5..8aba119bb005be3e8bb07a07cdd7ca69129ee12f
@@@ -35,6 -35,7 +35,6 @@@
  
  #include <linux/percpu.h>
  #include <linux/rculist.h>
 -#include <linux/dmaengine.h>
  #include <linux/workqueue.h>
  #include <linux/dynamic_queue_limits.h>
  
@@@ -65,7 -66,6 +65,7 @@@ struct mpls_dev
  /* UDP Tunnel offloads */
  struct udp_tunnel_info;
  struct bpf_prog;
 +struct xdp_buff;
  
  void netdev_set_default_ethtool_ops(struct net_device *dev,
                                    const struct ethtool_ops *ops);
@@@ -693,9 -693,10 +693,9 @@@ struct netdev_rx_queue 
   */
  struct rx_queue_attribute {
        struct attribute attr;
 -      ssize_t (*show)(struct netdev_rx_queue *queue,
 -          struct rx_queue_attribute *attr, char *buf);
 +      ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
        ssize_t (*store)(struct netdev_rx_queue *queue,
 -          struct rx_queue_attribute *attr, const char *buf, size_t len);
 +                       const char *buf, size_t len);
  };
  
  #ifdef CONFIG_XPS
@@@ -769,14 -770,31 +769,14 @@@ static inline bool netdev_phys_item_id_
  typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
                                       struct sk_buff *skb);
  
 -/* These structures hold the attributes of qdisc and classifiers
 - * that are being passed to the netdevice through the setup_tc op.
 - */
 -enum {
 +enum tc_setup_type {
        TC_SETUP_MQPRIO,
        TC_SETUP_CLSU32,
        TC_SETUP_CLSFLOWER,
 -      TC_SETUP_MATCHALL,
 +      TC_SETUP_CLSMATCHALL,
        TC_SETUP_CLSBPF,
  };
  
 -struct tc_cls_u32_offload;
 -
 -struct tc_to_netdev {
 -      unsigned int type;
 -      union {
 -              struct tc_cls_u32_offload *cls_u32;
 -              struct tc_cls_flower_offload *cls_flower;
 -              struct tc_cls_matchall_offload *cls_mall;
 -              struct tc_cls_bpf_offload *cls_bpf;
 -              struct tc_mqprio_qopt *mqprio;
 -      };
 -      bool egress_dev;
 -};
 -
  /* These structures hold the attributes of xdp state that are being passed
   * to the netdevice through the xdp op.
   */
@@@ -959,8 -977,8 +959,8 @@@ struct xfrmdev_ops 
   *      with PF and querying it may introduce a theoretical security risk.
   * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
   * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
 - * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index,
 - *                   __be16 protocol, struct tc_to_netdev *tc);
 + * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type,
 + *                   void *type_data);
   *    Called to setup any 'tc' scheduler, classifier or action on @dev.
   *    This is always called from the stack with the rtnl lock held and netif
   *    tx queues stopped. This allows the netdevice to perform queue
   * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp);
   *    This function is used to set or query state related to XDP on the
   *    netdevice. See definition of enum xdp_netdev_command for details.
 - *
 + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
 + *    This function is used to submit a XDP packet for transmit on a
 + *    netdevice.
 + * void (*ndo_xdp_flush)(struct net_device *dev);
 + *    This function is used to inform the driver to flush a particular
 + *    xdp tx queue. Must be called on same CPU as xdp_xmit.
   */
  struct net_device_ops {
        int                     (*ndo_init)(struct net_device *dev);
                                                   struct net_device *dev,
                                                   int vf, bool setting);
        int                     (*ndo_setup_tc)(struct net_device *dev,
 -                                              u32 handle, u32 chain_index,
 -                                              __be16 protocol,
 -                                              struct tc_to_netdev *tc);
 +                                              enum tc_setup_type type,
 +                                              void *type_data);
  #if IS_ENABLED(CONFIG_FCOE)
        int                     (*ndo_fcoe_enable)(struct net_device *dev);
        int                     (*ndo_fcoe_disable)(struct net_device *dev);
                                                       int needed_headroom);
        int                     (*ndo_xdp)(struct net_device *dev,
                                           struct netdev_xdp *xdp);
 +      int                     (*ndo_xdp_xmit)(struct net_device *dev,
 +                                              struct xdp_buff *xdp);
 +      void                    (*ndo_xdp_flush)(struct net_device *dev);
  };
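  (The kernel-doc above for ndo_xdp_xmit/ndo_xdp_flush can be read alongside a driver-side sketch; everything below — the mydrv_* names and the stub bodies — is illustrative only and not taken from this patch.)

  #include <linux/netdevice.h>

  static int mydrv_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
  {
  	/* a real driver would place the frame described by @xdp on its
  	 * per-CPU XDP TX ring here; this stub just reports no room.
  	 */
  	return -ENOSPC;
  }

  static void mydrv_xdp_flush(struct net_device *dev)
  {
  	/* ring the TX doorbell for frames queued by mydrv_xdp_xmit() on
  	 * this CPU; nothing to do in the stub.
  	 */
  }

  static const struct net_device_ops mydrv_netdev_ops = {
  	.ndo_xdp_xmit	= mydrv_xdp_xmit,
  	.ndo_xdp_flush	= mydrv_xdp_flush,
  };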
  
  /**
@@@ -2297,7 -2308,6 +2297,7 @@@ struct netdev_lag_lower_state_info 
  #define NETDEV_PRECHANGEUPPER 0x001A
  #define NETDEV_CHANGELOWERSTATE       0x001B
  #define NETDEV_UDP_TUNNEL_PUSH_INFO   0x001C
 +#define NETDEV_UDP_TUNNEL_DROP_INFO   0x001D
  #define NETDEV_CHANGE_TX_QUEUE_LEN    0x001E
  
  int register_netdevice_notifier(struct notifier_block *nb);
@@@ -2413,8 -2423,8 +2413,8 @@@ struct net_device *dev_get_by_name_rcu(
  struct net_device *__dev_get_by_name(struct net *net, const char *name);
  int dev_alloc_name(struct net_device *dev, const char *name);
  int dev_open(struct net_device *dev);
 -int dev_close(struct net_device *dev);
 -int dev_close_many(struct list_head *head, bool unlink);
 +void dev_close(struct net_device *dev);
 +void dev_close_many(struct list_head *head, bool unlink);
  void dev_disable_lro(struct net_device *dev);
  int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
  int dev_queue_xmit(struct sk_buff *skb);
@@@ -3241,8 -3251,6 +3241,8 @@@ static inline void dev_consume_skb_any(
        __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
  }
  
 +void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
 +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
  int netif_rx(struct sk_buff *skb);
  int netif_rx_ni(struct sk_buff *skb);
  int netif_receive_skb(struct sk_buff *skb);
@@@ -3858,6 -3866,8 +3858,8 @@@ int netdev_walk_all_upper_dev_rcu(struc
  bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
                                  struct net_device *upper_dev);
  
+ bool netdev_has_any_upper_dev(struct net_device *dev);
  void *netdev_lower_get_next_private(struct net_device *dev,
                                    struct list_head **iter);
  void *netdev_lower_get_next_private_rcu(struct net_device *dev,
@@@ -4011,22 -4021,22 +4013,22 @@@ static inline netdev_tx_t netdev_start_
        return rc;
  }
  
 -int netdev_class_create_file_ns(struct class_attribute *class_attr,
 +int netdev_class_create_file_ns(const struct class_attribute *class_attr,
                                const void *ns);
 -void netdev_class_remove_file_ns(struct class_attribute *class_attr,
 +void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
                                 const void *ns);
  
 -static inline int netdev_class_create_file(struct class_attribute *class_attr)
 +static inline int netdev_class_create_file(const struct class_attribute *class_attr)
  {
        return netdev_class_create_file_ns(class_attr, NULL);
  }
  
 -static inline void netdev_class_remove_file(struct class_attribute *class_attr)
 +static inline void netdev_class_remove_file(const struct class_attribute *class_attr)
  {
        netdev_class_remove_file_ns(class_attr, NULL);
  }
  
 -extern struct kobj_ns_type_operations net_ns_type_operations;
 +extern const struct kobj_ns_type_operations net_ns_type_operations;
  
  const char *netdev_drivername(const struct net_device *dev);
  
@@@ -4081,6 -4091,7 +4083,6 @@@ static inline bool net_gso_ok(netdev_fe
  
        /* check flags correspondence */
        BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
 -      BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_DODGY   != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
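
The two new ndo_xdp_xmit()/ndo_xdp_flush() hooks above let XDP_REDIRECT queue frames on a driver's TX ring and batch the doorbell. A minimal sketch of how a driver might wire them up, for illustration only; mydrv_xdp, mydrv_priv, mydrv_queue_xdp_frame() and mydrv_kick_tx() are hypothetical helpers, not part of this diff.

static int mydrv_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
{
	struct mydrv_priv *priv = netdev_priv(dev);

	/* Queue the frame; the TX doorbell is deferred to ->ndo_xdp_flush()
	 * so a burst of redirected frames costs a single kick.
	 */
	return mydrv_queue_xdp_frame(priv, xdp);
}

static void mydrv_xdp_flush(struct net_device *dev)
{
	mydrv_kick_tx(netdev_priv(dev));
}

static const struct net_device_ops mydrv_netdev_ops = {
	.ndo_xdp	= mydrv_xdp,
	.ndo_xdp_xmit	= mydrv_xdp_xmit,
	.ndo_xdp_flush	= mydrv_xdp_flush,
};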
diff --combined include/linux/skbuff.h
index 7594e19bce622a38dc39c054093c3da15b99b67b,d67a8182e5eb2177d978ca8a5effeaf6bd579394..f93cc01064cb720d4ddf76c363044d1a6c62b68b
@@@ -345,42 -345,6 +345,42 @@@ static inline void skb_frag_size_sub(sk
        frag->size -= delta;
  }
  
 +static inline bool skb_frag_must_loop(struct page *p)
 +{
 +#if defined(CONFIG_HIGHMEM)
 +      if (PageHighMem(p))
 +              return true;
 +#endif
 +      return false;
 +}
 +
 +/**
 + *    skb_frag_foreach_page - loop over pages in a fragment
 + *
 + *    @f:             skb frag to operate on
 + *    @f_off:         offset from start of f->page.p
 + *    @f_len:         length from f_off to loop over
 + *    @p:             (temp var) current page
 + *    @p_off:         (temp var) offset from start of current page,
 + *                               non-zero only on first page.
 + *    @p_len:         (temp var) length in current page,
 + *                               < PAGE_SIZE only on first and last page.
 + *    @copied:        (temp var) length so far, excluding current p_len.
 + *
 + *    A fragment can hold a compound page, in which case per-page
 + *    operations, notably kmap_atomic, must be called for each
 + *    regular page.
 + */
 +#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied)       \
 +      for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT),            \
 +           p_off = (f_off) & (PAGE_SIZE - 1),                         \
 +           p_len = skb_frag_must_loop(p) ?                            \
 +           min_t(u32, f_len, PAGE_SIZE - p_off) : f_len,              \
 +           copied = 0;                                                \
 +           copied < f_len;                                            \
 +           copied += p_len, p++, p_off = 0,                           \
 +           p_len = min_t(u32, f_len - copied, PAGE_SIZE))             \
 +
  #define HAVE_HW_TIME_STAMP
  
  /**
@@@ -429,7 -393,6 +429,7 @@@ enum 
        SKBTX_SCHED_TSTAMP = 1 << 6,
  };
  
 +#define SKBTX_ZEROCOPY_FRAG   (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
  #define SKBTX_ANY_SW_TSTAMP   (SKBTX_SW_TSTAMP    | \
                                 SKBTX_SCHED_TSTAMP)
  #define SKBTX_ANY_TSTAMP      (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
   */
  struct ubuf_info {
        void (*callback)(struct ubuf_info *, bool zerocopy_success);
 -      void *ctx;
 -      unsigned long desc;
 +      union {
 +              struct {
 +                      unsigned long desc;
 +                      void *ctx;
 +              };
 +              struct {
 +                      u32 id;
 +                      u16 len;
 +                      u16 zerocopy:1;
 +                      u32 bytelen;
 +              };
 +      };
 +      atomic_t refcnt;
 +
 +      struct mmpin {
 +              struct user_struct *user;
 +              unsigned int num_pg;
 +      } mmp;
  };
  
 +#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
 +
 +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
 +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
 +                                      struct ubuf_info *uarg);
 +
 +static inline void sock_zerocopy_get(struct ubuf_info *uarg)
 +{
 +      atomic_inc(&uarg->refcnt);
 +}
 +
 +void sock_zerocopy_put(struct ubuf_info *uarg);
 +void sock_zerocopy_put_abort(struct ubuf_info *uarg);
 +
 +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
 +
 +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 +                           struct msghdr *msg, int len,
 +                           struct ubuf_info *uarg);
 +
  /* This data is invariant across clones and lives at
   * the end of the header data, ie. at skb->end.
   */
@@@ -536,38 -463,39 +536,38 @@@ enum 
  
  enum {
        SKB_GSO_TCPV4 = 1 << 0,
 -      SKB_GSO_UDP = 1 << 1,
  
        /* This indicates the skb is from an untrusted source. */
 -      SKB_GSO_DODGY = 1 << 2,
 +      SKB_GSO_DODGY = 1 << 1,
  
        /* This indicates the tcp segment has CWR set. */
 -      SKB_GSO_TCP_ECN = 1 << 3,
 +      SKB_GSO_TCP_ECN = 1 << 2,
  
 -      SKB_GSO_TCP_FIXEDID = 1 << 4,
 +      SKB_GSO_TCP_FIXEDID = 1 << 3,
  
 -      SKB_GSO_TCPV6 = 1 << 5,
 +      SKB_GSO_TCPV6 = 1 << 4,
  
 -      SKB_GSO_FCOE = 1 << 6,
 +      SKB_GSO_FCOE = 1 << 5,
  
 -      SKB_GSO_GRE = 1 << 7,
 +      SKB_GSO_GRE = 1 << 6,
  
 -      SKB_GSO_GRE_CSUM = 1 << 8,
 +      SKB_GSO_GRE_CSUM = 1 << 7,
  
 -      SKB_GSO_IPXIP4 = 1 << 9,
 +      SKB_GSO_IPXIP4 = 1 << 8,
  
 -      SKB_GSO_IPXIP6 = 1 << 10,
 +      SKB_GSO_IPXIP6 = 1 << 9,
  
 -      SKB_GSO_UDP_TUNNEL = 1 << 11,
 +      SKB_GSO_UDP_TUNNEL = 1 << 10,
  
 -      SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,
 +      SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
  
 -      SKB_GSO_PARTIAL = 1 << 13,
 +      SKB_GSO_PARTIAL = 1 << 12,
  
 -      SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
 +      SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
  
 -      SKB_GSO_SCTP = 1 << 15,
 +      SKB_GSO_SCTP = 1 << 14,
  
 -      SKB_GSO_ESP = 1 << 16,
 +      SKB_GSO_ESP = 1 << 15,
  };
  
  #if BITS_PER_LONG > 32
@@@ -1017,6 -945,12 +1017,6 @@@ static inline struct sk_buff *alloc_skb
        return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
  }
  
 -struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
 -static inline struct sk_buff *alloc_skb_head(gfp_t priority)
 -{
 -      return __alloc_skb_head(priority, -1);
 -}
 -
  struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
  int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
  struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
@@@ -1039,7 -973,23 +1039,23 @@@ int __must_check skb_to_sgvec_nomark(st
  int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg,
                              int offset, int len);
  int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
- int skb_pad(struct sk_buff *skb, int pad);
+ int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error);
+ /**
+  *    skb_pad                 -       zero pad the tail of an skb
+  *    @skb: buffer to pad
+  *    @pad: space to pad
+  *
+  *    Ensure that a buffer is followed by a padding area that is zero
+  *    filled. Used by network drivers which may DMA or transfer data
+  *    beyond the buffer end onto the wire.
+  *
+  *    May return error in out of memory cases. The skb is freed on error.
+  */
+ static inline int skb_pad(struct sk_buff *skb, int pad)
+ {
+       return __skb_pad(skb, pad, true);
+ }
  #define dev_kfree_skb(a)      consume_skb(a)
  
  int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
@@@ -1195,6 -1145,8 +1211,6 @@@ static inline __u32 skb_get_hash(struc
        return skb->hash;
  }
  
 -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6);
 -
  static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
  {
        if (!skb->l4_hash && !skb->sw_hash) {
        return skb->hash;
  }
  
 -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl);
 -
 -static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
 -{
 -      if (!skb->l4_hash && !skb->sw_hash) {
 -              struct flow_keys keys;
 -              __u32 hash = __get_hash_from_flowi4(fl4, &keys);
 -
 -              __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
 -      }
 -
 -      return skb->hash;
 -}
 -
  __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
  
  static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
@@@ -1251,50 -1217,6 +1267,50 @@@ static inline struct skb_shared_hwtstam
        return &skb_shinfo(skb)->hwtstamps;
  }
  
 +static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 +{
 +      bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
 +
 +      return is_zcopy ? skb_uarg(skb) : NULL;
 +}
 +
 +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 +{
 +      if (skb && uarg && !skb_zcopy(skb)) {
 +              sock_zerocopy_get(uarg);
 +              skb_shinfo(skb)->destructor_arg = uarg;
 +              skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
 +      }
 +}
 +
 +/* Release a reference on a zerocopy structure */
 +static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 +{
 +      struct ubuf_info *uarg = skb_zcopy(skb);
 +
 +      if (uarg) {
 +              if (uarg->callback == sock_zerocopy_callback) {
 +                      uarg->zerocopy = uarg->zerocopy && zerocopy;
 +                      sock_zerocopy_put(uarg);
 +              } else {
 +                      uarg->callback(uarg, zerocopy);
 +              }
 +
 +              skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
 +      }
 +}
 +
 +/* Abort a zerocopy operation and revert zckey on error in send syscall */
 +static inline void skb_zcopy_abort(struct sk_buff *skb)
 +{
 +      struct ubuf_info *uarg = skb_zcopy(skb);
 +
 +      if (uarg) {
 +              sock_zerocopy_put_abort(uarg);
 +              skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
 +      }
 +}
 +
  /**
   *    skb_queue_empty - check if a queue is empty
   *    @list: queue head
@@@ -1877,18 -1799,13 +1893,18 @@@ static inline unsigned int skb_headlen(
        return skb->len - skb->data_len;
  }
  
 -static inline unsigned int skb_pagelen(const struct sk_buff *skb)
 +static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
  {
        unsigned int i, len = 0;
  
        for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
                len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
 -      return len + skb_headlen(skb);
 +      return len;
 +}
 +
 +static inline unsigned int skb_pagelen(const struct sk_buff *skb)
 +{
 +      return skb_headlen(skb) + __skb_pagelen(skb);
  }
  
  /**
@@@ -2533,17 -2450,7 +2549,17 @@@ static inline void skb_orphan(struct sk
   */
  static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
  {
 -      if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
 +      if (likely(!skb_zcopy(skb)))
 +              return 0;
 +      if (skb_uarg(skb)->callback == sock_zerocopy_callback)
 +              return 0;
 +      return skb_copy_ubufs(skb, gfp_mask);
 +}
 +
 +/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */
 +static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
 +{
 +      if (likely(!skb_zcopy(skb)))
                return 0;
        return skb_copy_ubufs(skb, gfp_mask);
  }
@@@ -2934,25 -2841,42 +2950,42 @@@ static inline int skb_padto(struct sk_b
   *    skb_put_padto - increase size and pad an skbuff up to a minimal size
   *    @skb: buffer to pad
   *    @len: minimal length
+  *    @free_on_error: free buffer on error
   *
   *    Pads up a buffer to ensure the trailing bytes exist and are
   *    blanked. If the buffer already contains sufficient data it
   *    is untouched. Otherwise it is extended. Returns zero on
-  *    success. The skb is freed on error.
+  *    success. The skb is freed on error if @free_on_error is true.
   */
- static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+ static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len,
+                                 bool free_on_error)
  {
        unsigned int size = skb->len;
  
        if (unlikely(size < len)) {
                len -= size;
-               if (skb_pad(skb, len))
+               if (__skb_pad(skb, len, free_on_error))
                        return -ENOMEM;
                __skb_put(skb, len);
        }
        return 0;
  }
  
+ /**
+  *    skb_put_padto - increase size and pad an skbuff up to a minimal size
+  *    @skb: buffer to pad
+  *    @len: minimal length
+  *
+  *    Pads up a buffer to ensure the trailing bytes exist and are
+  *    blanked. If the buffer already contains sufficient data it
+  *    is untouched. Otherwise it is extended. Returns zero on
+  *    success. The skb is freed on error.
+  */
+ static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+ {
+       return __skb_put_padto(skb, len, true);
+ }
  static inline int skb_add_data(struct sk_buff *skb,
                               struct iov_iter *from, int copy)
  {
  static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
                                    const struct page *page, int off)
  {
 +      if (skb_zcopy(skb))
 +              return false;
        if (i) {
                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
  
@@@ -3231,9 -3153,6 +3264,9 @@@ __wsum skb_copy_and_csum_bits(const str
  int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int len,
                    unsigned int flags);
 +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 +                       int len);
 +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
  void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
  unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
  int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
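
The zerocopy additions above introduce reference-counted completion notifications via struct ubuf_info. A hedged sketch of how a sendmsg path is expected to use the declared helpers; the real TCP MSG_ZEROCOPY user differs in detail, and example_zerocopy_send() is illustrative only.

static int example_zerocopy_send(struct sock *sk, struct sk_buff *skb,
				 struct msghdr *msg, int len)
{
	struct ubuf_info *uarg;
	int err;

	/* Reuse an existing notification on the skb if possible. */
	uarg = sock_zerocopy_realloc(sk, len, skb_zcopy(skb));
	if (!uarg)
		return -ENOBUFS;

	/* Pins user pages into frags and associates uarg with the skb. */
	err = skb_zerocopy_iter_stream(sk, skb, msg, len, uarg);
	if (err < 0) {
		sock_zerocopy_put_abort(uarg);	/* revert zckey on error */
		return err;
	}

	/* On TX completion sock_zerocopy_callback() queues the notification
	 * on the socket error queue and drops the final reference.
	 */
	return err;
}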
diff --combined include/net/ip6_fib.h
index 71c1646298ae369e29f266d9e3b079635f22b95e,af509f801084dcf19a27f9d4c82344c5e54fe792..d060d711a6245b63a662c5ecdf839a0d4afe1af8
  #include <linux/ipv6_route.h>
  #include <linux/rtnetlink.h>
  #include <linux/spinlock.h>
 +#include <linux/notifier.h>
  #include <net/dst.h>
  #include <net/flow.h>
  #include <net/netlink.h>
  #include <net/inetpeer.h>
 +#include <net/fib_notifier.h>
  
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  #define FIB6_TABLE_HASHSZ 256
@@@ -72,6 -70,7 +72,7 @@@ struct fib6_node 
        __u16                   fn_flags;
        int                     fn_sernum;
        struct rt6_info         *rr_ptr;
+       struct rcu_head         rcu;
  };
  
  #ifndef CONFIG_IPV6_SUBTREES
@@@ -106,7 -105,7 +107,7 @@@ struct rt6_info 
         * the same cache line.
         */
        struct fib6_table               *rt6i_table;
-       struct fib6_node                *rt6i_node;
+       struct fib6_node __rcu          *rt6i_node;
  
        struct in6_addr                 rt6i_gateway;
  
  
        atomic_t                        rt6i_ref;
  
 +      unsigned int                    rt6i_nh_flags;
 +
        /* These are in a separate cache line. */
        struct rt6key                   rt6i_dst ____cacheline_aligned_in_smp;
        u32                             rt6i_flags;
@@@ -171,13 -168,40 +172,40 @@@ static inline void rt6_update_expires(s
        rt0->rt6i_flags |= RTF_EXPIRES;
  }
  
+ /* Function to safely get fn->sernum for passed in rt
+  * and store result in passed in cookie.
+  * Return true if we can get cookie safely
+  * Return false if not
+  */
+ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
+                                      u32 *cookie)
+ {
+       struct fib6_node *fn;
+       bool status = false;
+       rcu_read_lock();
+       fn = rcu_dereference(rt->rt6i_node);
+       if (fn) {
+               *cookie = fn->fn_sernum;
+               status = true;
+       }
+       rcu_read_unlock();
+       return status;
+ }
  static inline u32 rt6_get_cookie(const struct rt6_info *rt)
  {
+       u32 cookie = 0;
        if (rt->rt6i_flags & RTF_PCPU ||
            (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
                rt = (struct rt6_info *)(rt->dst.from);
  
-       return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+       rt6_get_cookie_safe(rt, &cookie);
+       return cookie;
  }
  
  static inline void ip6_rt_put(struct rt6_info *rt)
        dst_release(&rt->dst);
  }
  
 +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
 +
 +static inline void rt6_hold(struct rt6_info *rt)
 +{
 +      atomic_inc(&rt->rt6i_ref);
 +}
 +
 +static inline void rt6_release(struct rt6_info *rt)
 +{
 +      if (atomic_dec_and_test(&rt->rt6i_ref)) {
 +              rt6_free_pcpu(rt);
 +              dst_dev_put(&rt->dst);
 +              dst_release(&rt->dst);
 +      }
 +}
 +
  enum fib6_walk_state {
  #ifdef CONFIG_IPV6_SUBTREES
        FWS_S,
@@@ -253,7 -261,6 +281,7 @@@ struct fib6_table 
        struct fib6_node        tb6_root;
        struct inet_peer_base   tb6_peers;
        unsigned int            flags;
 +      unsigned int            fib_seq;
  #define RT6_TABLE_HAS_DFLT_ROUTER     BIT(0)
  };
  
@@@ -277,11 -284,6 +305,11 @@@ typedef struct rt6_info *(*pol_lookup_t
                                         struct fib6_table *,
                                         struct flowi6 *, int);
  
 +struct fib6_entry_notifier_info {
 +      struct fib_notifier_info info; /* must be first */
 +      struct rt6_info *rt;
 +};
 +
  /*
   *    exported functions
   */
@@@ -318,24 -320,9 +346,24 @@@ int fib6_init(void)
  
  int ipv6_route_open(struct inode *inode, struct file *file);
  
 +int call_fib6_notifier(struct notifier_block *nb, struct net *net,
 +                     enum fib_event_type event_type,
 +                     struct fib_notifier_info *info);
 +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
 +                      struct fib_notifier_info *info);
 +
 +int __net_init fib6_notifier_init(struct net *net);
 +void __net_exit fib6_notifier_exit(struct net *net);
 +
 +unsigned int fib6_tables_seq_read(struct net *net);
 +int fib6_tables_dump(struct net *net, struct notifier_block *nb);
 +
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  int fib6_rules_init(void);
  void fib6_rules_cleanup(void);
 +bool fib6_rule_default(const struct fib_rule *rule);
 +int fib6_rules_dump(struct net *net, struct notifier_block *nb);
 +unsigned int fib6_rules_seq_read(struct net *net);
  #else
  static inline int               fib6_rules_init(void)
  {
@@@ -345,17 -332,5 +373,17 @@@ static inline void              fib6_ru
  {
        return ;
  }
 +static inline bool fib6_rule_default(const struct fib_rule *rule)
 +{
 +      return true;
 +}
 +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
 +{
 +      return 0;
 +}
 +static inline unsigned int fib6_rules_seq_read(struct net *net)
 +{
 +      return 0;
 +}
  #endif
  #endif
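
rt6_get_cookie_safe() exists because rt6i_node is now RCU-protected and can disappear under a reader. A sketch of the kind of cache-validation check it enables; ip6_dst_check() in net/ipv6/route.c is the real consumer, example_dst_still_valid() is illustrative only.

static bool example_dst_still_valid(struct rt6_info *rt, u32 saved_cookie)
{
	u32 cookie;

	/* Fails if the route was already unlinked from the FIB tree. */
	if (!rt6_get_cookie_safe(rt, &cookie))
		return false;

	/* An unchanged sernum means no tree change invalidated the dst. */
	return cookie == saved_cookie;
}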
index d6247a3c40df1209bdf50aacdca5d260c6962e4c,c1109cdbbfa6afb9aff0d6033aef7b615630ffc1..135f5a2dd93122dd905557028068a31aeea37cb0
@@@ -75,6 -75,7 +75,6 @@@ struct Qdisc 
        struct hlist_node       hash;
        u32                     handle;
        u32                     parent;
 -      void                    *u32_node;
  
        struct netdev_queue     *dev_queue;
  
        spinlock_t              busylock ____cacheline_aligned_in_smp;
  };
  
+ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
+ {
+       if (qdisc->flags & TCQ_F_BUILTIN)
+               return;
+       refcount_inc(&qdisc->refcnt);
+ }
  static inline bool qdisc_is_running(const struct Qdisc *qdisc)
  {
        return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
@@@ -146,7 -154,8 +153,7 @@@ struct Qdisc_class_ops 
        void                    (*qlen_notify)(struct Qdisc *, unsigned long);
  
        /* Class manipulation routines */
 -      unsigned long           (*get)(struct Qdisc *, u32 classid);
 -      void                    (*put)(struct Qdisc *, unsigned long);
 +      unsigned long           (*find)(struct Qdisc *, u32 classid);
        int                     (*change)(struct Qdisc *, u32, u32,
                                        struct nlattr **, unsigned long *);
        int                     (*delete)(struct Qdisc *, unsigned long);
  
        /* Filter manipulation */
        struct tcf_block *      (*tcf_block)(struct Qdisc *, unsigned long);
 -      bool                    (*tcf_cl_offload)(u32 classid);
        unsigned long           (*bind_tcf)(struct Qdisc *, unsigned long,
                                        u32 classid);
        void                    (*unbind_tcf)(struct Qdisc *, unsigned long);
@@@ -210,17 -220,16 +217,17 @@@ struct tcf_proto_ops 
        int                     (*init)(struct tcf_proto*);
        void                    (*destroy)(struct tcf_proto*);
  
 -      unsigned long           (*get)(struct tcf_proto*, u32 handle);
 +      void*                   (*get)(struct tcf_proto*, u32 handle);
        int                     (*change)(struct net *net, struct sk_buff *,
                                        struct tcf_proto*, unsigned long,
                                        u32 handle, struct nlattr **,
 -                                      unsigned long *, bool);
 -      int                     (*delete)(struct tcf_proto*, unsigned long, bool*);
 +                                      void **, bool);
 +      int                     (*delete)(struct tcf_proto*, void *, bool*);
        void                    (*walk)(struct tcf_proto*, struct tcf_walker *arg);
 +      void                    (*bind_class)(void *, u32, unsigned long);
  
        /* rtnetlink specific */
 -      int                     (*dump)(struct net*, struct tcf_proto*, unsigned long,
 +      int                     (*dump)(struct net*, struct tcf_proto*, void *,
                                        struct sk_buff *skb, struct tcmsg*);
  
        struct module           *owner;
@@@ -392,9 -401,6 +399,9 @@@ qdisc_class_find(const struct Qdisc_cla
        struct Qdisc_class_common *cl;
        unsigned int h;
  
 +      if (!id)
 +              return NULL;
 +
        h = qdisc_class_hash(id, hash->hashmask);
        hlist_for_each_entry(cl, &hash->hash[h], hnode) {
                if (cl->classid == id)
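
With the ->get()/->put() pair collapsed into ->find(), classful qdiscs return a bare handle and take no reference, and the new classid 0 check in qdisc_class_find() makes the lookup safe to call unconditionally. A hedged sketch of a converted class op; example_sched_data and its clhash are hypothetical.

struct example_sched_data {
	struct Qdisc_class_hash	clhash;
};

static unsigned long example_find(struct Qdisc *sch, u32 classid)
{
	struct example_sched_data *q = qdisc_priv(sch);

	/* No refcount is taken any more; 0 simply means "not found". */
	return (unsigned long)qdisc_class_find(&q->clhash, classid);
}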
diff --combined include/net/tcp.h
index 9c3db054e47f1a27fe3ff4a4a081674f424a7a93,f642a39f9eeeeb3a1bbff48fd467c3a3acb96160..b510f284427aabc1f508d24d29d0f812e5e0aa61
@@@ -139,7 -139,6 +139,7 @@@ void tcp_time_wait(struct sock *sk, in
  #endif
  #define TCP_RTO_MAX   ((unsigned)(120*HZ))
  #define TCP_RTO_MIN   ((unsigned)(HZ/5))
 +#define TCP_TIMEOUT_MIN       (2U) /* Min timeout for TCP timers in jiffies */
  #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))   /* RFC6298 2.1 initial RTO value        */
  #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))       /* RFC 1122 initial RTO value, now
                                                 * used as a fallback RTO for the
  #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
                                                         * for local resources.
                                                         */
 -#define TCP_REO_TIMEOUT_MIN   (2000) /* Min RACK reordering timeout in usec */
 -
  #define TCP_KEEPALIVE_TIME    (120*60*HZ)     /* two hours */
  #define TCP_KEEPALIVE_PROBES  9               /* Max of 9 keepalive probes    */
  #define TCP_KEEPALIVE_INTVL   (75*HZ)
@@@ -256,6 -257,7 +256,6 @@@ extern int sysctl_tcp_rmem[3]
  extern int sysctl_tcp_app_win;
  extern int sysctl_tcp_adv_win_scale;
  extern int sysctl_tcp_frto;
 -extern int sysctl_tcp_low_latency;
  extern int sysctl_tcp_nometrics_save;
  extern int sysctl_tcp_moderate_rcvbuf;
  extern int sysctl_tcp_tso_win_divisor;
@@@ -350,11 -352,8 +350,11 @@@ int tcp_v4_rcv(struct sk_buff *skb)
  
  int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
  int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
  int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
                 int flags);
 +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 +                      size_t size, int flags);
  ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
                 size_t size, int flags);
  void tcp_release_cb(struct sock *sk);
@@@ -364,7 -363,7 +364,7 @@@ void tcp_delack_timer_handler(struct so
  int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 -                       const struct tcphdr *th, unsigned int len);
 +                       const struct tcphdr *th);
  void tcp_rcv_space_adjust(struct sock *sk);
  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
  void tcp_twsk_destructor(struct sock *sk);
@@@ -797,12 -796,6 +797,12 @@@ struct tcp_skb_cb 
                        u16     tcp_gso_segs;
                        u16     tcp_gso_size;
                };
 +
 +              /* Used to stash the receive timestamp while this skb is in the
 +               * out of order queue, as skb->tstamp is overwritten by the
 +               * rbnode.
 +               */
 +              ktime_t         swtstamp;
        };
        __u8            tcp_flags;      /* TCP header flags. (tcp[13])  */
  
        __u8            ip_dsfield;     /* IPv4 tos or IPv6 dsfield     */
        __u8            txstamp_ack:1,  /* Record TX timestamp for ack? */
                        eor:1,          /* Is skb MSG_EOR marked? */
 -                      unused:6;
 +                      has_rxtstamp:1, /* SKB has a RX timestamp       */
 +                      unused:5;
        __u32           ack_seq;        /* Sequence number ACK'd        */
        union {
                struct {
@@@ -857,16 -849,6 +857,16 @@@ static inline int tcp_v6_iif(const stru
  
        return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
  }
 +
 +/* TCP_SKB_CB reference means this can not be used from early demux */
 +static inline int tcp_v6_sdif(const struct sk_buff *skb)
 +{
 +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
 +      if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
 +              return TCP_SKB_CB(skb)->header.h6.iif;
 +#endif
 +      return 0;
 +}
  #endif
  
  /* TCP_SKB_CB reference means this can not be used from early demux */
@@@ -880,16 -862,6 +880,16 @@@ static inline bool inet_exact_dif_match
        return false;
  }
  
 +/* TCP_SKB_CB reference means this can not be used from early demux */
 +static inline int tcp_v4_sdif(struct sk_buff *skb)
 +{
 +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
 +      if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
 +              return TCP_SKB_CB(skb)->header.h4.iif;
 +#endif
 +      return 0;
 +}
 +
  /* Due to TSO, an SKB can be composed of multiple actual
   * packets.  To keep these tracked properly, we use this.
   */
@@@ -1032,9 -1004,7 +1032,7 @@@ void tcp_get_default_congestion_control
  void tcp_get_available_congestion_control(char *buf, size_t len);
  void tcp_get_allowed_congestion_control(char *buf, size_t len);
  int tcp_set_allowed_congestion_control(char *allowed);
- int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
- void tcp_reinit_congestion_control(struct sock *sk,
-                                  const struct tcp_congestion_ops *ca);
+ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit);
  u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
  void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
  
@@@ -1273,6 -1243,17 +1271,6 @@@ static inline bool tcp_checksum_complet
                __tcp_checksum_complete(skb);
  }
  
 -/* Prequeue for VJ style copy to user, combined with checksumming. */
 -
 -static inline void tcp_prequeue_init(struct tcp_sock *tp)
 -{
 -      tp->ucopy.task = NULL;
 -      tp->ucopy.len = 0;
 -      tp->ucopy.memory = 0;
 -      skb_queue_head_init(&tp->ucopy.prequeue);
 -}
 -
 -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
  bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
  int tcp_filter(struct sock *sk, struct sk_buff *skb);
  
@@@ -1564,7 -1545,8 +1562,7 @@@ int tcp_fastopen_reset_cipher(void *key
  void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
  struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
                              struct request_sock *req,
 -                            struct tcp_fastopen_cookie *foc,
 -                            struct dst_entry *dst);
 +                            struct tcp_fastopen_cookie *foc);
  void tcp_fastopen_init_key_once(bool publish);
  bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
                             struct tcp_fastopen_cookie *cookie);
@@@ -1945,8 -1927,7 +1943,8 @@@ static inline s64 tcp_rto_delta_us(cons
  /*
   * Save and compile IPv4 options, return a pointer to it
   */
 -static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
 +static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
 +                                                       struct sk_buff *skb)
  {
        const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
        struct ip_options_rcu *dopt = NULL;
                int opt_size = sizeof(*dopt) + opt->optlen;
  
                dopt = kmalloc(opt_size, GFP_ATOMIC);
 -              if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
 +              if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
                        kfree(dopt);
                        dopt = NULL;
                }
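
tcp_sendmsg_locked() and tcp_sendpage_locked() are split out so callers that already own the socket lock can reuse the TCP send path. A sketch of the calling convention, for illustration; this mirrors what tcp_sendmsg() itself does around the locked variant.

static int example_send_locked(struct sock *sk, struct msghdr *msg,
			       size_t size)
{
	int ret;

	lock_sock(sk);
	ret = tcp_sendmsg_locked(sk, msg, size);
	release_sock(sk);
	return ret;
}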
diff --combined include/net/udp.h
index 4e5f23fec35e6d4eb165872cc9ebec7dc31bc6d3,626c2d8a70c59f51fb5b2558433d222b56610246..12dfbfe2e2d7853427e244f9d6e2e39ca19bd41e
@@@ -260,7 -260,7 +260,7 @@@ static inline struct sk_buff *skb_recv_
  }
  
  void udp_v4_early_demux(struct sk_buff *skb);
- void udp_v4_early_demux(struct sk_buff *skb);
- void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
+ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
  int udp_get_port(struct sock *sk, unsigned short snum,
                 int (*saddr_cmp)(const struct sock *,
                                  const struct sock *));
@@@ -287,7 -287,7 +287,7 @@@ int udp_lib_setsockopt(struct sock *sk
  struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
                             __be32 daddr, __be16 dport, int dif);
  struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 -                             __be32 daddr, __be16 dport, int dif,
 +                             __be32 daddr, __be16 dport, int dif, int sdif,
                               struct udp_table *tbl, struct sk_buff *skb);
  struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
                                 __be16 sport, __be16 dport);
@@@ -298,7 -298,7 +298,7 @@@ struct sock *udp6_lib_lookup(struct ne
  struct sock *__udp6_lib_lookup(struct net *net,
                               const struct in6_addr *saddr, __be16 sport,
                               const struct in6_addr *daddr, __be16 dport,
 -                             int dif, struct udp_table *tbl,
 +                             int dif, int sdif, struct udp_table *tbl,
                               struct sk_buff *skb);
  struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
                                 __be16 sport, __be16 dport);
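
The extra sdif argument carries the slave device index for VRF/L3 master setups. Existing callers with no L3 master context can pass sdif == 0; a hedged sketch of such a wrapper, mirroring what udp4_lib_lookup() is expected to do.

static struct sock *example_udp4_lookup(struct net *net,
					__be32 saddr, __be16 sport,
					__be32 daddr, __be16 dport, int dif)
{
	/* sdif == 0: not received through an L3 master (VRF) device. */
	return __udp4_lib_lookup(net, saddr, sport, daddr, dport,
				 dif, 0, &udp_table, NULL);
}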
diff --combined kernel/events/core.c
index 8c01572709aca5c4144d30d2226e1deeab9e3e0e,3504125871d2f058fa717638e785b9c85220213a..36f98198877c71b3ba9a86a6296623a1467f6b89
@@@ -8081,7 -8081,7 +8081,7 @@@ static void perf_event_free_bpf_handler
  
  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
  {
 -      bool is_kprobe, is_tracepoint;
 +      bool is_kprobe, is_tracepoint, is_syscall_tp;
        struct bpf_prog *prog;
  
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
  
        is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
        is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
 -      if (!is_kprobe && !is_tracepoint)
 +      is_syscall_tp = is_syscall_trace_event(event->tp_event);
 +      if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
                /* bpf programs can only be attached to u/kprobe or tracepoint */
                return -EINVAL;
  
                return PTR_ERR(prog);
  
        if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
 -          (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
 +          (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
 +          (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
                /* valid fd, but invalid bpf program type */
                bpf_prog_put(prog);
                return -EINVAL;
        }
  
 -      if (is_tracepoint) {
 +      if (is_tracepoint || is_syscall_tp) {
                int off = trace_event_get_offsets(event->tp_event);
  
                if (prog->aux->max_ctx_offset > off) {
@@@ -10034,28 -10032,27 +10034,27 @@@ SYSCALL_DEFINE5(perf_event_open
                        goto err_context;
  
                /*
-                * Do not allow to attach to a group in a different
-                * task or CPU context:
+                * Make sure we're both events for the same CPU;
+                * grouping events for different CPUs is broken; since
+                * you can never concurrently schedule them anyhow.
                 */
-               if (move_group) {
-                       /*
-                        * Make sure we're both on the same task, or both
-                        * per-cpu events.
-                        */
-                       if (group_leader->ctx->task != ctx->task)
-                               goto err_context;
+               if (group_leader->cpu != event->cpu)
+                       goto err_context;
  
-                       /*
-                        * Make sure we're both events for the same CPU;
-                        * grouping events for different CPUs is broken; since
-                        * you can never concurrently schedule them anyhow.
-                        */
-                       if (group_leader->cpu != event->cpu)
-                               goto err_context;
-               } else {
-                       if (group_leader->ctx != ctx)
-                               goto err_context;
-               }
+               /*
+                * Make sure we're both on the same task, or both
+                * per-CPU events.
+                */
+               if (group_leader->ctx->task != ctx->task)
+                       goto err_context;
+               /*
+                * Do not allow to attach to a group in a different task
+                * or CPU context. If we're moving SW events, we'll fix
+                * this up later, so allow that.
+                */
+               if (!move_group && group_leader->ctx != ctx)
+                       goto err_context;
  
                /*
                 * Only a group leader can be exclusive or pinned
diff --combined net/core/datagram.c
index a4d5f10d83a1ca6cf9bb1e8dc6d6faeae5947e4d,8c2f4489ff8f18680543b6adcad7604036458d5c..f7fb7e3f2acf33e42e1140372d87dc543a2f2c7c
@@@ -362,7 -362,7 +362,7 @@@ int __sk_queue_drop_skb(struct sock *sk
        if (flags & MSG_PEEK) {
                err = -ENOENT;
                spin_lock_bh(&sk_queue->lock);
-               if (skb == skb_peek(sk_queue)) {
+               if (skb->next) {
                        __skb_unlink(skb, sk_queue);
                        refcount_dec(&skb->users);
                        if (destructor)
@@@ -579,12 -579,27 +579,12 @@@ fault
  }
  EXPORT_SYMBOL(skb_copy_datagram_from_iter);
  
 -/**
 - *    zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 - *    @skb: buffer to copy
 - *    @from: the source to copy from
 - *
 - *    The function will first copy up to headlen, and then pin the userspace
 - *    pages and build frags through them.
 - *
 - *    Returns 0, -EFAULT or -EMSGSIZE.
 - */
 -int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 +                          struct iov_iter *from, size_t length)
  {
 -      int len = iov_iter_count(from);
 -      int copy = min_t(int, skb_headlen(skb), len);
 -      int frag = 0;
 +      int frag = skb_shinfo(skb)->nr_frags;
  
 -      /* copy up to skb headlen */
 -      if (skb_copy_datagram_from_iter(skb, 0, from, copy))
 -              return -EFAULT;
 -
 -      while (iov_iter_count(from)) {
 +      while (length && iov_iter_count(from)) {
                struct page *pages[MAX_SKB_FRAGS];
                size_t start;
                ssize_t copied;
                if (frag == MAX_SKB_FRAGS)
                        return -EMSGSIZE;
  
 -              copied = iov_iter_get_pages(from, pages, ~0U,
 +              copied = iov_iter_get_pages(from, pages, length,
                                            MAX_SKB_FRAGS - frag, &start);
                if (copied < 0)
                        return -EFAULT;
  
                iov_iter_advance(from, copied);
 +              length -= copied;
  
                truesize = PAGE_ALIGN(copied + start);
                skb->data_len += copied;
                skb->len += copied;
                skb->truesize += truesize;
 -              refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 +              if (sk && sk->sk_type == SOCK_STREAM) {
 +                      sk->sk_wmem_queued += truesize;
 +                      sk_mem_charge(sk, truesize);
 +              } else {
 +                      refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 +              }
                while (copied) {
                        int size = min_t(int, copied, PAGE_SIZE - start);
                        skb_fill_page_desc(skb, frag++, pages[n], start, size);
        }
        return 0;
  }
 +EXPORT_SYMBOL(__zerocopy_sg_from_iter);
 +
 +/**
 + *    zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 + *    @skb: buffer to copy
 + *    @from: the source to copy from
 + *
 + *    The function will first copy up to headlen, and then pin the userspace
 + *    pages and build frags through them.
 + *
 + *    Returns 0, -EFAULT or -EMSGSIZE.
 + */
 +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 +{
 +      int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
 +
 +      /* copy up to skb headlen */
 +      if (skb_copy_datagram_from_iter(skb, 0, from, copy))
 +              return -EFAULT;
 +
 +      return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
 +}
  EXPORT_SYMBOL(zerocopy_sg_from_iter);
  
  static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
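
The refactored __zerocopy_sg_from_iter() takes an explicit sock and length so stream sockets can charge pinned pages to sk_wmem_queued instead of sk_wmem_alloc. A hedged sketch of a caller appending user data to an existing skb; example_append_user_pages() is illustrative only.

static int example_append_user_pages(struct sock *sk, struct sk_buff *skb,
				     struct msghdr *msg, size_t len)
{
	int err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);

	/* -EMSGSIZE: no frag slots left; a real caller would fall back
	 * to copying the remainder rather than failing the send.
	 */
	return err;
}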
diff --combined net/core/dev.c
index 270b547548213438c6cde035205758c08105ba9a,86b4b0a79e7abb6554af07ed81a7b91e2f8762bf..6f845e4fec175f333b568bfc44ed85a6a400ff6f
  #include <linux/netfilter_ingress.h>
  #include <linux/crash_dump.h>
  #include <linux/sctp.h>
 +#include <net/udp_tunnel.h>
  
  #include "net-sysfs.h"
  
@@@ -1414,7 -1413,7 +1414,7 @@@ int dev_open(struct net_device *dev
  }
  EXPORT_SYMBOL(dev_open);
  
 -static int __dev_close_many(struct list_head *head)
 +static void __dev_close_many(struct list_head *head)
  {
        struct net_device *dev;
  
                dev->flags &= ~IFF_UP;
                netpoll_poll_enable(dev);
        }
 -
 -      return 0;
  }
  
 -static int __dev_close(struct net_device *dev)
 +static void __dev_close(struct net_device *dev)
  {
 -      int retval;
        LIST_HEAD(single);
  
        list_add(&dev->close_list, &single);
 -      retval = __dev_close_many(&single);
 +      __dev_close_many(&single);
        list_del(&single);
 -
 -      return retval;
  }
  
 -int dev_close_many(struct list_head *head, bool unlink)
 +void dev_close_many(struct list_head *head, bool unlink)
  {
        struct net_device *dev, *tmp;
  
                if (unlink)
                        list_del_init(&dev->close_list);
        }
 -
 -      return 0;
  }
  EXPORT_SYMBOL(dev_close_many);
  
   *    is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
   *    chain.
   */
 -int dev_close(struct net_device *dev)
 +void dev_close(struct net_device *dev)
  {
        if (dev->flags & IFF_UP) {
                LIST_HEAD(single);
                dev_close_many(&single, true);
                list_del(&single);
        }
 -      return 0;
  }
  EXPORT_SYMBOL(dev_close);
  
@@@ -1853,7 -1860,7 +1853,7 @@@ static inline int deliver_skb(struct sk
                              struct packet_type *pt_prev,
                              struct net_device *orig_dev)
  {
 -      if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 +      if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
                return -ENOMEM;
        refcount_inc(&skb->users);
        return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@@ -2731,7 -2738,8 +2731,7 @@@ EXPORT_SYMBOL(skb_mac_gso_segment)
  static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
  {
        if (tx_path)
 -              return skb->ip_summed != CHECKSUM_PARTIAL &&
 -                     skb->ip_summed != CHECKSUM_UNNECESSARY;
 +              return skb->ip_summed != CHECKSUM_PARTIAL;
  
        return skb->ip_summed == CHECKSUM_NONE;
  }
        return NET_RX_DROP;
  }
  
 +static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 +                                   struct bpf_prog *xdp_prog)
 +{
 +      struct xdp_buff xdp;
 +      u32 act = XDP_DROP;
 +      void *orig_data;
 +      int hlen, off;
 +      u32 mac_len;
 +
 +      /* Reinjected packets coming from act_mirred or similar should
 +       * not get XDP generic processing.
 +       */
 +      if (skb_cloned(skb))
 +              return XDP_PASS;
 +
 +      if (skb_linearize(skb))
 +              goto do_drop;
 +
 +      /* The XDP program wants to see the packet starting at the MAC
 +       * header.
 +       */
 +      mac_len = skb->data - skb_mac_header(skb);
 +      hlen = skb_headlen(skb) + mac_len;
 +      xdp.data = skb->data - mac_len;
 +      xdp.data_end = xdp.data + hlen;
 +      xdp.data_hard_start = skb->data - skb_headroom(skb);
 +      orig_data = xdp.data;
 +
 +      act = bpf_prog_run_xdp(xdp_prog, &xdp);
 +
 +      off = xdp.data - orig_data;
 +      if (off > 0)
 +              __skb_pull(skb, off);
 +      else if (off < 0)
 +              __skb_push(skb, -off);
 +
 +      switch (act) {
 +      case XDP_REDIRECT:
 +      case XDP_TX:
 +              __skb_push(skb, mac_len);
 +              /* fall through */
 +      case XDP_PASS:
 +              break;
 +
 +      default:
 +              bpf_warn_invalid_xdp_action(act);
 +              /* fall through */
 +      case XDP_ABORTED:
 +              trace_xdp_exception(skb->dev, xdp_prog, act);
 +              /* fall through */
 +      case XDP_DROP:
 +      do_drop:
 +              kfree_skb(skb);
 +              break;
 +      }
 +
 +      return act;
 +}
 +
 +/* When doing generic XDP we have to bypass the qdisc layer and the
 + * network taps in order to match in-driver-XDP behavior.
 + */
 +void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 +{
 +      struct net_device *dev = skb->dev;
 +      struct netdev_queue *txq;
 +      bool free_skb = true;
 +      int cpu, rc;
 +
 +      txq = netdev_pick_tx(dev, skb, NULL);
 +      cpu = smp_processor_id();
 +      HARD_TX_LOCK(dev, txq, cpu);
 +      if (!netif_xmit_stopped(txq)) {
 +              rc = netdev_start_xmit(skb, dev, txq, 0);
 +              if (dev_xmit_complete(rc))
 +                      free_skb = false;
 +      }
 +      HARD_TX_UNLOCK(dev, txq);
 +      if (free_skb) {
 +              trace_xdp_exception(dev, xdp_prog, XDP_TX);
 +              kfree_skb(skb);
 +      }
 +}
 +EXPORT_SYMBOL_GPL(generic_xdp_tx);
 +
 +static struct static_key generic_xdp_needed __read_mostly;
 +
 +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 +{
 +      if (xdp_prog) {
 +              u32 act = netif_receive_generic_xdp(skb, xdp_prog);
 +              int err;
 +
 +              if (act != XDP_PASS) {
 +                      switch (act) {
 +                      case XDP_REDIRECT:
 +                              err = xdp_do_generic_redirect(skb->dev, skb,
 +                                                            xdp_prog);
 +                              if (err)
 +                                      goto out_redir;
 +                      /* fallthru to submit skb */
 +                      case XDP_TX:
 +                              generic_xdp_tx(skb, xdp_prog);
 +                              break;
 +                      }
 +                      return XDP_DROP;
 +              }
 +      }
 +      return XDP_PASS;
 +out_redir:
 +      kfree_skb(skb);
 +      return XDP_DROP;
 +}
 +EXPORT_SYMBOL_GPL(do_xdp_generic);
 +
  static int netif_rx_internal(struct sk_buff *skb)
  {
        int ret;
        net_timestamp_check(netdev_tstamp_prequeue, skb);
  
        trace_netif_rx(skb);
 +
 +      if (static_key_false(&generic_xdp_needed)) {
 +              int ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
 +                                       skb);
 +
 +              /* Consider XDP consuming the packet a success from
 +               * the netdev point of view we do not want to count
 +               * this as an error.
 +               */
 +              if (ret != XDP_PASS)
 +                      return NET_RX_SUCCESS;
 +      }
 +
  #ifdef CONFIG_RPS
        if (static_key_false(&rps_needed)) {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
@@@ -4412,7 -4292,7 +4412,7 @@@ skip_classify
        }
  
        if (pt_prev) {
 -              if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 +              if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
                        goto drop;
                else
                        ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@@ -4458,6 -4338,8 +4458,6 @@@ static int __netif_receive_skb(struct s
        return ret;
  }
  
 -static struct static_key generic_xdp_needed __read_mostly;
 -
  static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
  {
        struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
        return ret;
  }
  
 -static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 -                                   struct bpf_prog *xdp_prog)
 -{
 -      struct xdp_buff xdp;
 -      u32 act = XDP_DROP;
 -      void *orig_data;
 -      int hlen, off;
 -      u32 mac_len;
 -
 -      /* Reinjected packets coming from act_mirred or similar should
 -       * not get XDP generic processing.
 -       */
 -      if (skb_cloned(skb))
 -              return XDP_PASS;
 -
 -      if (skb_linearize(skb))
 -              goto do_drop;
 -
 -      /* The XDP program wants to see the packet starting at the MAC
 -       * header.
 -       */
 -      mac_len = skb->data - skb_mac_header(skb);
 -      hlen = skb_headlen(skb) + mac_len;
 -      xdp.data = skb->data - mac_len;
 -      xdp.data_end = xdp.data + hlen;
 -      xdp.data_hard_start = skb->data - skb_headroom(skb);
 -      orig_data = xdp.data;
 -
 -      act = bpf_prog_run_xdp(xdp_prog, &xdp);
 -
 -      off = xdp.data - orig_data;
 -      if (off > 0)
 -              __skb_pull(skb, off);
 -      else if (off < 0)
 -              __skb_push(skb, -off);
 -
 -      switch (act) {
 -      case XDP_TX:
 -              __skb_push(skb, mac_len);
 -              /* fall through */
 -      case XDP_PASS:
 -              break;
 -
 -      default:
 -              bpf_warn_invalid_xdp_action(act);
 -              /* fall through */
 -      case XDP_ABORTED:
 -              trace_xdp_exception(skb->dev, xdp_prog, act);
 -              /* fall through */
 -      case XDP_DROP:
 -      do_drop:
 -              kfree_skb(skb);
 -              break;
 -      }
 -
 -      return act;
 -}
 -
 -/* When doing generic XDP we have to bypass the qdisc layer and the
 - * network taps in order to match in-driver-XDP behavior.
 - */
 -static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 -{
 -      struct net_device *dev = skb->dev;
 -      struct netdev_queue *txq;
 -      bool free_skb = true;
 -      int cpu, rc;
 -
 -      txq = netdev_pick_tx(dev, skb, NULL);
 -      cpu = smp_processor_id();
 -      HARD_TX_LOCK(dev, txq, cpu);
 -      if (!netif_xmit_stopped(txq)) {
 -              rc = netdev_start_xmit(skb, dev, txq, 0);
 -              if (dev_xmit_complete(rc))
 -                      free_skb = false;
 -      }
 -      HARD_TX_UNLOCK(dev, txq);
 -      if (free_skb) {
 -              trace_xdp_exception(dev, xdp_prog, XDP_TX);
 -              kfree_skb(skb);
 -      }
 -}
 -
  static int netif_receive_skb_internal(struct sk_buff *skb)
  {
        int ret;
        rcu_read_lock();
  
        if (static_key_false(&generic_xdp_needed)) {
 -              struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
 +              int ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
 +                                       skb);
  
 -              if (xdp_prog) {
 -                      u32 act = netif_receive_generic_xdp(skb, xdp_prog);
 -
 -                      if (act != XDP_PASS) {
 -                              rcu_read_unlock();
 -                              if (act == XDP_TX)
 -                                      generic_xdp_tx(skb, xdp_prog);
 -                              return NET_RX_DROP;
 -                      }
 +              if (ret != XDP_PASS) {
 +                      rcu_read_unlock();
 +                      return NET_RX_DROP;
                }
        }
  
@@@ -5319,6 -5289,7 +5319,7 @@@ static void busy_poll_stop(struct napi_
         * Ideally, a new ndo_busy_poll_stop() could avoid another round.
         */
        rc = napi->poll(napi, BUSY_POLL_BUDGET);
+       trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
        netpoll_poll_unlock(have_poll_lock);
        if (rc == BUSY_POLL_BUDGET)
                __napi_schedule(napi);
@@@ -5697,12 -5668,13 +5698,13 @@@ EXPORT_SYMBOL(netdev_has_upper_dev_all_
   * Find out if a device is linked to an upper device and return true in case
   * it is. The caller must hold the RTNL lock.
   */
- static bool netdev_has_any_upper_dev(struct net_device *dev)
+ bool netdev_has_any_upper_dev(struct net_device *dev)
  {
        ASSERT_RTNL();
  
        return !list_empty(&dev->adj_list.upper);
  }
+ EXPORT_SYMBOL(netdev_has_any_upper_dev);
  
  /**
   * netdev_master_upper_dev_get - Get master upper device
@@@ -6719,12 -6691,8 +6721,12 @@@ int __dev_change_flags(struct net_devic
         */
  
        ret = 0;
 -      if ((old_flags ^ flags) & IFF_UP)
 -              ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
 +      if ((old_flags ^ flags) & IFF_UP) {
 +              if (old_flags & IFF_UP)
 +                      __dev_close(dev);
 +              else
 +                      ret = __dev_open(dev);
 +      }
  
        if ((flags ^ dev->gflags) & IFF_PROMISC) {
                int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@@ -7269,6 -7237,24 +7271,6 @@@ static netdev_features_t netdev_fix_fea
                features &= ~NETIF_F_GSO;
        }
  
 -      /* UFO needs SG and checksumming */
 -      if (features & NETIF_F_UFO) {
 -              /* maybe split UFO into V4 and V6? */
 -              if (!(features & NETIF_F_HW_CSUM) &&
 -                  ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
 -                   (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
 -                      netdev_dbg(dev,
 -                              "Dropping NETIF_F_UFO since no checksum offload features.\n");
 -                      features &= ~NETIF_F_UFO;
 -              }
 -
 -              if (!(features & NETIF_F_SG)) {
 -                      netdev_dbg(dev,
 -                              "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
 -                      features &= ~NETIF_F_UFO;
 -              }
 -      }
 -
        /* GSO partial features require GSO partial be set */
        if ((features & dev->gso_partial_features) &&
            !(features & NETIF_F_GSO_PARTIAL)) {
@@@ -7329,27 -7315,8 +7331,27 @@@ sync_lower
        netdev_for_each_lower_dev(dev, lower, iter)
                netdev_sync_lower_features(dev, lower, features);
  
 -      if (!err)
 +      if (!err) {
 +              netdev_features_t diff = features ^ dev->features;
 +
 +              if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
 +                      /* udp_tunnel_{get,drop}_rx_info both need
 +                       * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
 +                       * device, or they won't do anything.
 +                       * Thus we need to update dev->features
 +                       * *before* calling udp_tunnel_get_rx_info,
 +                       * but *after* calling udp_tunnel_drop_rx_info.
 +                       */
 +                      if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
 +                              dev->features = features;
 +                              udp_tunnel_get_rx_info(dev);
 +                      } else {
 +                              udp_tunnel_drop_rx_info(dev);
 +                      }
 +              }
 +
                dev->features = features;
 +      }
  
        return err < 0 ? 0 : 1;
  }
@@@ -7551,12 -7518,6 +7553,12 @@@ int register_netdevice(struct net_devic
         */
        dev->hw_features |= NETIF_F_SOFT_FEATURES;
        dev->features |= NETIF_F_SOFT_FEATURES;
 +
 +      if (dev->netdev_ops->ndo_udp_tunnel_add) {
 +              dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
 +              dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
 +      }
 +
        dev->wanted_features = dev->features & dev->hw_features;
  
        if (!(dev->flags & IFF_LOOPBACK))
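
generic_xdp_tx() and do_xdp_generic() are exported here so drivers that inject skbs outside the normal receive path (tun/virtio style) can honour a generic XDP program themselves. A sketch of such a call site; example_rx() is hypothetical.

static int example_rx(struct net_device *dev, struct sk_buff *skb)
{
	u32 act;

	skb->dev = dev;

	rcu_read_lock();
	act = do_xdp_generic(rcu_dereference(dev->xdp_prog), skb);
	rcu_read_unlock();

	/* Anything but XDP_PASS means the skb was already dropped,
	 * transmitted or redirected; treat it as handled.
	 */
	if (act != XDP_PASS)
		return NET_RX_SUCCESS;

	return netif_rx(skb);
}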
diff --combined net/core/filter.c
index f9add024d92fcec8fb6dacce0a2251336204158b,169974998c7692b063947cb925fede167f2fb817..5912c738a7b272e3fb3eb1d66317fe2df9ce0c6d
@@@ -55,7 -55,6 +55,7 @@@
  #include <net/sock_reuseport.h>
  #include <net/busy_poll.h>
  #include <net/tcp.h>
 +#include <linux/bpf_trace.h>
  
  /**
   *    sk_filter_trim_cap - run a packet through a socket filter
@@@ -514,27 -513,14 +514,27 @@@ do_pass
                                break;
                        }
  
 -                      /* Convert JEQ into JNE when 'jump_true' is next insn. */
 -                      if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
 -                              insn->code = BPF_JMP | BPF_JNE | bpf_src;
 +                      /* Convert some jumps when 'jump_true' is next insn. */
 +                      if (fp->jt == 0) {
 +                              switch (BPF_OP(fp->code)) {
 +                              case BPF_JEQ:
 +                                      insn->code = BPF_JMP | BPF_JNE | bpf_src;
 +                                      break;
 +                              case BPF_JGT:
 +                                      insn->code = BPF_JMP | BPF_JLE | bpf_src;
 +                                      break;
 +                              case BPF_JGE:
 +                                      insn->code = BPF_JMP | BPF_JLT | bpf_src;
 +                                      break;
 +                              default:
 +                                      goto jmp_rest;
 +                              }
 +
                                target = i + fp->jf + 1;
                                BPF_EMIT_JMP;
                                break;
                        }
 -
 +jmp_rest:
                        /* Other jumps are mapped into two insns: Jxx and JA. */
                        target = i + fp->jt + 1;
                        insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
@@@ -1792,8 -1778,6 +1792,8 @@@ static const struct bpf_func_proto bpf_
  struct redirect_info {
        u32 ifindex;
        u32 flags;
 +      struct bpf_map *map;
 +      struct bpf_map *map_to_flush;
  };
  
  static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@@ -1807,7 -1791,6 +1807,7 @@@ BPF_CALL_2(bpf_redirect, u32, ifindex, 
  
        ri->ifindex = ifindex;
        ri->flags = flags;
 +      ri->map = NULL;
  
        return TC_ACT_REDIRECT;
  }
@@@ -1835,45 -1818,6 +1835,45 @@@ static const struct bpf_func_proto bpf_
        .arg2_type      = ARG_ANYTHING,
  };
  
 +BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +
 +      if (unlikely(flags))
 +              return SK_ABORTED;
 +
 +      ri->ifindex = key;
 +      ri->flags = flags;
 +      ri->map = map;
 +
 +      return SK_REDIRECT;
 +}
 +
 +struct sock *do_sk_redirect_map(void)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +      struct sock *sk = NULL;
 +
 +      if (ri->map) {
 +              sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
 +
 +              ri->ifindex = 0;
 +              ri->map = NULL;
 +              /* we do not clear flags for future lookup */
 +      }
 +
 +      return sk;
 +}
 +
 +static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
 +      .func           = bpf_sk_redirect_map,
 +      .gpl_only       = false,
 +      .ret_type       = RET_INTEGER,
 +      .arg1_type      = ARG_CONST_MAP_PTR,
 +      .arg2_type      = ARG_ANYTHING,
 +      .arg3_type      = ARG_ANYTHING,
 +};
 +
  BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
  {
        return task_get_classid(skb);
@@@ -2080,8 -2024,8 +2080,8 @@@ static int bpf_skb_proto_4_to_6(struct 
                return ret;
  
        if (skb_is_gso(skb)) {
 -              /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
 -               * be changed into SKB_GSO_TCPV6.
 +              /* SKB_GSO_TCPV4 needs to be changed into
 +               * SKB_GSO_TCPV6.
                 */
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
                        skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
@@@ -2116,8 -2060,8 +2116,8 @@@ static int bpf_skb_proto_6_to_4(struct 
                return ret;
  
        if (skb_is_gso(skb)) {
 -              /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
 -               * be changed into SKB_GSO_TCPV4.
 +              /* SKB_GSO_TCPV6 needs to be changed into
 +               * SKB_GSO_TCPV4.
                 */
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
                        skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
@@@ -2468,180 -2412,6 +2468,180 @@@ static const struct bpf_func_proto bpf_
        .arg2_type      = ARG_ANYTHING,
  };
  
 +static int __bpf_tx_xdp(struct net_device *dev,
 +                      struct bpf_map *map,
 +                      struct xdp_buff *xdp,
 +                      u32 index)
 +{
 +      int err;
 +
 +      if (!dev->netdev_ops->ndo_xdp_xmit) {
 +              return -EOPNOTSUPP;
 +      }
 +
 +      err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
 +      if (err)
 +              return err;
 +      if (map)
 +              __dev_map_insert_ctx(map, index);
 +      else
 +              dev->netdev_ops->ndo_xdp_flush(dev);
 +      return 0;
 +}
 +
 +void xdp_do_flush_map(void)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +      struct bpf_map *map = ri->map_to_flush;
 +
 +      ri->map_to_flush = NULL;
 +      if (map)
 +              __dev_map_flush(map);
 +}
 +EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 +
 +static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 +                             struct bpf_prog *xdp_prog)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +      struct bpf_map *map = ri->map;
 +      u32 index = ri->ifindex;
 +      struct net_device *fwd;
 +      int err;
 +
 +      ri->ifindex = 0;
 +      ri->map = NULL;
 +
 +      fwd = __dev_map_lookup_elem(map, index);
 +      if (!fwd) {
 +              err = -EINVAL;
 +              goto err;
 +      }
 +      if (ri->map_to_flush && ri->map_to_flush != map)
 +              xdp_do_flush_map();
 +
 +      err = __bpf_tx_xdp(fwd, map, xdp, index);
 +      if (unlikely(err))
 +              goto err;
 +
 +      ri->map_to_flush = map;
 +      _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
 +      return 0;
 +err:
 +      _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
 +      return err;
 +}
 +
 +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 +                  struct bpf_prog *xdp_prog)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +      struct net_device *fwd;
 +      u32 index = ri->ifindex;
 +      int err;
 +
 +      if (ri->map)
 +              return xdp_do_redirect_map(dev, xdp, xdp_prog);
 +
 +      fwd = dev_get_by_index_rcu(dev_net(dev), index);
 +      ri->ifindex = 0;
 +      if (unlikely(!fwd)) {
 +              err = -EINVAL;
 +              goto err;
 +      }
 +
 +      err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
 +      if (unlikely(err))
 +              goto err;
 +
 +      _trace_xdp_redirect(dev, xdp_prog, index);
 +      return 0;
 +err:
 +      _trace_xdp_redirect_err(dev, xdp_prog, index, err);
 +      return err;
 +}
 +EXPORT_SYMBOL_GPL(xdp_do_redirect);
 +
 +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 +                          struct bpf_prog *xdp_prog)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +      u32 index = ri->ifindex;
 +      struct net_device *fwd;
 +      unsigned int len;
 +      int err = 0;
 +
 +      fwd = dev_get_by_index_rcu(dev_net(dev), index);
 +      ri->ifindex = 0;
 +      if (unlikely(!fwd)) {
 +              err = -EINVAL;
 +              goto err;
 +      }
 +
 +      if (unlikely(!(fwd->flags & IFF_UP))) {
 +              err = -ENETDOWN;
 +              goto err;
 +      }
 +
 +      len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
 +      if (skb->len > len) {
 +              err = -EMSGSIZE;
 +              goto err;
 +      }
 +
 +      skb->dev = fwd;
 +      _trace_xdp_redirect(dev, xdp_prog, index);
 +      return 0;
 +err:
 +      _trace_xdp_redirect_err(dev, xdp_prog, index, err);
 +      return err;
 +}
 +EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
 +
 +BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +
 +      if (unlikely(flags))
 +              return XDP_ABORTED;
 +
 +      ri->ifindex = ifindex;
 +      ri->flags = flags;
 +
 +      return XDP_REDIRECT;
 +}
 +
 +static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 +      .func           = bpf_xdp_redirect,
 +      .gpl_only       = false,
 +      .ret_type       = RET_INTEGER,
 +      .arg1_type      = ARG_ANYTHING,
 +      .arg2_type      = ARG_ANYTHING,
 +};
 +
 +BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags)
 +{
 +      struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 +
 +      if (unlikely(flags))
 +              return XDP_ABORTED;
 +
 +      ri->ifindex = ifindex;
 +      ri->flags = flags;
 +      ri->map = map;
 +
 +      return XDP_REDIRECT;
 +}
 +
 +static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 +      .func           = bpf_xdp_redirect_map,
 +      .gpl_only       = false,
 +      .ret_type       = RET_INTEGER,
 +      .arg1_type      = ARG_CONST_MAP_PTR,
 +      .arg2_type      = ARG_ANYTHING,
 +      .arg3_type      = ARG_ANYTHING,
 +};
 +
  bool bpf_helper_changes_pkt_data(void *func)
  {
        if (func == bpf_skb_vlan_push ||
@@@ -3066,15 -2836,12 +3066,12 @@@ BPF_CALL_5(bpf_setsockopt, struct bpf_s
                   sk->sk_prot->setsockopt == tcp_setsockopt) {
                if (optname == TCP_CONGESTION) {
                        char name[TCP_CA_NAME_MAX];
+                       bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
  
                        strncpy(name, optval, min_t(long, optlen,
                                                    TCP_CA_NAME_MAX-1));
                        name[TCP_CA_NAME_MAX-1] = 0;
-                       ret = tcp_set_congestion_control(sk, name, false);
-                       if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
-                               /* replacing an existing ca */
-                               tcp_reinit_congestion_control(sk,
-                                       inet_csk(sk)->icsk_ca_ops);
+                       ret = tcp_set_congestion_control(sk, name, false, reinit);
                } else {
                        struct tcp_sock *tp = tcp_sk(sk);
  
                                ret = -EINVAL;
                        }
                }
-               ret = -EINVAL;
  #endif
        } else {
                ret = -EINVAL;
@@@ -3149,20 -2915,6 +3145,20 @@@ bpf_base_func_proto(enum bpf_func_id fu
        }
  }
  
 +static const struct bpf_func_proto *
 +sock_filter_func_proto(enum bpf_func_id func_id)
 +{
 +      switch (func_id) {
 +      /* inet and inet6 sockets are created in a process
 +       * context so there is always a valid uid/gid
 +       */
 +      case BPF_FUNC_get_current_uid_gid:
 +              return &bpf_get_current_uid_gid_proto;
 +      default:
 +              return bpf_base_func_proto(func_id);
 +      }
 +}
 +
  static const struct bpf_func_proto *
  sk_filter_func_proto(enum bpf_func_id func_id)
  {
@@@ -3255,10 -3007,6 +3251,10 @@@ xdp_func_proto(enum bpf_func_id func_id
                return &bpf_get_smp_processor_id_proto;
        case BPF_FUNC_xdp_adjust_head:
                return &bpf_xdp_adjust_head_proto;
 +      case BPF_FUNC_redirect:
 +              return &bpf_xdp_redirect_proto;
 +      case BPF_FUNC_redirect_map:
 +              return &bpf_xdp_redirect_map_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@@ -3297,32 -3045,6 +3293,32 @@@ static const struct bpf_func_proto 
        switch (func_id) {
        case BPF_FUNC_setsockopt:
                return &bpf_setsockopt_proto;
 +      case BPF_FUNC_sock_map_update:
 +              return &bpf_sock_map_update_proto;
 +      default:
 +              return bpf_base_func_proto(func_id);
 +      }
 +}
 +
 +static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
 +{
 +      switch (func_id) {
 +      case BPF_FUNC_skb_store_bytes:
 +              return &bpf_skb_store_bytes_proto;
 +      case BPF_FUNC_skb_load_bytes:
 +              return &bpf_skb_load_bytes_proto;
 +      case BPF_FUNC_skb_pull_data:
 +              return &bpf_skb_pull_data_proto;
 +      case BPF_FUNC_skb_change_tail:
 +              return &bpf_skb_change_tail_proto;
 +      case BPF_FUNC_skb_change_head:
 +              return &bpf_skb_change_head_proto;
 +      case BPF_FUNC_get_socket_cookie:
 +              return &bpf_get_socket_cookie_proto;
 +      case BPF_FUNC_get_socket_uid:
 +              return &bpf_get_socket_uid_proto;
 +      case BPF_FUNC_sk_redirect_map:
 +              return &bpf_sk_redirect_map_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@@ -3380,10 -3102,6 +3376,10 @@@ static bool bpf_skb_is_valid_access(in
                if (off + size > offsetofend(struct __sk_buff, cb[4]))
                        return false;
                break;
 +      case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
 +      case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
 +      case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
 +      case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
        case bpf_ctx_range(struct __sk_buff, data):
        case bpf_ctx_range(struct __sk_buff, data_end):
                if (size != size_default)
@@@ -3412,7 -3130,6 +3408,7 @@@ static bool sk_filter_is_valid_access(i
        case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range(struct __sk_buff, data):
        case bpf_ctx_range(struct __sk_buff, data_end):
 +      case bpf_ctx_range_till(struct __sk_buff, family, local_port):
                return false;
        }
  
@@@ -3434,7 -3151,6 +3430,7 @@@ static bool lwt_is_valid_access(int off
  {
        switch (off) {
        case bpf_ctx_range(struct __sk_buff, tc_classid):
 +      case bpf_ctx_range_till(struct __sk_buff, family, local_port):
                return false;
        }
  
@@@ -3468,8 -3184,6 +3464,8 @@@ static bool sock_filter_is_valid_access
        if (type == BPF_WRITE) {
                switch (off) {
                case offsetof(struct bpf_sock, bound_dev_if):
 +              case offsetof(struct bpf_sock, mark):
 +              case offsetof(struct bpf_sock, priority):
                        break;
                default:
                        return false;
        return true;
  }
  
 -static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 -                             const struct bpf_prog *prog)
 +static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
 +                              const struct bpf_prog *prog, int drop_verdict)
  {
        struct bpf_insn *insn = insn_buf;
  
         * return TC_ACT_SHOT;
         */
        *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
 -      *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
 +      *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
        *insn++ = BPF_EXIT_INSN();
  
        /* restore: */
        return insn - insn_buf;
  }
  
 +static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 +                             const struct bpf_prog *prog)
 +{
 +      return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
 +}
 +
  static bool tc_cls_act_is_valid_access(int off, int size,
                                       enum bpf_access_type type,
                                       struct bpf_insn_access_aux *info)
        case bpf_ctx_range(struct __sk_buff, data_end):
                info->reg_type = PTR_TO_PACKET_END;
                break;
 +      case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 +              return false;
        }
  
        return bpf_skb_is_valid_access(off, size, type, info);
@@@ -3630,41 -3336,6 +3626,41 @@@ static bool sock_ops_is_valid_access(in
        return __is_valid_sock_ops_access(off, size);
  }
  
 +static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
 +                         const struct bpf_prog *prog)
 +{
 +      return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
 +}
 +
 +static bool sk_skb_is_valid_access(int off, int size,
 +                                 enum bpf_access_type type,
 +                                 struct bpf_insn_access_aux *info)
 +{
 +      if (type == BPF_WRITE) {
 +              switch (off) {
 +              case bpf_ctx_range(struct __sk_buff, mark):
 +              case bpf_ctx_range(struct __sk_buff, tc_index):
 +              case bpf_ctx_range(struct __sk_buff, priority):
 +                      break;
 +              default:
 +                      return false;
 +              }
 +      }
 +
 +      switch (off) {
 +      case bpf_ctx_range(struct __sk_buff, tc_classid):
 +              return false;
 +      case bpf_ctx_range(struct __sk_buff, data):
 +              info->reg_type = PTR_TO_PACKET;
 +              break;
 +      case bpf_ctx_range(struct __sk_buff, data_end):
 +              info->reg_type = PTR_TO_PACKET_END;
 +              break;
 +      }
 +
 +      return bpf_skb_is_valid_access(off, size, type, info);
 +}
 +
  static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                                  const struct bpf_insn *si,
                                  struct bpf_insn *insn_buf,
                *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
  #endif
                break;
 +      case offsetof(struct __sk_buff, family):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 +                                    bpf_target_off(struct sock_common,
 +                                                   skc_family,
 +                                                   2, target_size));
 +              break;
 +      case offsetof(struct __sk_buff, remote_ip4):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 +                                    bpf_target_off(struct sock_common,
 +                                                   skc_daddr,
 +                                                   4, target_size));
 +              break;
 +      case offsetof(struct __sk_buff, local_ip4):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
 +                                        skc_rcv_saddr) != 4);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 +                                    bpf_target_off(struct sock_common,
 +                                                   skc_rcv_saddr,
 +                                                   4, target_size));
 +              break;
 +      case offsetof(struct __sk_buff, remote_ip6[0]) ...
 +           offsetof(struct __sk_buff, remote_ip6[3]):
 +#if IS_ENABLED(CONFIG_IPV6)
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
 +                                        skc_v6_daddr.s6_addr32[0]) != 4);
 +
 +              off = si->off;
 +              off -= offsetof(struct __sk_buff, remote_ip6[0]);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 +                                    offsetof(struct sock_common,
 +                                             skc_v6_daddr.s6_addr32[0]) +
 +                                    off);
 +#else
 +              *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
 +#endif
 +              break;
 +      case offsetof(struct __sk_buff, local_ip6[0]) ...
 +           offsetof(struct __sk_buff, local_ip6[3]):
 +#if IS_ENABLED(CONFIG_IPV6)
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
 +                                        skc_v6_rcv_saddr.s6_addr32[0]) != 4);
 +
 +              off = si->off;
 +              off -= offsetof(struct __sk_buff, local_ip6[0]);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 +                                    offsetof(struct sock_common,
 +                                             skc_v6_rcv_saddr.s6_addr32[0]) +
 +                                    off);
 +#else
 +              *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
 +#endif
 +              break;
 +
 +      case offsetof(struct __sk_buff, remote_port):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 +                                    bpf_target_off(struct sock_common,
 +                                                   skc_dport,
 +                                                   2, target_size));
 +#ifndef __BIG_ENDIAN_BITFIELD
 +              *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
 +#endif
 +              break;
 +
 +      case offsetof(struct __sk_buff, local_port):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
 +
 +              *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
 +                                    si->dst_reg, si->src_reg,
 +                                    offsetof(struct sk_buff, sk));
 +              *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 +                                    bpf_target_off(struct sock_common,
 +                                                   skc_num, 2, target_size));
 +              break;
        }
  
        return insn - insn_buf;
@@@ -3974,28 -3545,6 +3970,28 @@@ static u32 sock_filter_convert_ctx_acce
                                      offsetof(struct sock, sk_bound_dev_if));
                break;
  
 +      case offsetof(struct bpf_sock, mark):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
 +
 +              if (type == BPF_WRITE)
 +                      *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
 +                                      offsetof(struct sock, sk_mark));
 +              else
 +                      *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 +                                    offsetof(struct sock, sk_mark));
 +              break;
 +
 +      case offsetof(struct bpf_sock, priority):
 +              BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
 +
 +              if (type == BPF_WRITE)
 +                      *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
 +                                      offsetof(struct sock, sk_priority));
 +              else
 +                      *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 +                                    offsetof(struct sock, sk_priority));
 +              break;
 +
        case offsetof(struct bpf_sock, family):
                BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
  
@@@ -4245,7 -3794,7 +4241,7 @@@ const struct bpf_verifier_ops lwt_xmit_
  };
  
  const struct bpf_verifier_ops cg_sock_prog_ops = {
 -      .get_func_proto         = bpf_base_func_proto,
 +      .get_func_proto         = sock_filter_func_proto,
        .is_valid_access        = sock_filter_is_valid_access,
        .convert_ctx_access     = sock_filter_convert_ctx_access,
  };
@@@ -4256,13 -3805,6 +4252,13 @@@ const struct bpf_verifier_ops sock_ops_
        .convert_ctx_access     = sock_ops_convert_ctx_access,
  };
  
 +const struct bpf_verifier_ops sk_skb_prog_ops = {
 +      .get_func_proto         = sk_skb_func_proto,
 +      .is_valid_access        = sk_skb_is_valid_access,
 +      .convert_ctx_access     = bpf_convert_ctx_access,
 +      .gen_prologue           = sk_skb_prologue,
 +};
 +
  int sk_detach_filter(struct sock *sk)
  {
        int ret = -ENOENT;
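
[Editor's sketch, not part of the diff] The filter.c changes above add the XDP redirect helpers (bpf_redirect, bpf_redirect_map) and the kernel plumbing in __bpf_tx_xdp()/xdp_do_redirect*() with per-CPU redirect_info and batched map flushes. A minimal BPF program that exercises bpf_redirect_map() against a devmap; the SEC()/bpf_map_def loader conventions follow the samples/bpf style of this era and are assumptions, as is the local helper declaration:

/* Compile with: clang -O2 -target bpf -c xdp_redirect_map_kern.c
 * Requires uapi headers that already contain BPF_MAP_TYPE_DEVMAP and
 * BPF_FUNC_redirect_map (i.e. a tree with the changes above).
 */
#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
};

/* key = slot index, value = ifindex of the egress device */
struct bpf_map_def SEC("maps") tx_port = {
	.type		= BPF_MAP_TYPE_DEVMAP,
	.key_size	= sizeof(unsigned int),
	.value_size	= sizeof(unsigned int),
	.max_entries	= 1,
};

static int (*bpf_redirect_map)(void *map, unsigned int key,
			       unsigned long long flags) =
	(void *) BPF_FUNC_redirect_map;

SEC("xdp")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
	/* Redirect every frame to the device stored at slot 0; the
	 * kernel-side xdp_do_redirect_map() above does the lookup and
	 * defers the device flush via ri->map_to_flush.
	 */
	return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";
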
diff --combined net/core/skbuff.c
index 917da73d3ab3b82163cf0a9ee944da09cb5a391f,e0755660628407e5a1cefc9ed2c4a725f68628a0..246ca1c81715787bb8e58a424670965ee9fc2d95
@@@ -158,6 -158,31 +158,6 @@@ out
   *
   */
  
 -struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
 -{
 -      struct sk_buff *skb;
 -
 -      /* Get the HEAD */
 -      skb = kmem_cache_alloc_node(skbuff_head_cache,
 -                                  gfp_mask & ~__GFP_DMA, node);
 -      if (!skb)
 -              goto out;
 -
 -      /*
 -       * Only clear those fields we need to clear, not those that we will
 -       * actually initialise below. Hence, don't put any more fields after
 -       * the tail pointer in struct sk_buff!
 -       */
 -      memset(skb, 0, offsetof(struct sk_buff, tail));
 -      skb->head = NULL;
 -      skb->truesize = sizeof(struct sk_buff);
 -      refcount_set(&skb->users, 1);
 -
 -      skb->mac_header = (typeof(skb->mac_header))~0U;
 -out:
 -      return skb;
 -}
 -
  /**
   *    __alloc_skb     -       allocate a network buffer
   *    @size: size to allocate
@@@ -567,10 -592,21 +567,10 @@@ static void skb_release_data(struct sk_
        for (i = 0; i < shinfo->nr_frags; i++)
                __skb_frag_unref(&shinfo->frags[i]);
  
 -      /*
 -       * If skb buf is from userspace, we need to notify the caller
 -       * the lower device DMA has done;
 -       */
 -      if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
 -              struct ubuf_info *uarg;
 -
 -              uarg = shinfo->destructor_arg;
 -              if (uarg->callback)
 -                      uarg->callback(uarg, true);
 -      }
 -
        if (shinfo->frag_list)
                kfree_skb_list(shinfo->frag_list);
  
 +      skb_zcopy_clear(skb, true);
        skb_free_head(skb);
  }
  
@@@ -684,7 -720,14 +684,7 @@@ EXPORT_SYMBOL(kfree_skb_list)
   */
  void skb_tx_error(struct sk_buff *skb)
  {
 -      if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
 -              struct ubuf_info *uarg;
 -
 -              uarg = skb_shinfo(skb)->destructor_arg;
 -              if (uarg->callback)
 -                      uarg->callback(uarg, false);
 -              skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 -      }
 +      skb_zcopy_clear(skb, true);
  }
  EXPORT_SYMBOL(skb_tx_error);
  
@@@ -719,7 -762,8 +719,7 @@@ void consume_stateless_skb(struct sk_bu
                return;
  
        trace_consume_skb(skb);
 -      if (likely(skb->head))
 -              skb_release_data(skb);
 +      skb_release_data(skb);
        kfree_skbmem(skb);
  }
  
@@@ -897,273 -941,6 +897,273 @@@ struct sk_buff *skb_morph(struct sk_buf
  }
  EXPORT_SYMBOL_GPL(skb_morph);
  
 +static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 +{
 +      unsigned long max_pg, num_pg, new_pg, old_pg;
 +      struct user_struct *user;
 +
 +      if (capable(CAP_IPC_LOCK) || !size)
 +              return 0;
 +
 +      num_pg = (size >> PAGE_SHIFT) + 2;      /* worst case */
 +      max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 +      user = mmp->user ? : current_user();
 +
 +      do {
 +              old_pg = atomic_long_read(&user->locked_vm);
 +              new_pg = old_pg + num_pg;
 +              if (new_pg > max_pg)
 +                      return -ENOBUFS;
 +      } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
 +               old_pg);
 +
 +      if (!mmp->user) {
 +              mmp->user = get_uid(user);
 +              mmp->num_pg = num_pg;
 +      } else {
 +              mmp->num_pg += num_pg;
 +      }
 +
 +      return 0;
 +}
 +
 +static void mm_unaccount_pinned_pages(struct mmpin *mmp)
 +{
 +      if (mmp->user) {
 +              atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
 +              free_uid(mmp->user);
 +      }
 +}
 +
 +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 +{
 +      struct ubuf_info *uarg;
 +      struct sk_buff *skb;
 +
 +      WARN_ON_ONCE(!in_task());
 +
 +      if (!sock_flag(sk, SOCK_ZEROCOPY))
 +              return NULL;
 +
 +      skb = sock_omalloc(sk, 0, GFP_KERNEL);
 +      if (!skb)
 +              return NULL;
 +
 +      BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
 +      uarg = (void *)skb->cb;
 +      uarg->mmp.user = NULL;
 +
 +      if (mm_account_pinned_pages(&uarg->mmp, size)) {
 +              kfree_skb(skb);
 +              return NULL;
 +      }
 +
 +      uarg->callback = sock_zerocopy_callback;
 +      uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
 +      uarg->len = 1;
 +      uarg->bytelen = size;
 +      uarg->zerocopy = 1;
 +      atomic_set(&uarg->refcnt, 0);
 +      sock_hold(sk);
 +
 +      return uarg;
 +}
 +EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
 +
 +static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
 +{
 +      return container_of((void *)uarg, struct sk_buff, cb);
 +}
 +
 +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
 +                                      struct ubuf_info *uarg)
 +{
 +      if (uarg) {
 +              const u32 byte_limit = 1 << 19;         /* limit to a few TSO */
 +              u32 bytelen, next;
 +
 +              /* realloc only when socket is locked (TCP, UDP cork),
 +               * so uarg->len and sk_zckey access is serialized
 +               */
 +              if (!sock_owned_by_user(sk)) {
 +                      WARN_ON_ONCE(1);
 +                      return NULL;
 +              }
 +
 +              bytelen = uarg->bytelen + size;
 +              if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
 +                      /* TCP can create new skb to attach new uarg */
 +                      if (sk->sk_type == SOCK_STREAM)
 +                              goto new_alloc;
 +                      return NULL;
 +              }
 +
 +              next = (u32)atomic_read(&sk->sk_zckey);
 +              if ((u32)(uarg->id + uarg->len) == next) {
 +                      if (mm_account_pinned_pages(&uarg->mmp, size))
 +                              return NULL;
 +                      uarg->len++;
 +                      uarg->bytelen = bytelen;
 +                      atomic_set(&sk->sk_zckey, ++next);
 +                      return uarg;
 +              }
 +      }
 +
 +new_alloc:
 +      return sock_zerocopy_alloc(sk, size);
 +}
 +EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
 +
 +static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
 +{
 +      struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
 +      u32 old_lo, old_hi;
 +      u64 sum_len;
 +
 +      old_lo = serr->ee.ee_info;
 +      old_hi = serr->ee.ee_data;
 +      sum_len = old_hi - old_lo + 1ULL + len;
 +
 +      if (sum_len >= (1ULL << 32))
 +              return false;
 +
 +      if (lo != old_hi + 1)
 +              return false;
 +
 +      serr->ee.ee_data += len;
 +      return true;
 +}
 +
 +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
 +{
 +      struct sk_buff *tail, *skb = skb_from_uarg(uarg);
 +      struct sock_exterr_skb *serr;
 +      struct sock *sk = skb->sk;
 +      struct sk_buff_head *q;
 +      unsigned long flags;
 +      u32 lo, hi;
 +      u16 len;
 +
 +      mm_unaccount_pinned_pages(&uarg->mmp);
 +
 +      /* if !len, there was only 1 call, and it was aborted
 +       * so do not queue a completion notification
 +       */
 +      if (!uarg->len || sock_flag(sk, SOCK_DEAD))
 +              goto release;
 +
 +      len = uarg->len;
 +      lo = uarg->id;
 +      hi = uarg->id + len - 1;
 +
 +      serr = SKB_EXT_ERR(skb);
 +      memset(serr, 0, sizeof(*serr));
 +      serr->ee.ee_errno = 0;
 +      serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
 +      serr->ee.ee_data = hi;
 +      serr->ee.ee_info = lo;
 +      if (!success)
 +              serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
 +
 +      q = &sk->sk_error_queue;
 +      spin_lock_irqsave(&q->lock, flags);
 +      tail = skb_peek_tail(q);
 +      if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
 +          !skb_zerocopy_notify_extend(tail, lo, len)) {
 +              __skb_queue_tail(q, skb);
 +              skb = NULL;
 +      }
 +      spin_unlock_irqrestore(&q->lock, flags);
 +
 +      sk->sk_error_report(sk);
 +
 +release:
 +      consume_skb(skb);
 +      sock_put(sk);
 +}
 +EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
 +
 +void sock_zerocopy_put(struct ubuf_info *uarg)
 +{
 +      if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
 +              if (uarg->callback)
 +                      uarg->callback(uarg, uarg->zerocopy);
 +              else
 +                      consume_skb(skb_from_uarg(uarg));
 +      }
 +}
 +EXPORT_SYMBOL_GPL(sock_zerocopy_put);
 +
 +void sock_zerocopy_put_abort(struct ubuf_info *uarg)
 +{
 +      if (uarg) {
 +              struct sock *sk = skb_from_uarg(uarg)->sk;
 +
 +              atomic_dec(&sk->sk_zckey);
 +              uarg->len--;
 +
 +              /* sock_zerocopy_put expects a ref. Most sockets take one per
 +               * skb, which is zero on abort. tcp_sendmsg holds one extra, to
 +               * avoid an skb send inside the main loop triggering uarg free.
 +               */
 +              if (sk->sk_type != SOCK_STREAM)
 +                      atomic_inc(&uarg->refcnt);
 +
 +              sock_zerocopy_put(uarg);
 +      }
 +}
 +EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
 +
 +extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 +                                 struct iov_iter *from, size_t length);
 +
 +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 +                           struct msghdr *msg, int len,
 +                           struct ubuf_info *uarg)
 +{
 +      struct ubuf_info *orig_uarg = skb_zcopy(skb);
 +      struct iov_iter orig_iter = msg->msg_iter;
 +      int err, orig_len = skb->len;
 +
 +      /* An skb can only point to one uarg. This edge case happens when
 +       * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
 +       */
 +      if (orig_uarg && uarg != orig_uarg)
 +              return -EEXIST;
 +
 +      err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
 +      if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
 +              /* Streams do not free skb on error. Reset to prev state. */
 +              msg->msg_iter = orig_iter;
 +              ___pskb_trim(skb, orig_len);
 +              return err;
 +      }
 +
 +      skb_zcopy_set(skb, uarg);
 +      return skb->len - orig_len;
 +}
 +EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
 +
 +static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
 +                            gfp_t gfp_mask)
 +{
 +      if (skb_zcopy(orig)) {
 +              if (skb_zcopy(nskb)) {
 +                      /* !gfp_mask callers are verified to !skb_zcopy(nskb) */
 +                      if (!gfp_mask) {
 +                              WARN_ON_ONCE(1);
 +                              return -ENOMEM;
 +                      }
 +                      if (skb_uarg(nskb) == skb_uarg(orig))
 +                              return 0;
 +                      if (skb_copy_ubufs(nskb, GFP_ATOMIC))
 +                              return -EIO;
 +              }
 +              skb_zcopy_set(nskb, skb_uarg(orig));
 +      }
 +      return 0;
 +}
 +
  /**
   *    skb_copy_ubufs  -       copy userspace skb frags buffers to kernel
   *    @skb: the skb to modify
   */
  int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
  {
 -      int i;
        int num_frags = skb_shinfo(skb)->nr_frags;
        struct page *page, *head = NULL;
 -      struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
 +      int i, new_frags;
 +      u32 d_off;
  
 -      for (i = 0; i < num_frags; i++) {
 -              u8 *vaddr;
 -              skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 +      if (!num_frags)
 +              return 0;
 +
 +      if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
 +              return -EINVAL;
  
 +      new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 +      for (i = 0; i < new_frags; i++) {
                page = alloc_page(gfp_mask);
                if (!page) {
                        while (head) {
                        }
                        return -ENOMEM;
                }
 -              vaddr = kmap_atomic(skb_frag_page(f));
 -              memcpy(page_address(page),
 -                     vaddr + f->page_offset, skb_frag_size(f));
 -              kunmap_atomic(vaddr);
                set_page_private(page, (unsigned long)head);
                head = page;
        }
  
 +      page = head;
 +      d_off = 0;
 +      for (i = 0; i < num_frags; i++) {
 +              skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 +              u32 p_off, p_len, copied;
 +              struct page *p;
 +              u8 *vaddr;
 +
 +              skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
 +                                    p, p_off, p_len, copied) {
 +                      u32 copy, done = 0;
 +                      vaddr = kmap_atomic(p);
 +
 +                      while (done < p_len) {
 +                              if (d_off == PAGE_SIZE) {
 +                                      d_off = 0;
 +                                      page = (struct page *)page_private(page);
 +                              }
 +                              copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
 +                              memcpy(page_address(page) + d_off,
 +                                     vaddr + p_off + done, copy);
 +                              done += copy;
 +                              d_off += copy;
 +                      }
 +                      kunmap_atomic(vaddr);
 +              }
 +      }
 +
        /* skb frags release userspace buffers */
        for (i = 0; i < num_frags; i++)
                skb_frag_unref(skb, i);
  
 -      uarg->callback(uarg, false);
 -
        /* skb frags point to kernel buffers */
 -      for (i = num_frags - 1; i >= 0; i--) {
 -              __skb_fill_page_desc(skb, i, head, 0,
 -                                   skb_shinfo(skb)->frags[i].size);
 +      for (i = 0; i < new_frags - 1; i++) {
 +              __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
                head = (struct page *)page_private(head);
        }
 +      __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
 +      skb_shinfo(skb)->nr_frags = new_frags;
  
 -      skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 +      skb_zcopy_clear(skb, false);
        return 0;
  }
  EXPORT_SYMBOL_GPL(skb_copy_ubufs);
@@@ -1408,8 -1158,7 +1408,8 @@@ struct sk_buff *__pskb_copy_fclone(stru
        if (skb_shinfo(skb)->nr_frags) {
                int i;
  
 -              if (skb_orphan_frags(skb, gfp_mask)) {
 +              if (skb_orphan_frags(skb, gfp_mask) ||
 +                  skb_zerocopy_clone(n, skb, gfp_mask)) {
                        kfree_skb(n);
                        n = NULL;
                        goto out;
@@@ -1486,10 -1235,9 +1486,10 @@@ int pskb_expand_head(struct sk_buff *sk
         * be since all we did is relocate the values
         */
        if (skb_cloned(skb)) {
 -              /* copy this zero copy skb frags */
                if (skb_orphan_frags(skb, gfp_mask))
                        goto nofrags;
 +              if (skb_zcopy(skb))
 +                      atomic_inc(&skb_uarg(skb)->refcnt);
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                        skb_frag_ref(skb, i);
  
@@@ -1615,18 -1363,20 +1615,20 @@@ struct sk_buff *skb_copy_expand(const s
  EXPORT_SYMBOL(skb_copy_expand);
  
  /**
-  *    skb_pad                 -       zero pad the tail of an skb
+  *    __skb_pad               -       zero pad the tail of an skb
   *    @skb: buffer to pad
   *    @pad: space to pad
+  *    @free_on_error: free buffer on error
   *
   *    Ensure that a buffer is followed by a padding area that is zero
   *    filled. Used by network drivers which may DMA or transfer data
   *    beyond the buffer end onto the wire.
   *
-  *    May return error in out of memory cases. The skb is freed on error.
+  *    May return error in out of memory cases. The skb is freed on error
+  *    if @free_on_error is true.
   */
  
- int skb_pad(struct sk_buff *skb, int pad)
+ int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
  {
        int err;
        int ntail;
        return 0;
  
  free_skb:
-       kfree_skb(skb);
+       if (free_on_error)
+               kfree_skb(skb);
        return err;
  }
- EXPORT_SYMBOL(skb_pad);
+ EXPORT_SYMBOL(__skb_pad);
  
  /**
   *    pskb_put - add data to the tail of a potentially fragmented buffer
@@@ -1971,8 -1722,6 +1974,8 @@@ pull_pages
                        if (eat) {
                                skb_shinfo(skb)->frags[k].page_offset += eat;
                                skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
 +                              if (!i)
 +                                      goto end;
                                eat = 0;
                        }
                        k++;
        }
        skb_shinfo(skb)->nr_frags = k;
  
 +end:
        skb->tail     += delta;
        skb->data_len -= delta;
  
 +      if (!skb->data_len)
 +              skb_zcopy_clear(skb, false);
 +
        return skb_tail_pointer(skb);
  }
  EXPORT_SYMBOL(__pskb_pull_tail);
@@@ -2034,20 -1779,16 +2037,20 @@@ int skb_copy_bits(const struct sk_buff 
  
                end = start + skb_frag_size(f);
                if ((copy = end - offset) > 0) {
 +                      u32 p_off, p_len, copied;
 +                      struct page *p;
                        u8 *vaddr;
  
                        if (copy > len)
                                copy = len;
  
 -                      vaddr = kmap_atomic(skb_frag_page(f));
 -                      memcpy(to,
 -                             vaddr + f->page_offset + offset - start,
 -                             copy);
 -                      kunmap_atomic(vaddr);
 +                      skb_frag_foreach_page(f,
 +                                            f->page_offset + offset - start,
 +                                            copy, p, p_off, p_len, copied) {
 +                              vaddr = kmap_atomic(p);
 +                              memcpy(to + copied, vaddr + p_off, p_len);
 +                              kunmap_atomic(vaddr);
 +                      }
  
                        if ((len -= copy) == 0)
                                return 0;
@@@ -2267,107 -2008,6 +2270,107 @@@ int skb_splice_bits(struct sk_buff *skb
  }
  EXPORT_SYMBOL_GPL(skb_splice_bits);
  
 +/* Send skb data on a socket. Socket must be locked. */
 +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 +                       int len)
 +{
 +      unsigned int orig_len = len;
 +      struct sk_buff *head = skb;
 +      unsigned short fragidx;
 +      int slen, ret;
 +
 +do_frag_list:
 +
 +      /* Deal with head data */
 +      while (offset < skb_headlen(skb) && len) {
 +              struct kvec kv;
 +              struct msghdr msg;
 +
 +              slen = min_t(int, len, skb_headlen(skb) - offset);
 +              kv.iov_base = skb->data + offset;
 +              kv.iov_len = slen;
 +              memset(&msg, 0, sizeof(msg));
 +
 +              ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
 +              if (ret <= 0)
 +                      goto error;
 +
 +              offset += ret;
 +              len -= ret;
 +      }
 +
 +      /* All the data was skb head? */
 +      if (!len)
 +              goto out;
 +
 +      /* Make offset relative to start of frags */
 +      offset -= skb_headlen(skb);
 +
 +      /* Find where we are in frag list */
 +      for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
 +              skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
 +
 +              if (offset < frag->size)
 +                      break;
 +
 +              offset -= frag->size;
 +      }
 +
 +      for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
 +              skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
 +
 +              slen = min_t(size_t, len, frag->size - offset);
 +
 +              while (slen) {
 +                      ret = kernel_sendpage_locked(sk, frag->page.p,
 +                                                   frag->page_offset + offset,
 +                                                   slen, MSG_DONTWAIT);
 +                      if (ret <= 0)
 +                              goto error;
 +
 +                      len -= ret;
 +                      offset += ret;
 +                      slen -= ret;
 +              }
 +
 +              offset = 0;
 +      }
 +
 +      if (len) {
 +              /* Process any frag lists */
 +
 +              if (skb == head) {
 +                      if (skb_has_frag_list(skb)) {
 +                              skb = skb_shinfo(skb)->frag_list;
 +                              goto do_frag_list;
 +                      }
 +              } else if (skb->next) {
 +                      skb = skb->next;
 +                      goto do_frag_list;
 +              }
 +      }
 +
 +out:
 +      return orig_len - len;
 +
 +error:
 +      return orig_len == len ? ret : orig_len - len;
 +}
 +EXPORT_SYMBOL_GPL(skb_send_sock_locked);
 +
 +/* Send skb data on a socket. */
 +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
 +{
 +      int ret = 0;
 +
 +      lock_sock(sk);
 +      ret = skb_send_sock_locked(sk, skb, offset, len);
 +      release_sock(sk);
 +
 +      return ret;
 +}
 +EXPORT_SYMBOL_GPL(skb_send_sock);
 +
  /**
   *    skb_store_bits - store bits from kernel buffer to skb
   *    @skb: destination buffer
@@@ -2407,20 -2047,15 +2410,20 @@@ int skb_store_bits(struct sk_buff *skb
  
                end = start + skb_frag_size(frag);
                if ((copy = end - offset) > 0) {
 +                      u32 p_off, p_len, copied;
 +                      struct page *p;
                        u8 *vaddr;
  
                        if (copy > len)
                                copy = len;
  
 -                      vaddr = kmap_atomic(skb_frag_page(frag));
 -                      memcpy(vaddr + frag->page_offset + offset - start,
 -                             from, copy);
 -                      kunmap_atomic(vaddr);
 +                      skb_frag_foreach_page(frag,
 +                                            frag->page_offset + offset - start,
 +                                            copy, p, p_off, p_len, copied) {
 +                              vaddr = kmap_atomic(p);
 +                              memcpy(vaddr + p_off, from + copied, p_len);
 +                              kunmap_atomic(vaddr);
 +                      }
  
                        if ((len -= copy) == 0)
                                return 0;
@@@ -2485,27 -2120,20 +2488,27 @@@ __wsum __skb_checksum(const struct sk_b
  
                end = start + skb_frag_size(frag);
                if ((copy = end - offset) > 0) {
 +                      u32 p_off, p_len, copied;
 +                      struct page *p;
                        __wsum csum2;
                        u8 *vaddr;
  
                        if (copy > len)
                                copy = len;
 -                      vaddr = kmap_atomic(skb_frag_page(frag));
 -                      csum2 = ops->update(vaddr + frag->page_offset +
 -                                          offset - start, copy, 0);
 -                      kunmap_atomic(vaddr);
 -                      csum = ops->combine(csum, csum2, pos, copy);
 +
 +                      skb_frag_foreach_page(frag,
 +                                            frag->page_offset + offset - start,
 +                                            copy, p, p_off, p_len, copied) {
 +                              vaddr = kmap_atomic(p);
 +                              csum2 = ops->update(vaddr + p_off, p_len, 0);
 +                              kunmap_atomic(vaddr);
 +                              csum = ops->combine(csum, csum2, pos, p_len);
 +                              pos += p_len;
 +                      }
 +
                        if (!(len -= copy))
                                return csum;
                        offset += copy;
 -                      pos    += copy;
                }
                start = end;
        }
@@@ -2578,31 -2206,24 +2581,31 @@@ __wsum skb_copy_and_csum_bits(const str
  
                end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
                if ((copy = end - offset) > 0) {
 +                      skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 +                      u32 p_off, p_len, copied;
 +                      struct page *p;
                        __wsum csum2;
                        u8 *vaddr;
 -                      skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  
                        if (copy > len)
                                copy = len;
 -                      vaddr = kmap_atomic(skb_frag_page(frag));
 -                      csum2 = csum_partial_copy_nocheck(vaddr +
 -                                                        frag->page_offset +
 -                                                        offset - start, to,
 -                                                        copy, 0);
 -                      kunmap_atomic(vaddr);
 -                      csum = csum_block_add(csum, csum2, pos);
 +
 +                      skb_frag_foreach_page(frag,
 +                                            frag->page_offset + offset - start,
 +                                            copy, p, p_off, p_len, copied) {
 +                              vaddr = kmap_atomic(p);
 +                              csum2 = csum_partial_copy_nocheck(vaddr + p_off,
 +                                                                to + copied,
 +                                                                p_len, 0);
 +                              kunmap_atomic(vaddr);
 +                              csum = csum_block_add(csum, csum2, pos);
 +                              pos += p_len;
 +                      }
 +
                        if (!(len -= copy))
                                return csum;
                        offset += copy;
                        to     += copy;
 -                      pos    += copy;
                }
                start = end;
        }
@@@ -2742,7 -2363,6 +2745,7 @@@ skb_zerocopy(struct sk_buff *to, struc
                skb_tx_error(from);
                return -ENOMEM;
        }
 +      skb_zerocopy_clone(to, from, GFP_ATOMIC);
  
        for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
                if (!len)
@@@ -3040,7 -2660,6 +3043,7 @@@ void skb_split(struct sk_buff *skb, str
  
        skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
                                      SKBTX_SHARED_FRAG;
 +      skb_zerocopy_clone(skb1, skb, 0);
        if (len < pos)  /* Split line is inside header. */
                skb_split_inside_header(skb, skb1, len, pos);
        else            /* Second chunk has no header, nothing to copy. */
@@@ -3084,8 -2703,6 +3087,8 @@@ int skb_shift(struct sk_buff *tgt, stru
  
        if (skb_headlen(skb))
                return 0;
 +      if (skb_zcopy(tgt) || skb_zcopy(skb))
 +              return 0;
  
        todo = shiftlen;
        from = 0;
@@@ -3659,8 -3276,6 +3662,8 @@@ normal
  
                skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
                                              SKBTX_SHARED_FRAG;
 +              if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
 +                      goto err;
  
                while (pos < offset + len) {
                        if (i >= nfrags) {
@@@ -4784,8 -4399,6 +4787,8 @@@ bool skb_try_coalesce(struct sk_buff *t
  
        if (skb_has_frag_list(to) || skb_has_frag_list(from))
                return false;
 +      if (skb_zcopy(to) || skb_zcopy(from))
 +              return false;
  
        if (skb_headlen(from) != 0) {
                struct page *page;
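
[Editor's sketch, not part of the diff] The skbuff.c changes above add the MSG_ZEROCOPY completion machinery (sock_zerocopy_alloc/realloc and sock_zerocopy_callback), which reports finished sends as a range on the socket error queue and sets SO_EE_CODE_ZEROCOPY_COPIED when the kernel had to fall back to copying (skb_copy_ubufs). A minimal user-space sketch of how such completions are consumed; constants come from linux/errqueue.h and the socket uapi headers, and error handling plus cmsg level/type checks are elided:

#include <stdio.h>
#include <stddef.h>
#include <sys/socket.h>
#include <linux/errqueue.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY	60
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY	0x4000000
#endif

static void send_zerocopy(int fd, const char *buf, size_t len)
{
	int one = 1;

	/* Opt in first; without SOCK_ZEROCOPY the flag is a plain copy. */
	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
	send(fd, buf, len, MSG_ZEROCOPY);
}

/* Drain one notification from the error queue.  ee_info..ee_data is the
 * range of zerocopy sends that completed (see sock_zerocopy_callback()
 * and skb_zerocopy_notify_extend() above).
 */
static void read_completion(int fd)
{
	char control[128];
	struct msghdr msg = { .msg_control = control,
			      .msg_controllen = sizeof(control) };
	struct sock_extended_err *serr;
	struct cmsghdr *cm;

	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
		return;

	cm = CMSG_FIRSTHDR(&msg);
	serr = (struct sock_extended_err *)CMSG_DATA(cm);
	if (serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY)
		printf("zerocopy sends %u..%u done%s\n",
		       serr->ee_info, serr->ee_data,
		       (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED) ?
		       " (copied)" : "");
}
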
diff --combined net/dsa/dsa2.c
index cceaa4dd9f53c30c5a3d9d647379a24358fca669,20bc9c56fca05c230477b15d6dad15e02b488800..873af0108e243fc269f591ec7d7c59a9c8b374fc
@@@ -219,7 -219,7 +219,7 @@@ static int dsa_dsa_port_apply(struct ds
        struct dsa_switch *ds = port->ds;
        int err;
  
 -      err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
 +      err = dsa_cpu_dsa_setup(port);
        if (err) {
                dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
                         port->index, err);
@@@ -243,7 -243,7 +243,7 @@@ static int dsa_cpu_port_apply(struct ds
        struct dsa_switch *ds = port->ds;
        int err;
  
 -      err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
 +      err = dsa_cpu_dsa_setup(port);
        if (err) {
                dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
                         port->index, err);
@@@ -275,7 -275,7 +275,7 @@@ static int dsa_user_port_apply(struct d
        if (!name)
                name = "eth%d";
  
 -      err = dsa_slave_create(ds, ds->dev, port->index, name);
 +      err = dsa_slave_create(port, name);
        if (err) {
                dev_warn(ds->dev, "Failed to create slave %d: %d\n",
                         port->index, err);
@@@ -577,7 -577,7 +577,7 @@@ static int dsa_dst_parse(struct dsa_swi
                        return err;
        }
  
-       if (!dst->cpu_dp->netdev) {
+       if (!dst->cpu_dp) {
                pr_warn("Tree has no master device\n");
                return -EINVAL;
        }
diff --combined net/dsa/tag_ksz.c
index 17f30675c15cee18642642334120b3f03eb09183,fcd90f79458e20fefd76661fe7bc7e07d42ed1a3..010ca0a336c46a34f6a89d8c6975ca4b00642e6e
@@@ -42,7 -42,8 +42,8 @@@ static struct sk_buff *ksz_xmit(struct 
        padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
  
        if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
-               if (skb_put_padto(skb, skb->len + padlen))
+               /* Let dsa_slave_xmit() free skb */
+               if (__skb_put_padto(skb, skb->len + padlen, false))
                        return NULL;
  
                nskb = skb;
                                         skb_transport_header(skb) - skb->head);
                skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
  
-               if (skb_put_padto(nskb, nskb->len + padlen)) {
-                       kfree_skb(nskb);
+               /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free
+                * skb
+                */
+               if (skb_put_padto(nskb, nskb->len + padlen))
                        return NULL;
-               }
  
-               kfree_skb(skb);
+               consume_skb(skb);
        }
  
        tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
@@@ -76,7 -78,8 +78,7 @@@
  }
  
  static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
 -                             struct packet_type *pt,
 -                             struct net_device *orig_dev)
 +                             struct packet_type *pt)
  {
        struct dsa_switch_tree *dst = dev->dsa_ptr;
        struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
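
[Editor's sketch, not part of the diff] The tag_ksz.c hunk above switches the in-place case to __skb_put_padto(skb, len, false) so a padding failure does not free an skb that dsa_slave_xmit() still owns and will free itself. A sketch of that caller pattern with a hypothetical helper name, assuming the free_on_error parameter added to __skb_pad() in the skbuff.c hunk above:

#include <linux/skbuff.h>

/* Hypothetical tagger xmit step: pad in place without giving up
 * ownership of skb on error.
 */
static struct sk_buff *tag_pad_in_place(struct sk_buff *skb,
					unsigned int padlen)
{
	/* false: on allocation failure leave skb alive; the caller
	 * (e.g. dsa_slave_xmit()) frees it when we return NULL.
	 */
	if (__skb_put_padto(skb, skb->len + padlen, false))
		return NULL;
	return skb;
}
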
diff --combined net/dsa/tag_trailer.c
index 8707157dea32ee88b77a306f9a5c3b4342bb0c1e,9c7b1d74a5c6cc6a80e51a259ef6645060390d31..d2fd4923aa3eb3d1d56f20dd159d0ca87408b835
@@@ -40,7 -40,7 +40,7 @@@ static struct sk_buff *trailer_xmit(str
        skb_set_network_header(nskb, skb_network_header(skb) - skb->head);
        skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head);
        skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
-       kfree_skb(skb);
+       consume_skb(skb);
  
        if (padlen) {
                skb_put_zero(nskb, padlen);
@@@ -56,7 -56,8 +56,7 @@@
  }
  
  static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
 -                                 struct packet_type *pt,
 -                                 struct net_device *orig_dev)
 +                                 struct packet_type *pt)
  {
        struct dsa_switch_tree *dst = dev->dsa_ptr;
        struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --combined net/ipv4/esp4.c
index 319000573bc7a6dc12966289c08f08521dead451,df68963dc90ada0ec19f8997d920f6faf3186e05..b00e4a43b4dc8538e016f8d1c23707a1f49060a8
@@@ -258,7 -258,7 +258,7 @@@ int esp_output_head(struct xfrm_state *
                esp_output_udp_encap(x, skb, esp);
  
        if (!skb_cloned(skb)) {
-               if (tailen <= skb_availroom(skb)) {
+               if (tailen <= skb_tailroom(skb)) {
                        nfrags = 1;
                        trailer = skb;
                        tail = skb_tail_pointer(trailer);
  
                        kunmap_atomic(vaddr);
  
-                       spin_unlock_bh(&x->lock);
                        nfrags = skb_shinfo(skb)->nr_frags;
  
                        __skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
                        skb_shinfo(skb)->nr_frags = ++nfrags;
  
                        pfrag->offset = pfrag->offset + allocsize;
+                       spin_unlock_bh(&x->lock);
                        nfrags++;
  
                        skb->len += tailen;
@@@ -381,7 -382,7 +382,7 @@@ int esp_output_tail(struct xfrm_state *
                           (unsigned char *)esph - skb->data,
                           assoclen + ivlen + esp->clen + alen);
        if (unlikely(err < 0))
-               goto error;
+               goto error_free;
  
        if (!esp->inplace) {
                int allocsize;
                spin_lock_bh(&x->lock);
                if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
                        spin_unlock_bh(&x->lock);
-                       goto error;
+                       goto error_free;
                }
  
                skb_shinfo(skb)->nr_frags = 1;
                                   (unsigned char *)esph - skb->data,
                                   assoclen + ivlen + esp->clen + alen);
                if (unlikely(err < 0))
-                       goto error;
+                       goto error_free;
        }
  
        if ((x->props.flags & XFRM_STATE_ESN))
  
        if (sg != dsg)
                esp_ssg_unref(x, tmp);
-       kfree(tmp);
  
+ error_free:
+       kfree(tmp);
  error:
        return err;
  }
@@@ -499,59 -501,18 +501,59 @@@ static int esp_output(struct xfrm_stat
        return esp_output_tail(x, skb, &esp);
  }
  
 +static inline int esp_remove_trailer(struct sk_buff *skb)
 +{
 +      struct xfrm_state *x = xfrm_input_state(skb);
 +      struct xfrm_offload *xo = xfrm_offload(skb);
 +      struct crypto_aead *aead = x->data;
 +      int alen, hlen, elen;
 +      int padlen, trimlen;
 +      __wsum csumdiff;
 +      u8 nexthdr[2];
 +      int ret;
 +
 +      alen = crypto_aead_authsize(aead);
 +      hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
 +      elen = skb->len - hlen;
 +
 +      if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
 +              ret = xo->proto;
 +              goto out;
 +      }
 +
 +      if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
 +              BUG();
 +
 +      ret = -EINVAL;
 +      padlen = nexthdr[0];
 +      if (padlen + 2 + alen >= elen) {
 +              net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
 +                                  padlen + 2, elen - alen);
 +              goto out;
 +      }
 +
 +      trimlen = alen + padlen + 2;
 +      if (skb->ip_summed == CHECKSUM_COMPLETE) {
 +              csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
 +              skb->csum = csum_block_sub(skb->csum, csumdiff,
 +                                         skb->len - trimlen);
 +      }
 +      pskb_trim(skb, skb->len - trimlen);
 +
 +      ret = nexthdr[1];
 +
 +out:
 +      return ret;
 +}
 +
  int esp_input_done2(struct sk_buff *skb, int err)
  {
        const struct iphdr *iph;
        struct xfrm_state *x = xfrm_input_state(skb);
        struct xfrm_offload *xo = xfrm_offload(skb);
        struct crypto_aead *aead = x->data;
 -      int alen = crypto_aead_authsize(aead);
        int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
 -      int elen = skb->len - hlen;
        int ihl;
 -      u8 nexthdr[2];
 -      int padlen;
  
        if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
                kfree(ESP_SKB_CB(skb)->tmp);
        if (unlikely(err))
                goto out;
  
 -      if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 -              BUG();
 -
 -      err = -EINVAL;
 -      padlen = nexthdr[0];
 -      if (padlen + 2 + alen >= elen)
 +      err = esp_remove_trailer(skb);
 +      if (unlikely(err < 0))
                goto out;
  
 -      /* ... check padding bits here. Silly. :-) */
 -
        iph = ip_hdr(skb);
        ihl = iph->ihl * 4;
  
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
  
 -      pskb_trim(skb, skb->len - alen - padlen - 2);
 -      __skb_pull(skb, hlen);
 +      skb_pull_rcsum(skb, hlen);
        if (x->props.mode == XFRM_MODE_TUNNEL)
                skb_reset_transport_header(skb);
        else
                skb_set_transport_header(skb, -ihl);
  
 -      err = nexthdr[1];
 -
        /* RFC4303: Drop dummy packets without any error */
        if (err == IPPROTO_NONE)
                err = -EINVAL;
@@@ -727,8 -697,10 +729,10 @@@ skip_cow
  
        sg_init_table(sg, nfrags);
        err = skb_to_sgvec(skb, sg, 0, skb->len);
-       if (unlikely(err < 0))
+       if (unlikely(err < 0)) {
+               kfree(tmp);
                goto out;
+       }
  
        skb->ip_summed = CHECKSUM_NONE;
  
diff --combined net/ipv4/esp4_offload.c
index aca1c85f079528d61c6816f430f32df94d805f98,50112324fa5c3638527b12477c356ce406ba9a36..f8b918c766b0af1e572ed895dcf2435af92016a9
@@@ -182,13 -182,11 +182,13 @@@ out
  static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
  {
        struct crypto_aead *aead = x->data;
 +      struct xfrm_offload *xo = xfrm_offload(skb);
  
        if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
                return -EINVAL;
  
 -      skb->ip_summed = CHECKSUM_NONE;
 +      if (!(xo->flags & CRYPTO_DONE))
 +              skb->ip_summed = CHECKSUM_NONE;
  
        return esp_input_done2(skb, 0);
  }
@@@ -259,7 -257,7 +259,7 @@@ static int esp_xmit(struct xfrm_state *
        esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
  
        err = esp_output_tail(x, skb, &esp);
-       if (err < 0)
+       if (err)
                return err;
  
        secpath_reset(skb);
@@@ -305,4 -303,3 +305,4 @@@ module_init(esp4_offload_init)
  module_exit(esp4_offload_exit);
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
 +MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --combined net/ipv4/tcp.c
index 21ca2df274c5130a13d31a391a1408d779af34af,a3e91b552edce4edee0d3b9ee5e07105946d2dd9..7a3d843758363af44c27bf3716ba2f488688fed6
  #include <linux/err.h>
  #include <linux/time.h>
  #include <linux/slab.h>
 +#include <linux/errqueue.h>
  
  #include <net/icmp.h>
  #include <net/inet_common.h>
@@@ -389,19 -388,6 +389,19 @@@ static int retrans_to_secs(u8 retrans, 
        return period;
  }
  
 +static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
 +{
 +      u32 rate = READ_ONCE(tp->rate_delivered);
 +      u32 intv = READ_ONCE(tp->rate_interval_us);
 +      u64 rate64 = 0;
 +
 +      if (rate && intv) {
 +              rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
 +              do_div(rate64, intv);
 +      }
 +      return rate64;
 +}
 +
  /* Address-family independent initialization for a tcp_sock.
   *
   * NOTE: A lot of things set to zero explicitly by call to
@@@ -414,6 -400,7 +414,6 @@@ void tcp_init_sock(struct sock *sk
  
        tp->out_of_order_queue = RB_ROOT;
        tcp_init_xmit_timers(sk);
 -      tcp_prequeue_init(tp);
        INIT_LIST_HEAD(&tp->tsq_node);
  
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
@@@ -1047,29 -1034,23 +1047,29 @@@ out_err
  }
  EXPORT_SYMBOL_GPL(do_tcp_sendpages);
  
 -int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 -               size_t size, int flags)
 +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 +                      size_t size, int flags)
  {
 -      ssize_t res;
 -
        if (!(sk->sk_route_caps & NETIF_F_SG) ||
            !sk_check_csum_caps(sk))
 -              return sock_no_sendpage(sk->sk_socket, page, offset, size,
 -                                      flags);
 -
 -      lock_sock(sk);
 +              return sock_no_sendpage_locked(sk, page, offset, size, flags);
  
        tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
  
 -      res = do_tcp_sendpages(sk, page, offset, size, flags);
 +      return do_tcp_sendpages(sk, page, offset, size, flags);
 +}
 +EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
 +
 +int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 +               size_t size, int flags)
 +{
 +      int ret;
 +
 +      lock_sock(sk);
 +      ret = tcp_sendpage_locked(sk, page, offset, size, flags);
        release_sock(sk);
 -      return res;
 +
 +      return ret;
  }
  EXPORT_SYMBOL(tcp_sendpage);
  
@@@ -1163,10 -1144,9 +1163,10 @@@ static int tcp_sendmsg_fastopen(struct 
        return err;
  }
  
 -int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
  {
        struct tcp_sock *tp = tcp_sk(sk);
 +      struct ubuf_info *uarg = NULL;
        struct sk_buff *skb;
        struct sockcm_cookie sockc;
        int flags, err, copied = 0;
        bool sg;
        long timeo;
  
 -      lock_sock(sk);
 -
        flags = msg->msg_flags;
 +
 +      if (flags & MSG_ZEROCOPY && size) {
 +              if (sk->sk_state != TCP_ESTABLISHED) {
 +                      err = -EINVAL;
 +                      goto out_err;
 +              }
 +
 +              skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
 +              uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
 +              if (!uarg) {
 +                      err = -ENOBUFS;
 +                      goto out_err;
 +              }
 +
 +              /* skb may be freed in main loop, keep extra ref on uarg */
 +              sock_zerocopy_get(uarg);
 +              if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
 +                      uarg->zerocopy = 0;
 +      }
 +
        if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
                err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
                if (err == -EINPROGRESS && copied_syn > 0)
@@@ -1319,7 -1281,7 +1319,7 @@@ new_segment
                        err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
                        if (err)
                                goto do_fault;
 -              } else {
 +              } else if (!uarg || !uarg->zerocopy) {
                        bool merge = true;
                        int i = skb_shinfo(skb)->nr_frags;
                        struct page_frag *pfrag = sk_page_frag(sk);
                                page_ref_inc(pfrag->page);
                        }
                        pfrag->offset += copy;
 +              } else {
 +                      err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
 +                      if (err == -EMSGSIZE || err == -EEXIST)
 +                              goto new_segment;
 +                      if (err < 0)
 +                              goto do_error;
 +                      copy = err;
                }
  
                if (!copied)
@@@ -1410,7 -1365,7 +1410,7 @@@ out
                tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
        }
  out_nopush:
 -      release_sock(sk);
 +      sock_zerocopy_put(uarg);
        return copied + copied_syn;
  
  do_fault:
@@@ -1427,7 -1382,6 +1427,7 @@@ do_error
        if (copied + copied_syn)
                goto out;
  out_err:
 +      sock_zerocopy_put_abort(uarg);
        err = sk_stream_error(sk, flags, err);
        /* make sure we wake any epoll edge trigger waiter */
        if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
                sk->sk_write_space(sk);
                tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
        }
 -      release_sock(sk);
        return err;
  }
 +EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
 +
 +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 +{
 +      int ret;
 +
 +      lock_sock(sk);
 +      ret = tcp_sendmsg_locked(sk, msg, size);
 +      release_sock(sk);
 +
 +      return ret;
 +}
  EXPORT_SYMBOL(tcp_sendmsg);
  
  /*
@@@ -1582,6 -1525,20 +1582,6 @@@ static void tcp_cleanup_rbuf(struct soc
                tcp_send_ack(sk);
  }
  
 -static void tcp_prequeue_process(struct sock *sk)
 -{
 -      struct sk_buff *skb;
 -      struct tcp_sock *tp = tcp_sk(sk);
 -
 -      NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 -
 -      while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 -              sk_backlog_rcv(sk, skb);
 -
 -      /* Clear memory counter. */
 -      tp->ucopy.memory = 0;
 -}
 -
  static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
  {
        struct sk_buff *skb;
@@@ -1695,61 -1652,6 +1695,61 @@@ int tcp_peek_len(struct socket *sock
  }
  EXPORT_SYMBOL(tcp_peek_len);
  
 +static void tcp_update_recv_tstamps(struct sk_buff *skb,
 +                                  struct scm_timestamping *tss)
 +{
 +      if (skb->tstamp)
 +              tss->ts[0] = ktime_to_timespec(skb->tstamp);
 +      else
 +              tss->ts[0] = (struct timespec) {0};
 +
 +      if (skb_hwtstamps(skb)->hwtstamp)
 +              tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
 +      else
 +              tss->ts[2] = (struct timespec) {0};
 +}
 +
 +/* Similar to __sock_recv_timestamp, but does not require an skb */
 +void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 +                      struct scm_timestamping *tss)
 +{
 +      struct timeval tv;
 +      bool has_timestamping = false;
 +
 +      if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
 +              if (sock_flag(sk, SOCK_RCVTSTAMP)) {
 +                      if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 +                              put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
 +                                       sizeof(tss->ts[0]), &tss->ts[0]);
 +                      } else {
 +                              tv.tv_sec = tss->ts[0].tv_sec;
 +                              tv.tv_usec = tss->ts[0].tv_nsec / 1000;
 +
 +                              put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
 +                                       sizeof(tv), &tv);
 +                      }
 +              }
 +
 +              if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
 +                      has_timestamping = true;
 +              else
 +                      tss->ts[0] = (struct timespec) {0};
 +      }
 +
 +      if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
 +              if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
 +                      has_timestamping = true;
 +              else
 +                      tss->ts[2] = (struct timespec) {0};
 +      }
 +
 +      if (has_timestamping) {
 +              tss->ts[1] = (struct timespec) {0};
 +              put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
 +                       sizeof(*tss), tss);
 +      }
 +}
 +
  /*
   *    This routine copies from a sock struct into the user buffer.
   *
@@@ -1769,10 -1671,9 +1769,10 @@@ int tcp_recvmsg(struct sock *sk, struc
        int err;
        int target;             /* Read at least this many bytes */
        long timeo;
 -      struct task_struct *user_recv = NULL;
        struct sk_buff *skb, *last;
        u32 urg_hole = 0;
 +      struct scm_timestamping tss;
 +      bool has_tss = false;
  
        if (unlikely(flags & MSG_ERRQUEUE))
                return inet_recv_error(sk, msg, len, addr_len);
  
                tcp_cleanup_rbuf(sk, copied);
  
 -              if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
 -                      /* Install new reader */
 -                      if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
 -                              user_recv = current;
 -                              tp->ucopy.task = user_recv;
 -                              tp->ucopy.msg = msg;
 -                      }
 -
 -                      tp->ucopy.len = len;
 -
 -                      WARN_ON(tp->copied_seq != tp->rcv_nxt &&
 -                              !(flags & (MSG_PEEK | MSG_TRUNC)));
 -
 -                      /* Ugly... If prequeue is not empty, we have to
 -                       * process it before releasing socket, otherwise
 -                       * order will be broken at second iteration.
 -                       * More elegant solution is required!!!
 -                       *
 -                       * Look: we have the following (pseudo)queues:
 -                       *
 -                       * 1. packets in flight
 -                       * 2. backlog
 -                       * 3. prequeue
 -                       * 4. receive_queue
 -                       *
 -                       * Each queue can be processed only if the next ones
 -                       * are empty. At this point we have empty receive_queue.
 -                       * But prequeue _can_ be not empty after 2nd iteration,
 -                       * when we jumped to start of loop because backlog
 -                       * processing added something to receive_queue.
 -                       * We cannot release_sock(), because backlog contains
 -                       * packets arrived _after_ prequeued ones.
 -                       *
 -                       * Shortly, algorithm is clear --- to process all
 -                       * the queues in order. We could make it more directly,
 -                       * requeueing packets from backlog to prequeue, if
 -                       * is not empty. It is more elegant, but eats cycles,
 -                       * unfortunately.
 -                       */
 -                      if (!skb_queue_empty(&tp->ucopy.prequeue))
 -                              goto do_prequeue;
 -
 -                      /* __ Set realtime policy in scheduler __ */
 -              }
 -
                if (copied >= target) {
                        /* Do not sleep, just process backlog. */
                        release_sock(sk);
                        sk_wait_data(sk, &timeo, last);
                }
  
 -              if (user_recv) {
 -                      int chunk;
 -
 -                      /* __ Restore normal policy in scheduler __ */
 -
 -                      chunk = len - tp->ucopy.len;
 -                      if (chunk != 0) {
 -                              NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
 -                              len -= chunk;
 -                              copied += chunk;
 -                      }
 -
 -                      if (tp->rcv_nxt == tp->copied_seq &&
 -                          !skb_queue_empty(&tp->ucopy.prequeue)) {
 -do_prequeue:
 -                              tcp_prequeue_process(sk);
 -
 -                              chunk = len - tp->ucopy.len;
 -                              if (chunk != 0) {
 -                                      NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 -                                      len -= chunk;
 -                                      copied += chunk;
 -                              }
 -                      }
 -              }
                if ((flags & MSG_PEEK) &&
                    (peek_seq - copied - urg_hole != tp->copied_seq)) {
                        net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@@ -1970,10 -1941,6 +1970,10 @@@ skip_copy
                if (used + offset < skb->len)
                        continue;
  
 +              if (TCP_SKB_CB(skb)->has_rxtstamp) {
 +                      tcp_update_recv_tstamps(skb, &tss);
 +                      has_tss = true;
 +              }
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                        goto found_fin_ok;
                if (!(flags & MSG_PEEK))
                break;
        } while (len > 0);
  
 -      if (user_recv) {
 -              if (!skb_queue_empty(&tp->ucopy.prequeue)) {
 -                      int chunk;
 -
 -                      tp->ucopy.len = copied > 0 ? len : 0;
 -
 -                      tcp_prequeue_process(sk);
 -
 -                      if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
 -                              NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 -                              len -= chunk;
 -                              copied += chunk;
 -                      }
 -              }
 -
 -              tp->ucopy.task = NULL;
 -              tp->ucopy.len = 0;
 -      }
 -
        /* According to UNIX98, msg_name/msg_namelen are ignored
         * on connected socket. I was just happy when found this 8) --ANK
         */
  
 +      if (has_tss)
 +              tcp_recv_timestamp(msg, sk, &tss);
 +
        /* Clean up data we have read: This will do ACK frames. */
        tcp_cleanup_rbuf(sk, copied);
  
@@@ -2498,7 -2481,7 +2498,7 @@@ static int do_tcp_setsockopt(struct soc
                name[val] = 0;
  
                lock_sock(sk);
-               err = tcp_set_congestion_control(sk, name, true);
+               err = tcp_set_congestion_control(sk, name, true, true);
                release_sock(sk);
                return err;
        }
@@@ -2840,7 -2823,7 +2840,7 @@@ void tcp_get_info(struct sock *sk, stru
  {
        const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
        const struct inet_connection_sock *icsk = inet_csk(sk);
 -      u32 now, intv;
 +      u32 now;
        u64 rate64;
        bool slow;
        u32 rate;
        info->tcpi_data_segs_out = tp->data_segs_out;
  
        info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
 -      rate = READ_ONCE(tp->rate_delivered);
 -      intv = READ_ONCE(tp->rate_interval_us);
 -      if (rate && intv) {
 -              rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
 -              do_div(rate64, intv);
 +      rate64 = tcp_compute_delivery_rate(tp);
 +      if (rate64)
                info->tcpi_delivery_rate = rate64;
 -      }
        unlock_sock_fast(sk, slow);
  }
  EXPORT_SYMBOL_GPL(tcp_get_info);
@@@ -2951,12 -2938,8 +2951,12 @@@ struct sk_buff *tcp_get_timestamping_op
        const struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *stats;
        struct tcp_info info;
 +      u64 rate64;
 +      u32 rate;
  
 -      stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
 +      stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
 +                        3 * nla_total_size(sizeof(u32)) +
 +                        2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
        if (!stats)
                return NULL;
  
                          tp->data_segs_out, TCP_NLA_PAD);
        nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
                          tp->total_retrans, TCP_NLA_PAD);
 +
 +      rate = READ_ONCE(sk->sk_pacing_rate);
 +      rate64 = rate != ~0U ? rate : ~0ULL;
 +      nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
 +
 +      rate64 = tcp_compute_delivery_rate(tp);
 +      nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
 +
 +      nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
 +      nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
 +      nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
 +
 +      nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
 +      nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
        return stats;
  }
  
diff --combined net/ipv4/tcp_cong.c
index c2b1744696459540c989a64e05eb2453cf2e366e,421ea1b918da5bc4a3974531539cd67266f70798..2f26124fd1601ad23662d1fc9152b4370270c112
@@@ -189,8 -189,8 +189,8 @@@ void tcp_init_congestion_control(struc
                INET_ECN_dontxmit(sk);
  }
  
- void tcp_reinit_congestion_control(struct sock *sk,
-                                  const struct tcp_congestion_ops *ca)
+ static void tcp_reinit_congestion_control(struct sock *sk,
+                                         const struct tcp_congestion_ops *ca)
  {
        struct inet_connection_sock *icsk = inet_csk(sk);
  
@@@ -338,7 -338,7 +338,7 @@@ out
   * tcp_reinit_congestion_control (if the current congestion control was
   * already initialized.
   */
- int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
+ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
  {
        struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_congestion_ops *ca;
        if (!ca) {
                err = -ENOENT;
        } else if (!load) {
-               icsk->icsk_ca_ops = ca;
-               if (!try_module_get(ca->owner))
+               const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
+               if (try_module_get(ca->owner)) {
+                       if (reinit) {
+                               tcp_reinit_congestion_control(sk, ca);
+                       } else {
+                               icsk->icsk_ca_ops = ca;
+                               module_put(old_ca->owner);
+                       }
+               } else {
                        err = -EBUSY;
+               }
        } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
                     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
                err = -EPERM;
@@@ -456,7 -465,7 +465,7 @@@ u32 tcp_reno_undo_cwnd(struct sock *sk
  {
        const struct tcp_sock *tp = tcp_sk(sk);
  
 -      return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 +      return max(tp->snd_cwnd, tp->prior_cwnd);
  }
  EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
  
diff --combined net/ipv4/udp.c
index bf6c406bf5e71f0200fcfc1439c48e2843fae6c2,62344804baaef96daf405dbdd5418db541b95864..f900cdd0fbfb00e4422da04daee88ebda69b2510
@@@ -380,8 -380,8 +380,8 @@@ int udp_v4_get_port(struct sock *sk, un
  
  static int compute_score(struct sock *sk, struct net *net,
                         __be32 saddr, __be16 sport,
 -                       __be32 daddr, unsigned short hnum, int dif,
 -                       bool exact_dif)
 +                       __be32 daddr, unsigned short hnum,
 +                       int dif, int sdif, bool exact_dif)
  {
        int score;
        struct inet_sock *inet;
        }
  
        if (sk->sk_bound_dev_if || exact_dif) {
 -              if (sk->sk_bound_dev_if != dif)
 +              bool dev_match = (sk->sk_bound_dev_if == dif ||
 +                                sk->sk_bound_dev_if == sdif);
 +
 +              if (exact_dif && !dev_match)
                        return -1;
 -              score += 4;
 +              if (sk->sk_bound_dev_if && dev_match)
 +                      score += 4;
        }
 +
        if (sk->sk_incoming_cpu == raw_smp_processor_id())
                score++;
        return score;
@@@ -441,11 -436,10 +441,11 @@@ static u32 udp_ehashfn(const struct ne
  
  /* called with rcu_read_lock() */
  static struct sock *udp4_lib_lookup2(struct net *net,
 -              __be32 saddr, __be16 sport,
 -              __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
 -              struct udp_hslot *hslot2,
 -              struct sk_buff *skb)
 +                                   __be32 saddr, __be16 sport,
 +                                   __be32 daddr, unsigned int hnum,
 +                                   int dif, int sdif, bool exact_dif,
 +                                   struct udp_hslot *hslot2,
 +                                   struct sk_buff *skb)
  {
        struct sock *sk, *result;
        int score, badness, matches = 0, reuseport = 0;
        badness = 0;
        udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                score = compute_score(sk, net, saddr, sport,
 -                                    daddr, hnum, dif, exact_dif);
 +                                    daddr, hnum, dif, sdif, exact_dif);
                if (score > badness) {
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
   * harder than this. -DaveM
   */
  struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 -              __be16 sport, __be32 daddr, __be16 dport,
 -              int dif, struct udp_table *udptable, struct sk_buff *skb)
 +              __be16 sport, __be32 daddr, __be16 dport, int dif,
 +              int sdif, struct udp_table *udptable, struct sk_buff *skb)
  {
        struct sock *sk, *result;
        unsigned short hnum = ntohs(dport);
                        goto begin;
  
                result = udp4_lib_lookup2(net, saddr, sport,
 -                                        daddr, hnum, dif,
 +                                        daddr, hnum, dif, sdif,
                                          exact_dif, hslot2, skb);
                if (!result) {
                        unsigned int old_slot2 = slot2;
                                goto begin;
  
                        result = udp4_lib_lookup2(net, saddr, sport,
 -                                                daddr, hnum, dif,
 +                                                daddr, hnum, dif, sdif,
                                                  exact_dif, hslot2, skb);
                }
                return result;
@@@ -527,7 -521,7 +527,7 @@@ begin
        badness = 0;
        sk_for_each_rcu(sk, &hslot->head) {
                score = compute_score(sk, net, saddr, sport,
 -                                    daddr, hnum, dif, exact_dif);
 +                                    daddr, hnum, dif, sdif, exact_dif);
                if (score > badness) {
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
@@@ -560,7 -554,7 +560,7 @@@ static inline struct sock *__udp4_lib_l
  
        return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
                                 iph->daddr, dport, inet_iif(skb),
 -                               udptable, skb);
 +                               inet_sdif(skb), udptable, skb);
  }
  
  struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@@ -582,7 -576,7 +582,7 @@@ struct sock *udp4_lib_lookup(struct ne
        struct sock *sk;
  
        sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
 -                             dif, &udp_table, NULL);
 +                             dif, 0, &udp_table, NULL);
        if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                sk = NULL;
        return sk;
@@@ -593,7 -587,7 +593,7 @@@ EXPORT_SYMBOL_GPL(udp4_lib_lookup)
  static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
                                       __be16 loc_port, __be32 loc_addr,
                                       __be16 rmt_port, __be32 rmt_addr,
 -                                     int dif, unsigned short hnum)
 +                                     int dif, int sdif, unsigned short hnum)
  {
        struct inet_sock *inet = inet_sk(sk);
  
            (inet->inet_dport != rmt_port && inet->inet_dport) ||
            (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
            ipv6_only_sock(sk) ||
 -          (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
 +          (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
 +           sk->sk_bound_dev_if != sdif))
                return false;
 -      if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
 +      if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
                return false;
        return true;
  }
@@@ -635,8 -628,8 +635,8 @@@ void __udp4_lib_err(struct sk_buff *skb
        struct net *net = dev_net(skb->dev);
  
        sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
 -                      iph->saddr, uh->source, skb->dev->ifindex, udptable,
 -                      NULL);
 +                             iph->saddr, uh->source, skb->dev->ifindex, 0,
 +                             udptable, NULL);
        if (!sk) {
                __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
                return; /* No socket for error */
@@@ -809,7 -802,7 +809,7 @@@ static int udp_send_skb(struct sk_buff 
        if (is_udplite)                                  /*     UDP-Lite      */
                csum = udplite_csum(skb);
  
 -      else if (sk->sk_no_check_tx && !skb_is_gso(skb)) {   /* UDP csum off */
 +      else if (sk->sk_no_check_tx) {                   /* UDP csum off */
  
                skb->ip_summed = CHECKSUM_NONE;
                goto send;
@@@ -1183,11 -1176,7 +1183,11 @@@ static void udp_set_dev_scratch(struct 
        scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
        scratch->is_linear = !skb_is_nonlinear(skb);
  #endif
 -      if (likely(!skb->_skb_refdst && !skb_sec_path(skb)))
 +      /* all head states execept sp (dst, sk, nf) are always cleared by
 +       * udp_rcv() and we need to preserve secpath, if present, to eventually
 +       * process IP_CMSG_PASSSEC at recvmsg() time
 +       */
 +      if (likely(!skb_sec_path(skb)))
                scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
  }
  
@@@ -1794,6 -1783,13 +1794,6 @@@ static int __udp_queue_rcv_skb(struct s
                sk_mark_napi_id_once(sk, skb);
        }
  
 -      /* At recvmsg() time we may access skb->dst or skb->sp depending on
 -       * the IP options and the cmsg flags, elsewhere can we clear all
 -       * pending head states while they are hot in the cache
 -       */
 -      if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
 -              skb_release_head_state(skb);
 -
        rc = __udp_enqueue_schedule_skb(sk, skb);
        if (rc < 0) {
                int is_udplite = IS_UDPLITE(sk);
@@@ -1933,14 -1929,16 +1933,16 @@@ drop
  /* For TCP sockets, sk_rx_dst is protected by socket lock
   * For UDP, we use xchg() to guard against concurrent changes.
   */
- void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
  {
        struct dst_entry *old;
  
        if (dst_hold_safe(dst)) {
                old = xchg(&sk->sk_rx_dst, dst);
                dst_release(old);
+               return old != dst;
        }
+       return false;
  }
  EXPORT_SYMBOL(udp_sk_rx_dst_set);
  
@@@ -1961,7 -1959,6 +1963,7 @@@ static int __udp4_lib_mcast_deliver(str
        unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
        unsigned int offset = offsetof(typeof(*sk), sk_node);
        int dif = skb->dev->ifindex;
 +      int sdif = inet_sdif(skb);
        struct hlist_node *node;
        struct sk_buff *nskb;
  
@@@ -1976,7 -1973,7 +1978,7 @@@ start_lookup
  
        sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
                if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
 -                                       uh->source, saddr, dif, hnum))
 +                                       uh->source, saddr, dif, sdif, hnum))
                        continue;
  
                if (!first) {
@@@ -2166,7 -2163,7 +2168,7 @@@ drop
  static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
                                                  __be16 loc_port, __be32 loc_addr,
                                                  __be16 rmt_port, __be32 rmt_addr,
 -                                                int dif)
 +                                                int dif, int sdif)
  {
        struct sock *sk, *result;
        unsigned short hnum = ntohs(loc_port);
        result = NULL;
        sk_for_each_rcu(sk, &hslot->head) {
                if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
 -                                      rmt_port, rmt_addr, dif, hnum)) {
 +                                      rmt_port, rmt_addr, dif, sdif, hnum)) {
                        if (result)
                                return NULL;
                        result = sk;
  static struct sock *__udp4_lib_demux_lookup(struct net *net,
                                            __be16 loc_port, __be32 loc_addr,
                                            __be16 rmt_port, __be32 rmt_addr,
 -                                          int dif)
 +                                          int dif, int sdif)
  {
        unsigned short hnum = ntohs(loc_port);
        unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
  
        udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                if (INET_MATCH(sk, net, acookie, rmt_addr,
 -                             loc_addr, ports, dif))
 +                             loc_addr, ports, dif, sdif))
                        return sk;
                /* Only check first socket in chain */
                break;
@@@ -2225,7 -2222,6 +2227,7 @@@ void udp_v4_early_demux(struct sk_buff 
        struct sock *sk = NULL;
        struct dst_entry *dst;
        int dif = skb->dev->ifindex;
 +      int sdif = inet_sdif(skb);
        int ours;
  
        /* validate the packet */
                }
  
                sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 -                                                 uh->source, iph->saddr, dif);
 +                                                 uh->source, iph->saddr,
 +                                                 dif, sdif);
        } else if (skb->pkt_type == PACKET_HOST) {
                sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
 -                                           uh->source, iph->saddr, dif);
 +                                           uh->source, iph->saddr, dif, sdif);
        }
  
        if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
diff --combined net/ipv6/addrconf.c
index 45d0a24644debc829604ea2120897192c52f0ee3,936e9ab4dda5453ce30b8640b85693b9728502fd..c2e2a78787ec990f4dac2040fb1e26dc150860e2
@@@ -3030,6 -3030,9 +3030,6 @@@ static void sit_add_v4_addrs(struct ine
  static void init_loopback(struct net_device *dev)
  {
        struct inet6_dev  *idev;
 -      struct net_device *sp_dev;
 -      struct inet6_ifaddr *sp_ifa;
 -      struct rt6_info *sp_rt;
  
        /* ::1 */
  
        }
  
        add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
 -
 -      /* Add routes to other interface's IPv6 addresses */
 -      for_each_netdev(dev_net(dev), sp_dev) {
 -              if (!strcmp(sp_dev->name, dev->name))
 -                      continue;
 -
 -              idev = __in6_dev_get(sp_dev);
 -              if (!idev)
 -                      continue;
 -
 -              read_lock_bh(&idev->lock);
 -              list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
 -
 -                      if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
 -                              continue;
 -
 -                      if (sp_ifa->rt) {
 -                              /* This dst has been added to garbage list when
 -                               * lo device down, release this obsolete dst and
 -                               * reallocate a new router for ifa.
 -                               */
 -                              if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
 -                                      ip6_rt_put(sp_ifa->rt);
 -                                      sp_ifa->rt = NULL;
 -                              } else {
 -                                      continue;
 -                              }
 -                      }
 -
 -                      sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
 -
 -                      /* Failure cases are ignored */
 -                      if (!IS_ERR(sp_rt)) {
 -                              sp_ifa->rt = sp_rt;
 -                              ip6_ins_rt(sp_rt);
 -                      }
 -              }
 -              read_unlock_bh(&idev->lock);
 -      }
  }
  
  void addrconf_add_linklocal(struct inet6_dev *idev,
@@@ -3279,11 -3321,11 +3279,11 @@@ static void addrconf_gre_config(struct 
  static int fixup_permanent_addr(struct inet6_dev *idev,
                                struct inet6_ifaddr *ifp)
  {
 -      /* rt6i_ref == 0 means the host route was removed from the
 +      /* !rt6i_node means the host route was removed from the
         * FIB, for example, if 'lo' device is taken down. In that
         * case regenerate the host route.
         */
 -      if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
 +      if (!ifp->rt || !ifp->rt->rt6i_node) {
                struct rt6_info *rt, *prev;
  
                rt = addrconf_dst_alloc(idev, &ifp->addr, false);
@@@ -5514,7 -5556,7 +5514,7 @@@ static void __ipv6_ifa_notify(int event
                 * our DAD process, so we don't need
                 * to do it again
                 */
-               if (!(ifp->rt->rt6i_node))
+               if (!rcu_access_pointer(ifp->rt->rt6i_node))
                        ip6_ins_rt(ifp->rt);
                if (ifp->idev->cnf.forwarding)
                        addrconf_join_anycast(ifp);
@@@ -6563,21 -6605,21 +6563,21 @@@ int __init addrconf_init(void
        rtnl_af_register(&inet6_ops);
  
        err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
 -                            NULL);
 +                            0);
        if (err < 0)
                goto errout;
  
        /* Only the first call to __rtnl_register can fail */
 -      __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
 -      __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
 +      __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
 +      __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
        __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
 -                      inet6_dump_ifaddr, NULL);
 +                      inet6_dump_ifaddr, 0);
        __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
 -                      inet6_dump_ifmcaddr, NULL);
 +                      inet6_dump_ifmcaddr, 0);
        __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
 -                      inet6_dump_ifacaddr, NULL);
 +                      inet6_dump_ifacaddr, 0);
        __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
 -                      inet6_netconf_dump_devconf, NULL);
 +                      inet6_netconf_dump_devconf, 0);
  
        ipv6_addr_label_rtnl_register();
  
diff --combined net/ipv6/esp6.c
index 7fb41b0ad437d79223b104bfd5d6b3092779d29c,ab64f367d11cc256ddc56527d979a06e32170745..89910e2c10f4a63bcd285e28820141266f6d056f
@@@ -226,7 -226,7 +226,7 @@@ int esp6_output_head(struct xfrm_state 
        int tailen = esp->tailen;
  
        if (!skb_cloned(skb)) {
-               if (tailen <= skb_availroom(skb)) {
+               if (tailen <= skb_tailroom(skb)) {
                        nfrags = 1;
                        trailer = skb;
                        tail = skb_tail_pointer(trailer);
  
                        kunmap_atomic(vaddr);
  
-                       spin_unlock_bh(&x->lock);
                        nfrags = skb_shinfo(skb)->nr_frags;
  
                        __skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
                        skb_shinfo(skb)->nr_frags = ++nfrags;
  
                        pfrag->offset = pfrag->offset + allocsize;
+                       spin_unlock_bh(&x->lock);
                        nfrags++;
  
                        skb->len += tailen;
@@@ -345,7 -346,7 +346,7 @@@ int esp6_output_tail(struct xfrm_state 
                           (unsigned char *)esph - skb->data,
                           assoclen + ivlen + esp->clen + alen);
        if (unlikely(err < 0))
-               goto error;
+               goto error_free;
  
        if (!esp->inplace) {
                int allocsize;
                spin_lock_bh(&x->lock);
                if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
                        spin_unlock_bh(&x->lock);
-                       goto error;
+                       goto error_free;
                }
  
                skb_shinfo(skb)->nr_frags = 1;
                                   (unsigned char *)esph - skb->data,
                                   assoclen + ivlen + esp->clen + alen);
                if (unlikely(err < 0))
-                       goto error;
+                       goto error_free;
        }
  
        if ((x->props.flags & XFRM_STATE_ESN))
  
        if (sg != dsg)
                esp_ssg_unref(x, tmp);
-       kfree(tmp);
  
+ error_free:
+       kfree(tmp);
  error:
        return err;
  }
@@@ -461,30 -463,28 +463,30 @@@ static int esp6_output(struct xfrm_stat
        return esp6_output_tail(x, skb, &esp);
  }
  
 -int esp6_input_done2(struct sk_buff *skb, int err)
 +static inline int esp_remove_trailer(struct sk_buff *skb)
  {
        struct xfrm_state *x = xfrm_input_state(skb);
        struct xfrm_offload *xo = xfrm_offload(skb);
        struct crypto_aead *aead = x->data;
 -      int alen = crypto_aead_authsize(aead);
 -      int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
 -      int elen = skb->len - hlen;
 -      int hdr_len = skb_network_header_len(skb);
 -      int padlen;
 +      int alen, hlen, elen;
 +      int padlen, trimlen;
 +      __wsum csumdiff;
        u8 nexthdr[2];
 +      int ret;
  
 -      if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
 -              kfree(ESP_SKB_CB(skb)->tmp);
 +      alen = crypto_aead_authsize(aead);
 +      hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
 +      elen = skb->len - hlen;
  
 -      if (unlikely(err))
 +      if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
 +              ret = xo->proto;
                goto out;
 +      }
  
        if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
                BUG();
  
 -      err = -EINVAL;
 +      ret = -EINVAL;
        padlen = nexthdr[0];
        if (padlen + 2 + alen >= elen) {
                net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
                goto out;
        }
  
 -      /* ... check padding bits here. Silly. :-) */
 +      trimlen = alen + padlen + 2;
 +      if (skb->ip_summed == CHECKSUM_COMPLETE) {
 +              csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
 +              skb->csum = csum_block_sub(skb->csum, csumdiff,
 +                                         skb->len - trimlen);
 +      }
 +      pskb_trim(skb, skb->len - trimlen);
 +
 +      ret = nexthdr[1];
 +
 +out:
 +      return ret;
 +}
  
 -      pskb_trim(skb, skb->len - alen - padlen - 2);
 -      __skb_pull(skb, hlen);
 +int esp6_input_done2(struct sk_buff *skb, int err)
 +{
 +      struct xfrm_state *x = xfrm_input_state(skb);
 +      struct xfrm_offload *xo = xfrm_offload(skb);
 +      struct crypto_aead *aead = x->data;
 +      int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
 +      int hdr_len = skb_network_header_len(skb);
 +
 +      if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
 +              kfree(ESP_SKB_CB(skb)->tmp);
 +
 +      if (unlikely(err))
 +              goto out;
 +
 +      err = esp_remove_trailer(skb);
 +      if (unlikely(err < 0))
 +              goto out;
 +
 +      skb_postpull_rcsum(skb, skb_network_header(skb),
 +                         skb_network_header_len(skb));
 +      skb_pull_rcsum(skb, hlen);
        if (x->props.mode == XFRM_MODE_TUNNEL)
                skb_reset_transport_header(skb);
        else
                skb_set_transport_header(skb, -hdr_len);
  
 -      err = nexthdr[1];
 -
        /* RFC4303: Drop dummy packets without any error */
        if (err == IPPROTO_NONE)
                err = -EINVAL;
diff --combined net/ipv6/esp6_offload.c
index 8d4e2ba9163da0831946a43cf01f403c09efb2db,1cf437f75b0bf2bc446337ededbf58bd22673823..333a478aa1610441ce08e3ac82e92d3b48e3222d
@@@ -209,13 -209,11 +209,13 @@@ out
  static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
  {
        struct crypto_aead *aead = x->data;
 +      struct xfrm_offload *xo = xfrm_offload(skb);
  
        if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
                return -EINVAL;
  
 -      skb->ip_summed = CHECKSUM_NONE;
 +      if (!(xo->flags & CRYPTO_DONE))
 +              skb->ip_summed = CHECKSUM_NONE;
  
        return esp6_input_done2(skb, 0);
  }
@@@ -288,7 -286,7 +288,7 @@@ static int esp6_xmit(struct xfrm_state 
        esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
  
        err = esp6_output_tail(x, skb, &esp);
-       if (err < 0)
+       if (err)
                return err;
  
        secpath_reset(skb);
@@@ -334,4 -332,3 +334,4 @@@ module_init(esp6_offload_init)
  module_exit(esp6_offload_exit);
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
 +MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --combined net/ipv6/ip6_fib.c
index 549aacc3cb2c6f803a19d97e295ceac56ce6ef44,e1c85bb4eac0fd50905fc441e726eca843fc36a8..a3b5c163325fa3448818c446c7de502eb4f5a9c8
@@@ -33,7 -33,6 +33,7 @@@
  #include <net/ndisc.h>
  #include <net/addrconf.h>
  #include <net/lwtunnel.h>
 +#include <net/fib_notifier.h>
  
  #include <net/ip6_fib.h>
  #include <net/ip6_route.h>
@@@ -149,12 -148,24 +149,24 @@@ static struct fib6_node *node_alloc(voi
        return fn;
  }
  
- static void node_free(struct fib6_node *fn)
+ static void node_free_immediate(struct fib6_node *fn)
+ {
+       kmem_cache_free(fib6_node_kmem, fn);
+ }
+ static void node_free_rcu(struct rcu_head *head)
  {
+       struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
        kmem_cache_free(fib6_node_kmem, fn);
  }
  
 -static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+ static void node_free(struct fib6_node *fn)
+ {
+       call_rcu(&fn->rcu, node_free_rcu);
+ }
 +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
  {
        int cpu;
  
        free_percpu(non_pcpu_rt->rt6i_pcpu);
        non_pcpu_rt->rt6i_pcpu = NULL;
  }
 -
 -static void rt6_release(struct rt6_info *rt)
 -{
 -      if (atomic_dec_and_test(&rt->rt6i_ref)) {
 -              rt6_free_pcpu(rt);
 -              dst_dev_put(&rt->dst);
 -              dst_release(&rt->dst);
 -      }
 -}
 +EXPORT_SYMBOL_GPL(rt6_free_pcpu);
  
  static void fib6_link_table(struct net *net, struct fib6_table *tb)
  {
@@@ -295,109 -314,6 +307,109 @@@ static void __net_init fib6_tables_init
  
  #endif
  
 +unsigned int fib6_tables_seq_read(struct net *net)
 +{
 +      unsigned int h, fib_seq = 0;
 +
 +      rcu_read_lock();
 +      for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
 +              struct hlist_head *head = &net->ipv6.fib_table_hash[h];
 +              struct fib6_table *tb;
 +
 +              hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
 +                      read_lock_bh(&tb->tb6_lock);
 +                      fib_seq += tb->fib_seq;
 +                      read_unlock_bh(&tb->tb6_lock);
 +              }
 +      }
 +      rcu_read_unlock();
 +
 +      return fib_seq;
 +}
 +
 +static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 +                                  enum fib_event_type event_type,
 +                                  struct rt6_info *rt)
 +{
 +      struct fib6_entry_notifier_info info = {
 +              .rt = rt,
 +      };
 +
 +      return call_fib6_notifier(nb, net, event_type, &info.info);
 +}
 +
 +static int call_fib6_entry_notifiers(struct net *net,
 +                                   enum fib_event_type event_type,
 +                                   struct rt6_info *rt)
 +{
 +      struct fib6_entry_notifier_info info = {
 +              .rt = rt,
 +      };
 +
 +      rt->rt6i_table->fib_seq++;
 +      return call_fib6_notifiers(net, event_type, &info.info);
 +}
 +
 +struct fib6_dump_arg {
 +      struct net *net;
 +      struct notifier_block *nb;
 +};
 +
 +static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
 +{
 +      if (rt == arg->net->ipv6.ip6_null_entry)
 +              return;
 +      call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
 +}
 +
 +static int fib6_node_dump(struct fib6_walker *w)
 +{
 +      struct rt6_info *rt;
 +
 +      for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
 +              fib6_rt_dump(rt, w->args);
 +      w->leaf = NULL;
 +      return 0;
 +}
 +
 +static void fib6_table_dump(struct net *net, struct fib6_table *tb,
 +                          struct fib6_walker *w)
 +{
 +      w->root = &tb->tb6_root;
 +      read_lock_bh(&tb->tb6_lock);
 +      fib6_walk(net, w);
 +      read_unlock_bh(&tb->tb6_lock);
 +}
 +
 +/* Called with rcu_read_lock() */
 +int fib6_tables_dump(struct net *net, struct notifier_block *nb)
 +{
 +      struct fib6_dump_arg arg;
 +      struct fib6_walker *w;
 +      unsigned int h;
 +
 +      w = kzalloc(sizeof(*w), GFP_ATOMIC);
 +      if (!w)
 +              return -ENOMEM;
 +
 +      w->func = fib6_node_dump;
 +      arg.net = net;
 +      arg.nb = nb;
 +      w->args = &arg;
 +
 +      for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
 +              struct hlist_head *head = &net->ipv6.fib_table_hash[h];
 +              struct fib6_table *tb;
 +
 +              hlist_for_each_entry_rcu(tb, head, tb6_hlist)
 +                      fib6_table_dump(net, tb, w);
 +      }
 +
 +      kfree(w);
 +
 +      return 0;
 +}
 +
  static int fib6_dump_node(struct fib6_walker *w)
  {
        int res;
@@@ -697,9 -613,9 +709,9 @@@ insert_above
  
                if (!in || !ln) {
                        if (in)
-                               node_free(in);
+                               node_free_immediate(in);
                        if (ln)
-                               node_free(ln);
+                               node_free_immediate(ln);
                        return ERR_PTR(-ENOMEM);
                }
  
@@@ -829,6 -745,8 +841,6 @@@ static void fib6_purge_rt(struct rt6_in
                        }
                        fn = fn->parent;
                }
 -              /* No more references are possible at this point. */
 -              BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
        }
  }
  
@@@ -971,10 -889,8 +983,10 @@@ add
  
                rt->dst.rt6_next = iter;
                *ins = rt;
-               rt->rt6i_node = fn;
+               rcu_assign_pointer(rt->rt6i_node, fn);
                atomic_inc(&rt->rt6i_ref);
 +              call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
 +                                        rt);
                if (!info->skip_notify)
                        inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
                info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
                        return err;
  
                *ins = rt;
-               rt->rt6i_node = fn;
+               rcu_assign_pointer(rt->rt6i_node, fn);
                rt->dst.rt6_next = iter->dst.rt6_next;
                atomic_inc(&rt->rt6i_ref);
 +              call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
 +                                        rt);
                if (!info->skip_notify)
                        inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
                if (!(fn->fn_flags & RTN_RTINFO)) {
                        fn->fn_flags |= RTN_RTINFO;
                }
                nsiblings = iter->rt6i_nsiblings;
 +              iter->rt6i_node = NULL;
                fib6_purge_rt(iter, fn, info->nl_net);
                if (fn->rr_ptr == iter)
                        fn->rr_ptr = NULL;
                                        break;
                                if (rt6_qualify_for_ecmp(iter)) {
                                        *ins = iter->dst.rt6_next;
 +                                      iter->rt6i_node = NULL;
                                        fib6_purge_rt(iter, fn, info->nl_net);
                                        if (fn->rr_ptr == iter)
                                                fn->rr_ptr = NULL;
@@@ -1138,7 -1050,7 +1150,7 @@@ int fib6_add(struct fib6_node *root, st
                                   root, and then (in failure) stale node
                                   in main tree.
                                 */
-                               node_free(sfn);
+                               node_free_immediate(sfn);
                                err = PTR_ERR(sn);
                                goto failure;
                        }
@@@ -1561,7 -1473,6 +1573,7 @@@ static void fib6_del_route(struct fib6_
  
        fib6_purge_rt(rt, fn, net);
  
 +      call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
        if (!info->skip_notify)
                inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
        rt6_release(rt);
  
  int fib6_del(struct rt6_info *rt, struct nl_info *info)
  {
+       struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+                                   lockdep_is_held(&rt->rt6i_table->tb6_lock));
        struct net *net = info->nl_net;
-       struct fib6_node *fn = rt->rt6i_node;
        struct rt6_info **rtp;
  
  #if RT6_DEBUG >= 2
@@@ -1759,7 -1671,9 +1772,9 @@@ static int fib6_clean_node(struct fib6_
                        if (res) {
  #if RT6_DEBUG >= 2
                                pr_debug("%s: del failed: rt=%p@%p err=%d\n",
-                                        __func__, rt, rt->rt6i_node, res);
+                                        __func__, rt,
+                                        rcu_access_pointer(rt->rt6i_node),
+                                        res);
  #endif
                                continue;
                        }
@@@ -1881,8 -1795,10 +1896,10 @@@ static int fib6_age(struct rt6_info *rt
                }
                gc_args->more++;
        } else if (rt->rt6i_flags & RTF_CACHE) {
+               if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
+                       rt->dst.obsolete = DST_OBSOLETE_KILL;
                if (atomic_read(&rt->dst.__refcnt) == 1 &&
-                   time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+                   rt->dst.obsolete == DST_OBSOLETE_KILL) {
                        RT6_TRACE("aging clone %p\n", rt);
                        return -1;
                } else if (rt->rt6i_flags & RTF_GATEWAY) {
@@@ -1942,11 -1858,6 +1959,11 @@@ static void fib6_gc_timer_cb(unsigned l
  static int __net_init fib6_net_init(struct net *net)
  {
        size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
 +      int err;
 +
 +      err = fib6_notifier_init(net);
 +      if (err)
 +              return err;
  
        spin_lock_init(&net->ipv6.fib6_gc_lock);
        rwlock_init(&net->ipv6.fib6_walker_lock);
@@@ -1999,7 -1910,6 +2016,7 @@@ out_fib_table_hash
  out_rt6_stats:
        kfree(net->ipv6.rt6_stats);
  out_timer:
 +      fib6_notifier_exit(net);
        return -ENOMEM;
  }
  
@@@ -2016,7 -1926,6 +2033,7 @@@ static void fib6_net_exit(struct net *n
        kfree(net->ipv6.fib6_main_tbl);
        kfree(net->ipv6.fib_table_hash);
        kfree(net->ipv6.rt6_stats);
 +      fib6_notifier_exit(net);
  }
  
  static struct pernet_operations fib6_net_ops = {
@@@ -2040,7 -1949,7 +2057,7 @@@ int __init fib6_init(void
                goto out_kmem_cache_create;
  
        ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
 -                            NULL);
 +                            0);
        if (ret)
                goto out_unregister_subsys;
  
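For illustration only (not part of this merge): the ip6_fib.c hunks above switch rt->rt6i_node over to RCU accessors, publishing with rcu_assign_pointer() and reading with rcu_dereference*(). Below is a minimal user-space sketch of that publish/read ordering; C11 release/consume atomics stand in for the kernel RCU primitives, and every name in it is hypothetical.

#include <stdatomic.h>
#include <stdio.h>

struct fib_node { int sernum; };

struct route {
        _Atomic(struct fib_node *) node;        /* stands in for rt->rt6i_node */
};

/* analogue of rcu_assign_pointer(): initialise the node, then publish it */
static void route_link(struct route *rt, struct fib_node *fn)
{
        atomic_store_explicit(&rt->node, fn, memory_order_release);
}

/* analogue of rcu_dereference(): a reader sees a fully initialised node */
static struct fib_node *route_node(struct route *rt)
{
        return atomic_load_explicit(&rt->node, memory_order_consume);
}

int main(void)
{
        static struct fib_node fn = { .sernum = 1 };
        struct route rt;

        atomic_init(&rt.node, NULL);
        route_link(&rt, &fn);
        printf("sernum=%d\n", route_node(&rt)->sernum);
        return 0;
}
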
diff --combined net/ipv6/route.c
index 4d0273459d49cc3b6cd72f0de135b63ac9d02250,2d0e7798c793a4058dc0ef3a5b50734e774500a9..26cc9f483b6d282f0a665bfc4c2c206da7981921
@@@ -440,11 -440,22 +440,12 @@@ static bool rt6_check_expired(const str
                if (time_after(jiffies, rt->dst.expires))
                        return true;
        } else if (rt->dst.from) {
-               return rt6_check_expired((struct rt6_info *) rt->dst.from);
+               return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+                      rt6_check_expired((struct rt6_info *)rt->dst.from);
        }
        return false;
  }
  
 -/* Multipath route selection:
 - *   Hash based function using packet header and flowlabel.
 - * Adapted from fib_info_hashfn()
 - */
 -static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 -                             const struct flowi6 *fl6)
 -{
 -      return get_hash_from_flowi6(fl6) % candidate_count;
 -}
 -
  static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
                                             struct flowi6 *fl6, int oif,
                                             int strict)
        struct rt6_info *sibling, *next_sibling;
        int route_choosen;
  
 -      route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 +      /* We might have already computed the hash for ICMPv6 errors. In such
 +       * case it will always be non-zero. Otherwise now is the time to do it.
 +       */
 +      if (!fl6->mp_hash)
 +              fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
 +
 +      route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
        /* Don't change the route, if route_choosen == 0
         * (siblings do not include ourselves)
         */
@@@ -954,34 -959,10 +955,34 @@@ int ip6_ins_rt(struct rt6_info *rt
        return __ip6_ins_rt(rt, &info, &mxc, NULL);
  }
  
 +/* called with rcu_read_lock held */
 +static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
 +{
 +      struct net_device *dev = rt->dst.dev;
 +
 +      if (rt->rt6i_flags & RTF_LOCAL) {
 +              /* for copies of local routes, dst->dev needs to be the device
 +               * itself if it is a master, the master device if the device is
 +               * enslaved, and the loopback device by default
 +               */
 +              if (netif_is_l3_slave(dev) &&
 +                  !rt6_need_strict(&rt->rt6i_dst.addr))
 +                      dev = l3mdev_master_dev_rcu(dev);
 +              else if (!netif_is_l3_master(dev))
 +                      dev = dev_net(dev)->loopback_dev;
 +              /* the remaining case is netif_is_l3_master(dev) being true,
 +               * in which case dev itself is what we want returned
 +               */
 +      }
 +
 +      return dev;
 +}
 +
  static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
                                           const struct in6_addr *daddr,
                                           const struct in6_addr *saddr)
  {
 +      struct net_device *dev;
        struct rt6_info *rt;
  
        /*
        if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
                ort = (struct rt6_info *)ort->dst.from;
  
 -      rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 -
 +      rcu_read_lock();
 +      dev = ip6_rt_get_dev_rcu(ort);
 +      rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
 +      rcu_read_unlock();
        if (!rt)
                return NULL;
  
  
  static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
  {
 +      struct net_device *dev;
        struct rt6_info *pcpu_rt;
  
 -      pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 -                                rt->dst.dev, rt->dst.flags);
 -
 +      rcu_read_lock();
 +      dev = ip6_rt_get_dev_rcu(rt);
 +      pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
 +      rcu_read_unlock();
        if (!pcpu_rt)
                return NULL;
        ip6_rt_copy_init(pcpu_rt, rt);
@@@ -1210,54 -1187,6 +1211,54 @@@ struct dst_entry *ip6_route_input_looku
  }
  EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
  
 +static void ip6_multipath_l3_keys(const struct sk_buff *skb,
 +                                struct flow_keys *keys)
 +{
 +      const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
 +      const struct ipv6hdr *key_iph = outer_iph;
 +      const struct ipv6hdr *inner_iph;
 +      const struct icmp6hdr *icmph;
 +      struct ipv6hdr _inner_iph;
 +
 +      if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
 +              goto out;
 +
 +      icmph = icmp6_hdr(skb);
 +      if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
 +          icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
 +          icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
 +          icmph->icmp6_type != ICMPV6_PARAMPROB)
 +              goto out;
 +
 +      inner_iph = skb_header_pointer(skb,
 +                                     skb_transport_offset(skb) + sizeof(*icmph),
 +                                     sizeof(_inner_iph), &_inner_iph);
 +      if (!inner_iph)
 +              goto out;
 +
 +      key_iph = inner_iph;
 +out:
 +      memset(keys, 0, sizeof(*keys));
 +      keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 +      keys->addrs.v6addrs.src = key_iph->saddr;
 +      keys->addrs.v6addrs.dst = key_iph->daddr;
 +      keys->tags.flow_label = ip6_flowinfo(key_iph);
 +      keys->basic.ip_proto = key_iph->nexthdr;
 +}
 +
 +/* if skb is set it will be used and fl6 can be NULL */
 +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
 +{
 +      struct flow_keys hash_keys;
 +
 +      if (skb) {
 +              ip6_multipath_l3_keys(skb, &hash_keys);
 +              return flow_hash_from_keys(&hash_keys);
 +      }
 +
 +      return get_hash_from_flowi6(fl6);
 +}
 +
  void ip6_route_input(struct sk_buff *skb)
  {
        const struct ipv6hdr *iph = ipv6_hdr(skb);
        tun_info = skb_tunnel_info(skb);
        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
 +      if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 +              fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
        skb_dst_drop(skb);
        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
  }
@@@ -1363,7 -1290,9 +1364,9 @@@ static void rt6_dst_from_metrics_check(
  
  static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
  {
-       if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+       u32 rt_cookie = 0;
+       if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
                return NULL;
  
        if (rt6_check_expired(rt))
@@@ -1431,8 -1360,14 +1434,14 @@@ static void ip6_link_failure(struct sk_
                if (rt->rt6i_flags & RTF_CACHE) {
                        if (dst_hold_safe(&rt->dst))
                                ip6_del_rt(rt);
-               } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
-                       rt->rt6i_node->fn_sernum = -1;
+               } else {
+                       struct fib6_node *fn;
+                       rcu_read_lock();
+                       fn = rcu_dereference(rt->rt6i_node);
+                       if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+                               fn->fn_sernum = -1;
+                       rcu_read_unlock();
                }
        }
  }
@@@ -1449,7 -1384,8 +1458,8 @@@ static void rt6_do_update_pmtu(struct r
  static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
  {
        return !(rt->rt6i_flags & RTF_CACHE) &&
-               (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+               (rt->rt6i_flags & RTF_PCPU ||
+                rcu_access_pointer(rt->rt6i_node));
  }
  
  static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@@ -2762,9 -2698,15 +2772,9 @@@ struct rt6_info *addrconf_dst_alloc(str
  {
        u32 tb_id;
        struct net *net = dev_net(idev->dev);
 -      struct net_device *dev = net->loopback_dev;
 +      struct net_device *dev = idev->dev;
        struct rt6_info *rt;
  
 -      /* use L3 Master device as loopback for host routes if device
 -       * is enslaved and address is not link local or multicast
 -       */
 -      if (!rt6_need_strict(addr))
 -              dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
 -
        rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
        if (!rt)
                return ERR_PTR(-ENOMEM);
@@@ -3395,9 -3337,6 +3405,9 @@@ static int rt6_nexthop_info(struct sk_b
                        goto nla_put_failure;
        }
  
 +      if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
 +              *flags |= RTNH_F_OFFLOAD;
 +
        /* not needed for multipath encoding b/c it has a rtnexthop struct */
        if (!skip_oif && rt->dst.dev &&
            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@@ -3676,11 -3615,8 +3686,11 @@@ static int inet6_rtm_getroute(struct sk
                struct net_device *dev;
                int flags = 0;
  
 -              dev = __dev_get_by_index(net, iif);
 +              rcu_read_lock();
 +
 +              dev = dev_get_by_index_rcu(net, iif);
                if (!dev) {
 +                      rcu_read_unlock();
                        err = -ENODEV;
                        goto errout;
                }
  
                if (!fibmatch)
                        dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 +              else
 +                      dst = ip6_route_lookup(net, &fl6, 0);
 +
 +              rcu_read_unlock();
        } else {
                fl6.flowi6_oif = oif;
  
                if (!fibmatch)
                        dst = ip6_route_output(net, NULL, &fl6);
 +              else
 +                      dst = ip6_route_lookup(net, &fl6, 0);
        }
  
 -      if (fibmatch)
 -              dst = ip6_route_lookup(net, &fl6, 0);
  
        rt = container_of(dst, struct rt6_info, dst);
        if (rt->dst.error) {
@@@ -3996,7 -3928,6 +4006,7 @@@ static int __net_init ip6_route_net_ini
                         ip6_template_metrics, true);
  
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 +      net->ipv6.fib6_has_custom_rules = false;
        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
                                               sizeof(*net->ipv6.ip6_prohibit_entry),
                                               GFP_KERNEL);
@@@ -4172,10 -4103,9 +4182,10 @@@ int __init ip6_route_init(void
                goto fib6_rules_init;
  
        ret = -ENOBUFS;
 -      if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
 -          __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
 -          __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
 +      if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
 +          __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
 +          __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
 +                          RTNL_FLAG_DOIT_UNLOCKED))
                goto out_register_late_subsys;
  
        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
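For illustration only (not part of this merge): the route.c hunks above pick an ECMP sibling as mp_hash % (rt6i_nsiblings + 1), computing the hash once (e.g. for ICMPv6 errors) and reusing it when it is already non-zero. A minimal user-space sketch of that selection; the hash function and every name below are hypothetical stand-ins for flow_hash_from_keys().

#include <stdint.h>
#include <stdio.h>

/* toy stand-in for flow_hash_from_keys(): mix src/dst/flowlabel */
static uint32_t toy_flow_hash(uint32_t saddr, uint32_t daddr, uint32_t flowlabel)
{
        uint32_t h = saddr * 2654435761u;

        h ^= daddr + 0x9e3779b9u + (h << 6) + (h >> 2);
        h ^= flowlabel + 0x9e3779b9u + (h << 6) + (h >> 2);
        return h ? h : 1;       /* keep non-zero so "already computed" is detectable */
}

static unsigned int pick_sibling(uint32_t *mp_hash, uint32_t saddr,
                                 uint32_t daddr, uint32_t flowlabel,
                                 unsigned int nsiblings)
{
        if (!*mp_hash)                          /* compute once, reuse later */
                *mp_hash = toy_flow_hash(saddr, daddr, flowlabel);
        return *mp_hash % (nsiblings + 1);      /* 0 means "keep the matched route" */
}

int main(void)
{
        uint32_t mp_hash = 0;
        unsigned int idx = pick_sibling(&mp_hash, 0x0a000001, 0x0a000002, 7, 3);

        printf("chosen sibling index: %u (hash 0x%x)\n", idx, (unsigned int)mp_hash);
        return 0;
}
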
diff --combined net/ipv6/udp.c
index 976f3039135612ffea9da4861f7cf17fa797d77c,d6886228e1d05c4dd192f5fe431fdaca1ffadabd..42ebb9ad46cc16405a6b4c6d948a8e82238ef732
@@@ -129,7 -129,7 +129,7 @@@ static void udp_v6_rehash(struct sock *
  static int compute_score(struct sock *sk, struct net *net,
                         const struct in6_addr *saddr, __be16 sport,
                         const struct in6_addr *daddr, unsigned short hnum,
 -                       int dif, bool exact_dif)
 +                       int dif, int sdif, bool exact_dif)
  {
        int score;
        struct inet_sock *inet;
        }
  
        if (sk->sk_bound_dev_if || exact_dif) {
 -              if (sk->sk_bound_dev_if != dif)
 +              bool dev_match = (sk->sk_bound_dev_if == dif ||
 +                                sk->sk_bound_dev_if == sdif);
 +
 +              if (exact_dif && !dev_match)
                        return -1;
 -              score++;
 +              if (sk->sk_bound_dev_if && dev_match)
 +                      score++;
        }
  
        if (sk->sk_incoming_cpu == raw_smp_processor_id())
  /* called with rcu_read_lock() */
  static struct sock *udp6_lib_lookup2(struct net *net,
                const struct in6_addr *saddr, __be16 sport,
 -              const struct in6_addr *daddr, unsigned int hnum, int dif,
 -              bool exact_dif, struct udp_hslot *hslot2,
 -              struct sk_buff *skb)
 +              const struct in6_addr *daddr, unsigned int hnum,
 +              int dif, int sdif, bool exact_dif,
 +              struct udp_hslot *hslot2, struct sk_buff *skb)
  {
        struct sock *sk, *result;
        int score, badness, matches = 0, reuseport = 0;
        badness = -1;
        udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                score = compute_score(sk, net, saddr, sport,
 -                                    daddr, hnum, dif, exact_dif);
 +                                    daddr, hnum, dif, sdif, exact_dif);
                if (score > badness) {
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
  
  /* rcu_read_lock() must be held */
  struct sock *__udp6_lib_lookup(struct net *net,
 -                                    const struct in6_addr *saddr, __be16 sport,
 -                                    const struct in6_addr *daddr, __be16 dport,
 -                                    int dif, struct udp_table *udptable,
 -                                    struct sk_buff *skb)
 +                             const struct in6_addr *saddr, __be16 sport,
 +                             const struct in6_addr *daddr, __be16 dport,
 +                             int dif, int sdif, struct udp_table *udptable,
 +                             struct sk_buff *skb)
  {
        struct sock *sk, *result;
        unsigned short hnum = ntohs(dport);
                        goto begin;
  
                result = udp6_lib_lookup2(net, saddr, sport,
 -                                        daddr, hnum, dif, exact_dif,
 +                                        daddr, hnum, dif, sdif, exact_dif,
                                          hslot2, skb);
                if (!result) {
                        unsigned int old_slot2 = slot2;
                                goto begin;
  
                        result = udp6_lib_lookup2(net, saddr, sport,
 -                                                daddr, hnum, dif,
 +                                                daddr, hnum, dif, sdif,
                                                  exact_dif, hslot2,
                                                  skb);
                }
@@@ -265,7 -261,7 +265,7 @@@ begin
        badness = -1;
        sk_for_each_rcu(sk, &hslot->head) {
                score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
 -                                    exact_dif);
 +                                    sdif, exact_dif);
                if (score > badness) {
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
@@@ -298,7 -294,7 +298,7 @@@ static struct sock *__udp6_lib_lookup_s
  
        return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
                                 &iph->daddr, dport, inet6_iif(skb),
 -                               udptable, skb);
 +                               inet6_sdif(skb), udptable, skb);
  }
  
  struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
  
        return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
                                 &iph->daddr, dport, inet6_iif(skb),
 -                               &udp_table, skb);
 +                               inet6_sdif(skb), &udp_table, skb);
  }
  EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
  
@@@ -324,7 -320,7 +324,7 @@@ struct sock *udp6_lib_lookup(struct ne
        struct sock *sk;
  
        sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
 -                              dif, &udp_table, NULL);
 +                              dif, 0, &udp_table, NULL);
        if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                sk = NULL;
        return sk;
@@@ -506,7 -502,7 +506,7 @@@ void __udp6_lib_err(struct sk_buff *skb
        struct net *net = dev_net(skb->dev);
  
        sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
 -                             inet6_iif(skb), udptable, skb);
 +                             inet6_iif(skb), 0, udptable, skb);
        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
@@@ -772,6 -768,15 +772,15 @@@ start_lookup
        return 0;
  }
  
+ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+       if (udp_sk_rx_dst_set(sk, dst)) {
+               const struct rt6_info *rt = (const struct rt6_info *)dst;
+               inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+       }
+ }
  int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
                   int proto)
  {
                int ret;
  
                if (unlikely(sk->sk_rx_dst != dst))
-                       udp_sk_rx_dst_set(sk, dst);
+                       udp6_sk_rx_dst_set(sk, dst);
  
                ret = udpv6_queue_rcv_skb(sk, skb);
                sock_put(sk);
@@@ -898,7 -903,7 +907,7 @@@ discard
  static struct sock *__udp6_lib_demux_lookup(struct net *net,
                        __be16 loc_port, const struct in6_addr *loc_addr,
                        __be16 rmt_port, const struct in6_addr *rmt_addr,
 -                      int dif)
 +                      int dif, int sdif)
  {
        unsigned short hnum = ntohs(loc_port);
        unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
  
        udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                if (sk->sk_state == TCP_ESTABLISHED &&
 -                  INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
 +                  INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
                        return sk;
                /* Only check first socket in chain */
                break;
@@@ -924,7 -929,6 +933,7 @@@ static void udp_v6_early_demux(struct s
        struct sock *sk;
        struct dst_entry *dst;
        int dif = skb->dev->ifindex;
 +      int sdif = inet6_sdif(skb);
  
        if (!pskb_may_pull(skb, skb_transport_offset(skb) +
            sizeof(struct udphdr)))
                sk = __udp6_lib_demux_lookup(net, uh->dest,
                                             &ipv6_hdr(skb)->daddr,
                                             uh->source, &ipv6_hdr(skb)->saddr,
 -                                           dif);
 +                                           dif, sdif);
        else
                return;
  
@@@ -1472,9 -1476,6 +1481,9 @@@ int compat_udpv6_getsockopt(struct soc
  }
  #endif
  
 +/* thinking of making this const? Don't.
 + * early_demux can change based on sysctl.
 + */
  static struct inet6_protocol udpv6_protocol = {
        .early_demux    =       udp_v6_early_demux,
        .early_demux_handler =  udp_v6_early_demux,
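For illustration only (not part of this merge): compute_score() in the udp.c hunks above now takes the L3 master ifindex (sdif) alongside the receiving ifindex (dif) and counts a socket bound to either one as a device match. A simplified, hypothetical user-space version of that scoring:

#include <stdbool.h>
#include <stdio.h>

struct toy_sock {
        int bound_dev_if;       /* 0 = not bound to a device */
};

/* returns -1 when the socket cannot match, otherwise a score >= 0 */
static int toy_score(const struct toy_sock *sk, int dif, int sdif, bool exact_dif)
{
        int score = 0;

        if (sk->bound_dev_if || exact_dif) {
                bool dev_match = (sk->bound_dev_if == dif ||
                                  sk->bound_dev_if == sdif);

                if (exact_dif && !dev_match)
                        return -1;
                if (sk->bound_dev_if && dev_match)
                        score++;
        }
        return score;
}

int main(void)
{
        struct toy_sock vrf_bound = { .bound_dev_if = 10 };     /* bound to a VRF device */

        /* packet received on ifindex 3, which is enslaved to VRF ifindex 10 */
        printf("score: %d\n", toy_score(&vrf_bound, 3, 10, true));
        return 0;
}
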
diff --combined net/kcm/kcmsock.c
index 48e993b2dbcf1afae04968ed840e2e98c2cf6772,4abf6287d7e1c29314db5c846acd16c3a2a377db..af4e76ac88ff0817398d1d7460a41f0cd5fe6f30
@@@ -96,12 -96,12 +96,12 @@@ static void kcm_update_rx_mux_stats(str
                                    struct kcm_psock *psock)
  {
        STRP_STATS_ADD(mux->stats.rx_bytes,
 -                     psock->strp.stats.rx_bytes -
 +                     psock->strp.stats.bytes -
                       psock->saved_rx_bytes);
        mux->stats.rx_msgs +=
 -              psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
 -      psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
 -      psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
 +              psock->strp.stats.msgs - psock->saved_rx_msgs;
 +      psock->saved_rx_msgs = psock->strp.stats.msgs;
 +      psock->saved_rx_bytes = psock->strp.stats.bytes;
  }
  
  static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@@ -1118,7 -1118,7 +1118,7 @@@ static int kcm_recvmsg(struct socket *s
        struct kcm_sock *kcm = kcm_sk(sk);
        int err = 0;
        long timeo;
 -      struct strp_rx_msg *rxm;
 +      struct strp_msg *stm;
        int copied = 0;
        struct sk_buff *skb;
  
  
        /* Okay, have a message on the receive queue */
  
 -      rxm = strp_rx_msg(skb);
 +      stm = strp_msg(skb);
  
 -      if (len > rxm->full_len)
 -              len = rxm->full_len;
 +      if (len > stm->full_len)
 +              len = stm->full_len;
  
 -      err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
 +      err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
        if (err < 0)
                goto out;
  
        copied = len;
        if (likely(!(flags & MSG_PEEK))) {
                KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
 -              if (copied < rxm->full_len) {
 +              if (copied < stm->full_len) {
                        if (sock->type == SOCK_DGRAM) {
                                /* Truncated message */
                                msg->msg_flags |= MSG_TRUNC;
                                goto msg_finished;
                        }
 -                      rxm->offset += copied;
 -                      rxm->full_len -= copied;
 +                      stm->offset += copied;
 +                      stm->full_len -= copied;
                } else {
  msg_finished:
                        /* Finished with message */
@@@ -1175,7 -1175,7 +1175,7 @@@ static ssize_t kcm_splice_read(struct s
        struct sock *sk = sock->sk;
        struct kcm_sock *kcm = kcm_sk(sk);
        long timeo;
 -      struct strp_rx_msg *rxm;
 +      struct strp_msg *stm;
        int err = 0;
        ssize_t copied;
        struct sk_buff *skb;
  
        /* Okay, have a message on the receive queue */
  
 -      rxm = strp_rx_msg(skb);
 +      stm = strp_msg(skb);
  
 -      if (len > rxm->full_len)
 -              len = rxm->full_len;
 +      if (len > stm->full_len)
 +              len = stm->full_len;
  
 -      copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
 +      copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
        if (copied < 0) {
                err = copied;
                goto err_out;
  
        KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  
 -      rxm->offset += copied;
 -      rxm->full_len -= copied;
 +      stm->offset += copied;
 +      stm->full_len -= copied;
  
        /* We have no way to return MSG_EOR. If all the bytes have been
         * read we still leave the message in the receive socket buffer.
@@@ -1376,17 -1376,17 +1376,21 @@@ static int kcm_attach(struct socket *so
        struct kcm_psock *psock = NULL, *tpsock;
        struct list_head *head;
        int index = 0;
 -      struct strp_callbacks cb;
 +      static const struct strp_callbacks cb = {
 +              .rcv_msg = kcm_rcv_strparser,
 +              .parse_msg = kcm_parse_func_strparser,
 +              .read_sock_done = kcm_read_sock_done,
 +      };
        int err;
  
        csk = csock->sk;
        if (!csk)
                return -EINVAL;
  
+       /* We must prevent loops or risk deadlock! */
+       if (csk->sk_family == PF_KCM)
+               return -EOPNOTSUPP;
        psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
        if (!psock)
                return -ENOMEM;
        psock->sk = csk;
        psock->bpf_prog = prog;
  
 -      cb.rcv_msg = kcm_rcv_strparser;
 -      cb.abort_parser = NULL;
 -      cb.parse_msg = kcm_parse_func_strparser;
 -      cb.read_sock_done = kcm_read_sock_done;
 -
        err = strp_init(&psock->strp, csk, &cb);
        if (err) {
                kmem_cache_free(kcm_psockp, psock);
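For illustration only (not part of this merge): kcm_attach() above replaces a strp_callbacks struct built on the stack at every call with a single static const table of function pointers. A minimal, hypothetical sketch of that pattern:

#include <stdio.h>

struct toy_callbacks {
        int  (*parse_msg)(const char *buf);
        void (*rcv_msg)(const char *buf);
};

static int toy_parse(const char *buf)   { return buf[0] != '\0'; }
static void toy_rcv(const char *buf)    { printf("got: %s\n", buf); }

/* one read-only instance shared by every attach, instead of
 * re-initialising the struct on the stack each time */
static const struct toy_callbacks cb = {
        .parse_msg = toy_parse,
        .rcv_msg   = toy_rcv,
};

static void toy_attach(const struct toy_callbacks *ops, const char *buf)
{
        if (ops->parse_msg(buf))
                ops->rcv_msg(buf);
}

int main(void)
{
        toy_attach(&cb, "hello");
        return 0;
}
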
diff --combined net/packet/af_packet.c
index f31cb71172e00cf0d00e05e2771281795d42213b,1c61af9af67dae10ea9675a45b191d7302c69151..c26172995511f77bf9ed4c36d55fd1f430f6de5e
@@@ -177,6 -177,8 +177,6 @@@ static int packet_set_ring(struct sock 
  #define BLK_PLUS_PRIV(sz_of_priv) \
        (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
  
 -#define PGV_FROM_VMALLOC 1
 -
  #define BLOCK_STATUS(x)       ((x)->hdr.bh1.block_status)
  #define BLOCK_NUM_PKTS(x)     ((x)->hdr.bh1.num_pkts)
  #define BLOCK_O2FP(x)         ((x)->hdr.bh1.offset_to_first_pkt)
@@@ -2189,6 -2191,7 +2189,7 @@@ static int tpacket_rcv(struct sk_buff *
        struct timespec ts;
        __u32 ts_status;
        bool is_drop_n_account = false;
+       bool do_vnet = false;
  
        /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
         * We may add members to them until current aligned size without forcing
                netoff = TPACKET_ALIGN(po->tp_hdrlen +
                                       (maclen < 16 ? 16 : maclen)) +
                                       po->tp_reserve;
-               if (po->has_vnet_hdr)
+               if (po->has_vnet_hdr) {
                        netoff += sizeof(struct virtio_net_hdr);
+                       do_vnet = true;
+               }
                macoff = netoff - maclen;
        }
        if (po->tp_version <= TPACKET_V2) {
                                        skb_set_owner_r(copy_skb, sk);
                        }
                        snaplen = po->rx_ring.frame_size - macoff;
-                       if ((int)snaplen < 0)
+                       if ((int)snaplen < 0) {
                                snaplen = 0;
+                               do_vnet = false;
+                       }
                }
        } else if (unlikely(macoff + snaplen >
                            GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
                if (unlikely((int)snaplen < 0)) {
                        snaplen = 0;
                        macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+                       do_vnet = false;
                }
        }
        spin_lock(&sk->sk_receive_queue.lock);
        }
        spin_unlock(&sk->sk_receive_queue.lock);
  
-       if (po->has_vnet_hdr) {
+       if (do_vnet) {
                if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
                                            sizeof(struct virtio_net_hdr),
                                            vio_le(), true)) {
diff --combined net/sched/cls_api.c
index d470a4e2de58f16afc534945a00a2c6c3284f300,6c5ea84d2682ab81fb9755361fa77326fa9d9935..ea6c65fd5fc5fa31669191470d963bc851822a00
@@@ -100,6 -100,21 +100,6 @@@ int unregister_tcf_proto_ops(struct tcf
  }
  EXPORT_SYMBOL(unregister_tcf_proto_ops);
  
 -static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 -                        struct nlmsghdr *n, struct tcf_proto *tp,
 -                        unsigned long fh, int event, bool unicast);
 -
 -static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 -                               struct nlmsghdr *n,
 -                               struct tcf_chain *chain, int event)
 -{
 -      struct tcf_proto *tp;
 -
 -      for (tp = rtnl_dereference(chain->filter_chain);
 -           tp; tp = rtnl_dereference(tp->next))
 -              tfilter_notify(net, oskb, n, tp, 0, event, false);
 -}
 -
  /* Select new prio value from the range, managed by kernel. */
  
  static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@@ -200,9 -215,15 +200,15 @@@ static void tcf_chain_flush(struct tcf_
  
  static void tcf_chain_destroy(struct tcf_chain *chain)
  {
-       list_del(&chain->list);
-       tcf_chain_flush(chain);
-       kfree(chain);
+       /* May be already removed from the list by the previous call. */
+       if (!list_empty(&chain->list))
+               list_del_init(&chain->list);
+       /* There might still be a reference held when we got here from
+        * tcf_block_put. Wait for the user to drop reference before free.
+        */
+       if (!chain->refcnt)
+               kfree(chain);
  }
  
  struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@@ -273,8 -294,10 +279,10 @@@ void tcf_block_put(struct tcf_block *bl
        if (!block)
                return;
  
-       list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+       list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+               tcf_chain_flush(chain);
                tcf_chain_destroy(chain);
+       }
        kfree(block);
  }
  EXPORT_SYMBOL(tcf_block_put);
@@@ -392,109 -415,6 +400,109 @@@ static struct tcf_proto *tcf_chain_tp_f
        return tp;
  }
  
 +static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 +                       struct tcf_proto *tp, void *fh, u32 portid,
 +                       u32 seq, u16 flags, int event)
 +{
 +      struct tcmsg *tcm;
 +      struct nlmsghdr  *nlh;
 +      unsigned char *b = skb_tail_pointer(skb);
 +
 +      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 +      if (!nlh)
 +              goto out_nlmsg_trim;
 +      tcm = nlmsg_data(nlh);
 +      tcm->tcm_family = AF_UNSPEC;
 +      tcm->tcm__pad1 = 0;
 +      tcm->tcm__pad2 = 0;
 +      tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
 +      tcm->tcm_parent = tp->classid;
 +      tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
 +      if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
 +              goto nla_put_failure;
 +      if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
 +              goto nla_put_failure;
 +      if (!fh) {
 +              tcm->tcm_handle = 0;
 +      } else {
 +              if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
 +                      goto nla_put_failure;
 +      }
 +      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 +      return skb->len;
 +
 +out_nlmsg_trim:
 +nla_put_failure:
 +      nlmsg_trim(skb, b);
 +      return -1;
 +}
 +
 +static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 +                        struct nlmsghdr *n, struct tcf_proto *tp,
 +                        void *fh, int event, bool unicast)
 +{
 +      struct sk_buff *skb;
 +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 +
 +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 +      if (!skb)
 +              return -ENOBUFS;
 +
 +      if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
 +                        n->nlmsg_flags, event) <= 0) {
 +              kfree_skb(skb);
 +              return -EINVAL;
 +      }
 +
 +      if (unicast)
 +              return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
 +
 +      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 +                            n->nlmsg_flags & NLM_F_ECHO);
 +}
 +
 +static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 +                            struct nlmsghdr *n, struct tcf_proto *tp,
 +                            void *fh, bool unicast, bool *last)
 +{
 +      struct sk_buff *skb;
 +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 +      int err;
 +
 +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 +      if (!skb)
 +              return -ENOBUFS;
 +
 +      if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
 +                        n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
 +              kfree_skb(skb);
 +              return -EINVAL;
 +      }
 +
 +      err = tp->ops->delete(tp, fh, last);
 +      if (err) {
 +              kfree_skb(skb);
 +              return err;
 +      }
 +
 +      if (unicast)
 +              return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
 +
 +      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 +                            n->nlmsg_flags & NLM_F_ECHO);
 +}
 +
 +static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 +                               struct nlmsghdr *n,
 +                               struct tcf_chain *chain, int event)
 +{
 +      struct tcf_proto *tp;
 +
 +      for (tp = rtnl_dereference(chain->filter_chain);
 +           tp; tp = rtnl_dereference(tp->next))
 +              tfilter_notify(net, oskb, n, tp, 0, event, false);
 +}
 +
  /* Add/change/delete/get a filter node */
  
  static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        struct tcf_proto *tp;
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
 -      unsigned long fh;
 +      void *fh;
        int err;
        int tp_created;
  
@@@ -586,7 -506,7 +594,7 @@@ replay
  
        /* Do we search for filter, attached to class? */
        if (TC_H_MIN(parent)) {
 -              cl = cops->get(q, parent);
 +              cl = cops->find(q, parent);
                if (cl == 0)
                        return -ENOENT;
        }
  
        fh = tp->ops->get(tp, t->tcm_handle);
  
 -      if (fh == 0) {
 +      if (!fh) {
                if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
                        tcf_chain_tp_remove(chain, &chain_info, tp);
                        tfilter_notify(net, skb, n, tp, fh,
                        }
                        break;
                case RTM_DELTFILTER:
 -                      err = tp->ops->delete(tp, fh, &last);
 +                      err = tfilter_del_notify(net, skb, n, tp, fh, false,
 +                                               &last);
                        if (err)
                                goto errout;
 -                      tfilter_notify(net, skb, n, tp, t->tcm_handle,
 -                                     RTM_DELTFILTER, false);
                        if (last) {
                                tcf_chain_tp_remove(chain, &chain_info, tp);
                                tcf_proto_destroy(tp);
  errout:
        if (chain)
                tcf_chain_put(chain);
 -      if (cl)
 -              cops->put(q, cl);
        if (err == -EAGAIN)
                /* Replay the request. */
                goto replay;
        return err;
  }
  
 -static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 -                       struct tcf_proto *tp, unsigned long fh, u32 portid,
 -                       u32 seq, u16 flags, int event)
 -{
 -      struct tcmsg *tcm;
 -      struct nlmsghdr  *nlh;
 -      unsigned char *b = skb_tail_pointer(skb);
 -
 -      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 -      if (!nlh)
 -              goto out_nlmsg_trim;
 -      tcm = nlmsg_data(nlh);
 -      tcm->tcm_family = AF_UNSPEC;
 -      tcm->tcm__pad1 = 0;
 -      tcm->tcm__pad2 = 0;
 -      tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
 -      tcm->tcm_parent = tp->classid;
 -      tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
 -      if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
 -              goto nla_put_failure;
 -      if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
 -              goto nla_put_failure;
 -      tcm->tcm_handle = fh;
 -      if (RTM_DELTFILTER != event) {
 -              tcm->tcm_handle = 0;
 -              if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
 -                      goto nla_put_failure;
 -      }
 -      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 -      return skb->len;
 -
 -out_nlmsg_trim:
 -nla_put_failure:
 -      nlmsg_trim(skb, b);
 -      return -1;
 -}
 -
 -static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 -                        struct nlmsghdr *n, struct tcf_proto *tp,
 -                        unsigned long fh, int event, bool unicast)
 -{
 -      struct sk_buff *skb;
 -      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 -
 -      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 -      if (!skb)
 -              return -ENOBUFS;
 -
 -      if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
 -                        n->nlmsg_flags, event) <= 0) {
 -              kfree_skb(skb);
 -              return -EINVAL;
 -      }
 -
 -      if (unicast)
 -              return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
 -
 -      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 -                            n->nlmsg_flags & NLM_F_ECHO);
 -}
 -
  struct tcf_dump_args {
        struct tcf_walker w;
        struct sk_buff *skb;
        struct netlink_callback *cb;
  };
  
 -static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
 -                       struct tcf_walker *arg)
 +static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
  {
        struct tcf_dump_args *a = (void *)arg;
        struct net *net = sock_net(a->skb->sk);
@@@ -820,17 -805,17 +828,17 @@@ static int tc_dump_tfilter(struct sk_bu
                goto out;
        cops = q->ops->cl_ops;
        if (!cops)
 -              goto errout;
 +              goto out;
        if (!cops->tcf_block)
 -              goto errout;
 +              goto out;
        if (TC_H_MIN(tcm->tcm_parent)) {
 -              cl = cops->get(q, tcm->tcm_parent);
 +              cl = cops->find(q, tcm->tcm_parent);
                if (cl == 0)
 -                      goto errout;
 +                      goto out;
        }
        block = cops->tcf_block(q, cl);
        if (!block)
 -              goto errout;
 +              goto out;
  
        index_start = cb->args[0];
        index = 0;
  
        cb->args[0] = index;
  
 -errout:
 -      if (cl)
 -              cops->put(q, cl);
  out:
        return skb->len;
  }
@@@ -903,12 -891,18 +911,12 @@@ int tcf_exts_validate(struct net *net, 
  }
  EXPORT_SYMBOL(tcf_exts_validate);
  
 -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 -                   struct tcf_exts *src)
 +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
  {
  #ifdef CONFIG_NET_CLS_ACT
        struct tcf_exts old = *dst;
  
 -      tcf_tree_lock(tp);
 -      dst->nr_actions = src->nr_actions;
 -      dst->actions = src->actions;
 -      dst->type = src->type;
 -      tcf_tree_unlock(tp);
 -
 +      *dst = *src;
        tcf_exts_destroy(&old);
  #endif
  }
@@@ -929,7 -923,7 +937,7 @@@ int tcf_exts_dump(struct sk_buff *skb, 
  #ifdef CONFIG_NET_CLS_ACT
        struct nlattr *nest;
  
 -      if (exts->action && exts->nr_actions) {
 +      if (exts->action && tcf_exts_has_actions(exts)) {
                /*
                 * again for backward compatible mode - we want
                 * to work with both old and new modes of entering
@@@ -986,7 -980,7 +994,7 @@@ int tcf_exts_get_dev(struct net_device 
        const struct tc_action *a;
        LIST_HEAD(actions);
  
 -      if (tc_no_actions(exts))
 +      if (!tcf_exts_has_actions(exts))
                return -EINVAL;
  
        tcf_exts_to_list(exts, &actions);
@@@ -1005,10 -999,10 +1013,10 @@@ EXPORT_SYMBOL(tcf_exts_get_dev)
  
  static int __init tc_filter_init(void)
  {
 -      rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
 -      rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
 +      rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
 +      rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
 -                    tc_dump_tfilter, NULL);
 +                    tc_dump_tfilter, 0);
  
        return 0;
  }
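For illustration only (not part of this merge): tfilter_del_notify() above, like tclass_del_notify() in sch_api.c below, fills the notification skb before calling ->delete(), since after deletion the filter or class may already be gone. A hypothetical user-space analogue of that ordering:

#include <stdio.h>
#include <stdlib.h>

struct toy_filter { int handle; };

static int delete_with_notify(struct toy_filter **slot, char *msg, size_t len)
{
        struct toy_filter *f = *slot;

        if (!f)
                return -1;

        /* 1. serialise while the object is still valid */
        snprintf(msg, len, "DELFILTER handle=%d", f->handle);

        /* 2. actually delete it */
        free(f);
        *slot = NULL;

        /* 3. only now "send" the prebuilt notification */
        printf("notify: %s\n", msg);
        return 0;
}

int main(void)
{
        struct toy_filter *f = malloc(sizeof(*f));
        char msg[64];

        if (!f)
                return 1;
        f->handle = 42;
        return delete_with_notify(&f, msg, sizeof(msg)) ? 1 : 0;
}
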
diff --combined net/sched/sch_api.c
index 929b024f41ba02dcf3296e13f5442e1d787f1d82,4fb5a3222d0d324167f079f755be14eb028b4a50..c6deb74e3d2f4a007554b9cf78e4ddf7b7b84535
  #include <net/sock.h>
  #include <net/netlink.h>
  #include <net/pkt_sched.h>
 -
 -static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 -                      struct nlmsghdr *n, u32 clid,
 -                      struct Qdisc *old, struct Qdisc *new);
 -static int tclass_notify(struct net *net, struct sk_buff *oskb,
 -                       struct nlmsghdr *n, struct Qdisc *q,
 -                       unsigned long cl, int event);
 +#include <net/pkt_cls.h>
  
  /*
  
@@@ -154,7 -160,7 +154,7 @@@ int register_qdisc(struct Qdisc_ops *qo
        if (qops->cl_ops) {
                const struct Qdisc_class_ops *cops = qops->cl_ops;
  
 -              if (!(cops->get && cops->put && cops->walk && cops->leaf))
 +              if (!(cops->find && cops->walk && cops->leaf))
                        goto out_einval;
  
                if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
@@@ -321,11 -327,12 +321,11 @@@ static struct Qdisc *qdisc_leaf(struct 
  
        if (cops == NULL)
                return NULL;
 -      cl = cops->get(p, classid);
 +      cl = cops->find(p, classid);
  
        if (cl == 0)
                return NULL;
        leaf = cops->leaf(p, cl);
 -      cops->put(p, cl);
        return leaf;
  }
  
@@@ -614,10 -621,14 +614,10 @@@ EXPORT_SYMBOL(qdisc_watchdog_cancel)
  
  static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
  {
 -      unsigned int size = n * sizeof(struct hlist_head), i;
        struct hlist_head *h;
 +      unsigned int i;
  
 -      if (size <= PAGE_SIZE)
 -              h = kmalloc(size, GFP_KERNEL);
 -      else
 -              h = (struct hlist_head *)
 -                      __get_free_pages(GFP_KERNEL, get_order(size));
 +      h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
  
        if (h != NULL) {
                for (i = 0; i < n; i++)
        return h;
  }
  
 -static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
 -{
 -      unsigned int size = n * sizeof(struct hlist_head);
 -
 -      if (size <= PAGE_SIZE)
 -              kfree(h);
 -      else
 -              free_pages((unsigned long)h, get_order(size));
 -}
 -
  void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
  {
        struct Qdisc_class_common *cl;
        clhash->hashmask = nmask;
        sch_tree_unlock(sch);
  
 -      qdisc_class_hash_free(ohash, osize);
 +      kvfree(ohash);
  }
  EXPORT_SYMBOL(qdisc_class_hash_grow);
  
@@@ -678,7 -699,7 +678,7 @@@ EXPORT_SYMBOL(qdisc_class_hash_init)
  
  void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
  {
 -      qdisc_class_hash_free(clhash->hash, clhash->hashsize);
 +      kvfree(clhash->hash);
  }
  EXPORT_SYMBOL(qdisc_class_hash_destroy);
  
@@@ -728,7 -749,6 +728,7 @@@ void qdisc_tree_reduce_backlog(struct Q
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
 +      bool notify;
        int drops;
  
        if (n == 0 && len == 0)
  
                if (sch->flags & TCQ_F_NOPARENT)
                        break;
 +              /* Notify parent qdisc only if child qdisc becomes empty.
 +               *
 +               * If child was empty even before update then backlog
 +               * counter is screwed and we skip notification because
 +               * parent class is already passive.
 +               */
 +              notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
                /* TODO: perform the search on a per txq basis */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
                        break;
                }
                cops = sch->ops->cl_ops;
 -              if (cops->qlen_notify) {
 -                      cl = cops->get(sch, parentid);
 +              if (notify && cops->qlen_notify) {
 +                      cl = cops->find(sch, parentid);
                        cops->qlen_notify(sch, cl);
 -                      cops->put(sch, cl);
                }
                sch->q.qlen -= n;
                sch->qstats.backlog -= len;
  }
  EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
  
 +static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 +                       u32 portid, u32 seq, u16 flags, int event)
 +{
 +      struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
 +      struct gnet_stats_queue __percpu *cpu_qstats = NULL;
 +      struct tcmsg *tcm;
 +      struct nlmsghdr  *nlh;
 +      unsigned char *b = skb_tail_pointer(skb);
 +      struct gnet_dump d;
 +      struct qdisc_size_table *stab;
 +      __u32 qlen;
 +
 +      cond_resched();
 +      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 +      if (!nlh)
 +              goto out_nlmsg_trim;
 +      tcm = nlmsg_data(nlh);
 +      tcm->tcm_family = AF_UNSPEC;
 +      tcm->tcm__pad1 = 0;
 +      tcm->tcm__pad2 = 0;
 +      tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 +      tcm->tcm_parent = clid;
 +      tcm->tcm_handle = q->handle;
 +      tcm->tcm_info = refcount_read(&q->refcnt);
 +      if (nla_put_string(skb, TCA_KIND, q->ops->id))
 +              goto nla_put_failure;
 +      if (q->ops->dump && q->ops->dump(q, skb) < 0)
 +              goto nla_put_failure;
 +      qlen = q->q.qlen;
 +
 +      stab = rtnl_dereference(q->stab);
 +      if (stab && qdisc_dump_stab(skb, stab) < 0)
 +              goto nla_put_failure;
 +
 +      if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
 +                                       NULL, &d, TCA_PAD) < 0)
 +              goto nla_put_failure;
 +
 +      if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
 +              goto nla_put_failure;
 +
 +      if (qdisc_is_percpu_stats(q)) {
 +              cpu_bstats = q->cpu_bstats;
 +              cpu_qstats = q->cpu_qstats;
 +      }
 +
 +      if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
 +                                &d, cpu_bstats, &q->bstats) < 0 ||
 +          gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
 +          gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
 +              goto nla_put_failure;
 +
 +      if (gnet_stats_finish_copy(&d) < 0)
 +              goto nla_put_failure;
 +
 +      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 +      return skb->len;
 +
 +out_nlmsg_trim:
 +nla_put_failure:
 +      nlmsg_trim(skb, b);
 +      return -1;
 +}
 +
 +static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
 +{
 +      if (q->flags & TCQ_F_BUILTIN)
 +              return true;
 +      if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
 +              return true;
 +
 +      return false;
 +}
 +
 +static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 +                      struct nlmsghdr *n, u32 clid,
 +                      struct Qdisc *old, struct Qdisc *new)
 +{
 +      struct sk_buff *skb;
 +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 +
 +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 +      if (!skb)
 +              return -ENOBUFS;
 +
 +      if (old && !tc_qdisc_dump_ignore(old, false)) {
 +              if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
 +                                0, RTM_DELQDISC) < 0)
 +                      goto err_out;
 +      }
 +      if (new && !tc_qdisc_dump_ignore(new, false)) {
 +              if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
 +                                old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 +                      goto err_out;
 +      }
 +
 +      if (skb->len)
 +              return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 +                                    n->nlmsg_flags & NLM_F_ECHO);
 +
 +err_out:
 +      kfree_skb(skb);
 +      return -EINVAL;
 +}
 +
  static void notify_and_destroy(struct net *net, struct sk_buff *skb,
                               struct nlmsghdr *n, u32 clid,
                               struct Qdisc *old, struct Qdisc *new)
@@@ -927,7 -836,7 +927,7 @@@ static int qdisc_graft(struct net_devic
  
                        old = dev_graft_qdisc(dev_queue, new);
                        if (new && i > 0)
-                               refcount_inc(&new->refcnt);
+                               qdisc_refcount_inc(new);
  
                        if (!ingress)
                                qdisc_destroy(old);
@@@ -938,7 -847,7 +938,7 @@@ skip
                        notify_and_destroy(net, skb, n, classid,
                                           dev->qdisc, new);
                        if (new && !new->ops->attach)
-                               refcount_inc(&new->refcnt);
+                               qdisc_refcount_inc(new);
                        dev->qdisc = new ? : &noop_qdisc;
  
                        if (new && new->ops->attach)
  
                err = -EOPNOTSUPP;
                if (cops && cops->graft) {
 -                      unsigned long cl = cops->get(parent, classid);
 -                      if (cl) {
 +                      unsigned long cl = cops->find(parent, classid);
 +
 +                      if (cl)
                                err = cops->graft(parent, cl, new, &old);
 -                              cops->put(parent, cl);
 -                      } else
 +                      else
                                err = -ENOENT;
                }
                if (!err)
@@@ -1347,7 -1256,7 +1347,7 @@@ replay
                                if (q == p ||
                                    (p && check_loop(q, p, 0)))
                                        return -ELOOP;
-                               refcount_inc(&q->refcnt);
+                               qdisc_refcount_inc(q);
                                goto graft;
                        } else {
                                if (!q)
@@@ -1439,6 -1348,111 +1439,6 @@@ graft
        return 0;
  }
  
 -static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 -                       u32 portid, u32 seq, u16 flags, int event)
 -{
 -      struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
 -      struct gnet_stats_queue __percpu *cpu_qstats = NULL;
 -      struct tcmsg *tcm;
 -      struct nlmsghdr  *nlh;
 -      unsigned char *b = skb_tail_pointer(skb);
 -      struct gnet_dump d;
 -      struct qdisc_size_table *stab;
 -      __u32 qlen;
 -
 -      cond_resched();
 -      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 -      if (!nlh)
 -              goto out_nlmsg_trim;
 -      tcm = nlmsg_data(nlh);
 -      tcm->tcm_family = AF_UNSPEC;
 -      tcm->tcm__pad1 = 0;
 -      tcm->tcm__pad2 = 0;
 -      tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 -      tcm->tcm_parent = clid;
 -      tcm->tcm_handle = q->handle;
 -      tcm->tcm_info = refcount_read(&q->refcnt);
 -      if (nla_put_string(skb, TCA_KIND, q->ops->id))
 -              goto nla_put_failure;
 -      if (q->ops->dump && q->ops->dump(q, skb) < 0)
 -              goto nla_put_failure;
 -      qlen = q->q.qlen;
 -
 -      stab = rtnl_dereference(q->stab);
 -      if (stab && qdisc_dump_stab(skb, stab) < 0)
 -              goto nla_put_failure;
 -
 -      if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
 -                                       NULL, &d, TCA_PAD) < 0)
 -              goto nla_put_failure;
 -
 -      if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
 -              goto nla_put_failure;
 -
 -      if (qdisc_is_percpu_stats(q)) {
 -              cpu_bstats = q->cpu_bstats;
 -              cpu_qstats = q->cpu_qstats;
 -      }
 -
 -      if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
 -                                &d, cpu_bstats, &q->bstats) < 0 ||
 -          gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
 -          gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
 -              goto nla_put_failure;
 -
 -      if (gnet_stats_finish_copy(&d) < 0)
 -              goto nla_put_failure;
 -
 -      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 -      return skb->len;
 -
 -out_nlmsg_trim:
 -nla_put_failure:
 -      nlmsg_trim(skb, b);
 -      return -1;
 -}
 -
 -static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
 -{
 -      if (q->flags & TCQ_F_BUILTIN)
 -              return true;
 -      if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
 -              return true;
 -
 -      return false;
 -}
 -
 -static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 -                      struct nlmsghdr *n, u32 clid,
 -                      struct Qdisc *old, struct Qdisc *new)
 -{
 -      struct sk_buff *skb;
 -      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 -
 -      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 -      if (!skb)
 -              return -ENOBUFS;
 -
 -      if (old && !tc_qdisc_dump_ignore(old, false)) {
 -              if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
 -                                0, RTM_DELQDISC) < 0)
 -                      goto err_out;
 -      }
 -      if (new && !tc_qdisc_dump_ignore(new, false)) {
 -              if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
 -                                old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 -                      goto err_out;
 -      }
 -
 -      if (skb->len)
 -              return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 -                                    n->nlmsg_flags & NLM_F_ECHO);
 -
 -err_out:
 -      kfree_skb(skb);
 -      return -EINVAL;
 -}
 -
  static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                              struct netlink_callback *cb,
                              int *q_idx_p, int s_q_idx, bool recur,
   *    Traffic classes manipulation.           *
   ************************************************/
  
 +static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 +                        unsigned long cl,
 +                        u32 portid, u32 seq, u16 flags, int event)
 +{
 +      struct tcmsg *tcm;
 +      struct nlmsghdr  *nlh;
 +      unsigned char *b = skb_tail_pointer(skb);
 +      struct gnet_dump d;
 +      const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 +
 +      cond_resched();
 +      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 +      if (!nlh)
 +              goto out_nlmsg_trim;
 +      tcm = nlmsg_data(nlh);
 +      tcm->tcm_family = AF_UNSPEC;
 +      tcm->tcm__pad1 = 0;
 +      tcm->tcm__pad2 = 0;
 +      tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 +      tcm->tcm_parent = q->handle;
 +      tcm->tcm_handle = q->handle;
 +      tcm->tcm_info = 0;
 +      if (nla_put_string(skb, TCA_KIND, q->ops->id))
 +              goto nla_put_failure;
 +      if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
 +              goto nla_put_failure;
 +
 +      if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
 +                                       NULL, &d, TCA_PAD) < 0)
 +              goto nla_put_failure;
 +
 +      if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
 +              goto nla_put_failure;
 +
 +      if (gnet_stats_finish_copy(&d) < 0)
 +              goto nla_put_failure;
 +
 +      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 +      return skb->len;
 +
 +out_nlmsg_trim:
 +nla_put_failure:
 +      nlmsg_trim(skb, b);
 +      return -1;
 +}
 +
 +static int tclass_notify(struct net *net, struct sk_buff *oskb,
 +                       struct nlmsghdr *n, struct Qdisc *q,
 +                       unsigned long cl, int event)
 +{
 +      struct sk_buff *skb;
 +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 +
 +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 +      if (!skb)
 +              return -ENOBUFS;
 +
 +      if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
 +              kfree_skb(skb);
 +              return -EINVAL;
 +      }
 +
 +      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 +                            n->nlmsg_flags & NLM_F_ECHO);
 +}
 +
 +static int tclass_del_notify(struct net *net,
 +                           const struct Qdisc_class_ops *cops,
 +                           struct sk_buff *oskb, struct nlmsghdr *n,
 +                           struct Qdisc *q, unsigned long cl)
 +{
 +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 +      struct sk_buff *skb;
 +      int err = 0;
 +
 +      if (!cops->delete)
 +              return -EOPNOTSUPP;
 +
 +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 +      if (!skb)
 +              return -ENOBUFS;
 +
 +      if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
 +                         RTM_DELTCLASS) < 0) {
 +              kfree_skb(skb);
 +              return -EINVAL;
 +      }
 +
 +      err = cops->delete(q, cl);
 +      if (err) {
 +              kfree_skb(skb);
 +              return err;
 +      }
 +
 +      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 +                            n->nlmsg_flags & NLM_F_ECHO);
 +}
 +
 +#ifdef CONFIG_NET_CLS
 +
 +struct tcf_bind_args {
 +      struct tcf_walker w;
 +      u32 classid;
 +      unsigned long cl;
 +};
 +
 +static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
 +{
 +      struct tcf_bind_args *a = (void *)arg;
 +
 +      if (tp->ops->bind_class) {
 +              tcf_tree_lock(tp);
 +              tp->ops->bind_class(n, a->classid, a->cl);
 +              tcf_tree_unlock(tp);
 +      }
 +      return 0;
 +}
 +
 +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
 +                         unsigned long new_cl)
 +{
 +      const struct Qdisc_class_ops *cops = q->ops->cl_ops;
 +      struct tcf_block *block;
 +      struct tcf_chain *chain;
 +      unsigned long cl;
 +
 +      cl = cops->find(q, portid);
 +      if (!cl)
 +              return;
 +      block = cops->tcf_block(q, cl);
 +      if (!block)
 +              return;
 +      list_for_each_entry(chain, &block->chain_list, list) {
 +              struct tcf_proto *tp;
 +
 +              for (tp = rtnl_dereference(chain->filter_chain);
 +                   tp; tp = rtnl_dereference(tp->next)) {
 +                      struct tcf_bind_args arg = {};
 +
 +                      arg.w.fn = tcf_node_bind;
 +                      arg.classid = clid;
 +                      arg.cl = new_cl;
 +                      tp->ops->walk(tp, &arg.w);
 +              }
 +      }
 +}
 +
 +#else
  
 +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
 +                         unsigned long new_cl)
 +{
 +}
 +
 +#endif
  
  static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
                         struct netlink_ext_ack *extack)
                clid = TC_H_MAKE(qid, clid);
  
        if (clid)
 -              cl = cops->get(q, clid);
 +              cl = cops->find(q, clid);
  
        if (cl == 0) {
                err = -ENOENT;
                                goto out;
                        break;
                case RTM_DELTCLASS:
 -                      err = -EOPNOTSUPP;
 -                      if (cops->delete)
 -                              err = cops->delete(q, cl);
 -                      if (err == 0)
 -                              tclass_notify(net, skb, n, q, cl,
 -                                            RTM_DELTCLASS);
 +                      err = tclass_del_notify(net, cops, skb, n, q, cl);
 +                      /* Unbind the class from its filters by binding them to 0 */
 +                      tc_bind_tclass(q, portid, clid, 0);
                        goto out;
                case RTM_GETTCLASS:
                        err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
        err = -EOPNOTSUPP;
        if (cops->change)
                err = cops->change(q, clid, portid, tca, &new_cl);
 -      if (err == 0)
 +      if (err == 0) {
                tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
 -
 +              /* We just created a new class, need to do the reverse binding. */
 +              if (cl != new_cl)
 +                      tc_bind_tclass(q, portid, clid, new_cl);
 +      }
  out:
 -      if (cl)
 -              cops->put(q, cl);
 -
        return err;
  }
  
 -
 -static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 -                        unsigned long cl,
 -                        u32 portid, u32 seq, u16 flags, int event)
 -{
 -      struct tcmsg *tcm;
 -      struct nlmsghdr  *nlh;
 -      unsigned char *b = skb_tail_pointer(skb);
 -      struct gnet_dump d;
 -      const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 -
 -      cond_resched();
 -      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 -      if (!nlh)
 -              goto out_nlmsg_trim;
 -      tcm = nlmsg_data(nlh);
 -      tcm->tcm_family = AF_UNSPEC;
 -      tcm->tcm__pad1 = 0;
 -      tcm->tcm__pad2 = 0;
 -      tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 -      tcm->tcm_parent = q->handle;
 -      tcm->tcm_handle = q->handle;
 -      tcm->tcm_info = 0;
 -      if (nla_put_string(skb, TCA_KIND, q->ops->id))
 -              goto nla_put_failure;
 -      if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
 -              goto nla_put_failure;
 -
 -      if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
 -                                       NULL, &d, TCA_PAD) < 0)
 -              goto nla_put_failure;
 -
 -      if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
 -              goto nla_put_failure;
 -
 -      if (gnet_stats_finish_copy(&d) < 0)
 -              goto nla_put_failure;
 -
 -      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 -      return skb->len;
 -
 -out_nlmsg_trim:
 -nla_put_failure:
 -      nlmsg_trim(skb, b);
 -      return -1;
 -}
 -
 -static int tclass_notify(struct net *net, struct sk_buff *oskb,
 -                       struct nlmsghdr *n, struct Qdisc *q,
 -                       unsigned long cl, int event)
 -{
 -      struct sk_buff *skb;
 -      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 -
 -      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 -      if (!skb)
 -              return -ENOBUFS;
 -
 -      if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
 -              kfree_skb(skb);
 -              return -EINVAL;
 -      }
 -
 -      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 -                            n->nlmsg_flags & NLM_F_ECHO);
 -}
 -
  struct qdisc_dump_args {
        struct qdisc_walker     w;
        struct sk_buff          *skb;
@@@ -2019,14 -1949,14 +2019,14 @@@ static int __init pktsched_init(void
        register_qdisc(&mq_qdisc_ops);
        register_qdisc(&noqueue_qdisc_ops);
  
 -      rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
 -      rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
 +      rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
 +      rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
 -                    NULL);
 -      rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
 -      rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
 +                    0);
 +      rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
 +      rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
 -                    NULL);
 +                    0);
  
        return 0;
  }
diff --combined net/sched/sch_cbq.c
index 3ec8bec109bbee2014c8b2204906bd123c12fd58,156c8a33c6777a644c77b1adec9057b482bac109..dcef97fa804739df3ae3bc837e1938f3914d6d4f
@@@ -129,6 -129,7 +129,6 @@@ struct cbq_class 
        struct tcf_proto __rcu  *filter_list;
        struct tcf_block        *block;
  
 -      int                     refcnt;
        int                     filters;
  
        struct cbq_class        *defaults[TC_PRIO_MAX + 1];
@@@ -1138,6 -1139,13 +1138,13 @@@ static int cbq_init(struct Qdisc *sch, 
        struct tc_ratespec *r;
        int err;
  
+       qdisc_watchdog_init(&q->watchdog, sch);
+       hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+       q->delay_timer.function = cbq_undelay;
+       if (!opt)
+               return -EINVAL;
        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
        if (err < 0)
                return err;
        if (err < 0)
                goto put_rtab;
  
 -      q->link.refcnt = 1;
        q->link.sibling = &q->link;
        q->link.common.classid = sch->handle;
        q->link.qdisc = sch;
        q->link.avpkt = q->link.allot/2;
        q->link.minidle = -0x7FFFFFFF;
  
-       qdisc_watchdog_init(&q->watchdog, sch);
-       hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
-       q->delay_timer.function = cbq_undelay;
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();
  
@@@ -1383,14 -1389,20 +1387,14 @@@ static void cbq_qlen_notify(struct Qdis
  {
        struct cbq_class *cl = (struct cbq_class *)arg;
  
 -      if (cl->q->q.qlen == 0)
 -              cbq_deactivate_class(cl);
 +      cbq_deactivate_class(cl);
  }
  
 -static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
 +static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
  {
        struct cbq_sched_data *q = qdisc_priv(sch);
 -      struct cbq_class *cl = cbq_class_lookup(q, classid);
  
 -      if (cl) {
 -              cl->refcnt++;
 -              return (unsigned long)cl;
 -      }
 -      return 0;
 +      return (unsigned long)cbq_class_lookup(q, classid);
  }
  
  static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
@@@ -1436,6 -1448,25 +1440,6 @@@ static void cbq_destroy(struct Qdisc *s
        qdisc_class_hash_destroy(&q->clhash);
  }
  
 -static void cbq_put(struct Qdisc *sch, unsigned long arg)
 -{
 -      struct cbq_class *cl = (struct cbq_class *)arg;
 -
 -      if (--cl->refcnt == 0) {
 -#ifdef CONFIG_NET_CLS_ACT
 -              spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
 -              struct cbq_sched_data *q = qdisc_priv(sch);
 -
 -              spin_lock_bh(root_lock);
 -              if (q->rx_class == cl)
 -                      q->rx_class = NULL;
 -              spin_unlock_bh(root_lock);
 -#endif
 -
 -              cbq_destroy_class(sch, cl);
 -      }
 -}
 -
  static int
  cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
                 unsigned long *arg)
  
        cl->R_tab = rtab;
        rtab = NULL;
 -      cl->refcnt = 1;
        cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
        if (!cl->q)
                cl->q = &noop_qdisc;
@@@ -1662,7 -1694,12 +1666,7 @@@ static int cbq_delete(struct Qdisc *sch
        cbq_rmprio(q, cl);
        sch_tree_unlock(sch);
  
 -      BUG_ON(--cl->refcnt == 0);
 -      /*
 -       * This shouldn't happen: we "hold" one cops->get() when called
 -       * from tc_ctl_tclass; the destroy method is done from cops->put().
 -       */
 -
 +      cbq_destroy_class(sch, cl);
        return 0;
  }
  
@@@ -1728,7 -1765,8 +1732,7 @@@ static const struct Qdisc_class_ops cbq
        .graft          =       cbq_graft,
        .leaf           =       cbq_leaf,
        .qlen_notify    =       cbq_qlen_notify,
 -      .get            =       cbq_get,
 -      .put            =       cbq_put,
 +      .find           =       cbq_find,
        .change         =       cbq_change_class,
        .delete         =       cbq_delete,
        .walk           =       cbq_walk,
diff --combined net/sched/sch_fq_codel.c
index 7699b50688cd6f2eec4d86d15f3559876e522f49,2c0c05f2cc34a9de51390c45f29dd8db810075c7..de3b57ceca7bd625c874fbece917c944aabc26d8
@@@ -491,10 -491,8 +491,8 @@@ static int fq_codel_init(struct Qdisc *
                if (!q->flows)
                        return -ENOMEM;
                q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
-               if (!q->backlogs) {
-                       kvfree(q->flows);
+               if (!q->backlogs)
                        return -ENOMEM;
-               }
                for (i = 0; i < q->flows_cnt; i++) {
                        struct fq_codel_flow *flow = q->flows + i;
  
@@@ -579,7 -577,7 +577,7 @@@ static struct Qdisc *fq_codel_leaf(stru
        return NULL;
  }
  
 -static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
 +static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
  {
        return 0;
  }
@@@ -592,7 -590,7 +590,7 @@@ static unsigned long fq_codel_bind(stru
        return 0;
  }
  
 -static void fq_codel_put(struct Qdisc *q, unsigned long cl)
 +static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
  {
  }
  
@@@ -683,10 -681,11 +681,10 @@@ static void fq_codel_walk(struct Qdisc 
  
  static const struct Qdisc_class_ops fq_codel_class_ops = {
        .leaf           =       fq_codel_leaf,
 -      .get            =       fq_codel_get,
 -      .put            =       fq_codel_put,
 +      .find           =       fq_codel_find,
        .tcf_block      =       fq_codel_tcf_block,
        .bind_tcf       =       fq_codel_bind,
 -      .unbind_tcf     =       fq_codel_put,
 +      .unbind_tcf     =       fq_codel_unbind,
        .dump           =       fq_codel_dump_class,
        .dump_stats     =       fq_codel_dump_class_stats,
        .walk           =       fq_codel_walk,
diff --combined net/sched/sch_generic.c
index c6b89a34e8d2e8eecc7ad6d21950bd84335443a0,4ba6da5fb2546c35ad48fe1f3632df8ca9957b34..92237e75dbbc5e3e7dab124fb67baca98ae2160f
@@@ -29,7 -29,6 +29,7 @@@
  #include <net/sch_generic.h>
  #include <net/pkt_sched.h>
  #include <net/dst.h>
 +#include <trace/events/qdisc.h>
  
  /* Qdisc to use by default */
  const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@@ -127,7 -126,7 +127,7 @@@ static struct sk_buff *dequeue_skb(stru
                        q->q.qlen--;
                } else
                        skb = NULL;
 -              return skb;
 +              goto trace;
        }
        *validate = true;
        skb = q->skb_bad_txq;
                        q->q.qlen--;
                        goto bulk;
                }
 -              return NULL;
 +              skb = NULL;
 +              goto trace;
        }
        if (!(q->flags & TCQ_F_ONETXQUEUE) ||
            !netif_xmit_frozen_or_stopped(txq))
@@@ -153,8 -151,6 +153,8 @@@ bulk
                else
                        try_bulk_dequeue_skb_slow(q, skb, packets);
        }
 +trace:
 +      trace_qdisc_dequeue(q, txq, *packets, skb);
        return skb;
  }
  
@@@ -789,7 -785,7 +789,7 @@@ static void attach_default_qdiscs(struc
            dev->priv_flags & IFF_NO_QUEUE) {
                netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
                dev->qdisc = txq->qdisc_sleeping;
-               refcount_inc(&dev->qdisc->refcnt);
+               qdisc_refcount_inc(dev->qdisc);
        } else {
                qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
                if (qdisc) {
diff --combined net/sched/sch_hfsc.c
index 7c7820d0fdc7afbf5e38c6845b09de88515d7551,11ab8dace901534b23b8f376ac704f995dc6b66b..daaf214e5201919ca3681e1670ac1389cb7985a4
@@@ -110,6 -110,7 +110,6 @@@ enum hfsc_class_flags 
  
  struct hfsc_class {
        struct Qdisc_class_common cl_common;
 -      unsigned int    refcnt;         /* usage count */
  
        struct gnet_stats_basic_packed bstats;
        struct gnet_stats_queue qstats;
@@@ -828,6 -829,28 +828,6 @@@ update_vf(struct hfsc_class *cl, unsign
        }
  }
  
 -static void
 -set_active(struct hfsc_class *cl, unsigned int len)
 -{
 -      if (cl->cl_flags & HFSC_RSC)
 -              init_ed(cl, len);
 -      if (cl->cl_flags & HFSC_FSC)
 -              init_vf(cl, len);
 -
 -}
 -
 -static void
 -set_passive(struct hfsc_class *cl)
 -{
 -      if (cl->cl_flags & HFSC_RSC)
 -              eltree_remove(cl);
 -
 -      /*
 -       * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
 -       * needs to be called explicitly to remove a class from vttree.
 -       */
 -}
 -
  static unsigned int
  qdisc_peek_len(struct Qdisc *sch)
  {
@@@ -1044,6 -1067,7 +1044,6 @@@ hfsc_change_class(struct Qdisc *sch, u3
                hfsc_change_usc(cl, usc, 0);
  
        cl->cl_common.classid = classid;
 -      cl->refcnt    = 1;
        cl->sched     = q;
        cl->cl_parent = parent;
        cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@@ -1099,9 -1123,13 +1099,9 @@@ hfsc_delete_class(struct Qdisc *sch, un
        hfsc_purge_queue(sch, cl);
        qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
  
 -      BUG_ON(--cl->refcnt == 0);
 -      /*
 -       * This shouldn't happen: we "hold" one cops->get() when called
 -       * from tc_ctl_tclass; the destroy method is done from cops->put().
 -       */
 -
        sch_tree_unlock(sch);
 +
 +      hfsc_destroy_class(sch, cl);
        return 0;
  }
  
@@@ -1193,18 -1221,30 +1193,18 @@@ hfsc_qlen_notify(struct Qdisc *sch, uns
  {
        struct hfsc_class *cl = (struct hfsc_class *)arg;
  
 -      if (cl->qdisc->q.qlen == 0) {
 -              update_vf(cl, 0, 0);
 -              set_passive(cl);
 -      }
 +      /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
 +       * needs to be called explicitly to remove a class from vttree.
 +       */
 +      update_vf(cl, 0, 0);
 +      if (cl->cl_flags & HFSC_RSC)
 +              eltree_remove(cl);
  }
  
  static unsigned long
 -hfsc_get_class(struct Qdisc *sch, u32 classid)
 -{
 -      struct hfsc_class *cl = hfsc_find_class(classid, sch);
 -
 -      if (cl != NULL)
 -              cl->refcnt++;
 -
 -      return (unsigned long)cl;
 -}
 -
 -static void
 -hfsc_put_class(struct Qdisc *sch, unsigned long arg)
 +hfsc_search_class(struct Qdisc *sch, u32 classid)
  {
 -      struct hfsc_class *cl = (struct hfsc_class *)arg;
 -
 -      if (--cl->refcnt == 0)
 -              hfsc_destroy_class(sch, cl);
 +      return (unsigned long)hfsc_find_class(classid, sch);
  }
  
  static unsigned long
@@@ -1378,6 -1418,8 +1378,8 @@@ hfsc_init_qdisc(struct Qdisc *sch, stru
        struct tc_hfsc_qopt *qopt;
        int err;
  
+       qdisc_watchdog_init(&q->watchdog, sch);
        if (opt == NULL || nla_len(opt) < sizeof(*qopt))
                return -EINVAL;
        qopt = nla_data(opt);
  
        err = tcf_block_get(&q->root.block, &q->root.filter_list);
        if (err)
-               goto err_tcf;
+               return err;
  
        q->root.cl_common.classid = sch->handle;
 -      q->root.refcnt  = 1;
        q->root.sched   = q;
        q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
                                          sch->handle);
        qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
        qdisc_class_hash_grow(sch, &q->clhash);
  
-       qdisc_watchdog_init(&q->watchdog, sch);
        return 0;
- err_tcf:
-       qdisc_class_hash_destroy(&q->clhash);
-       return err;
  }
  
  static int
@@@ -1544,12 -1581,7 +1540,12 @@@ hfsc_enqueue(struct sk_buff *skb, struc
        }
  
        if (cl->qdisc->q.qlen == 1) {
 -              set_active(cl, qdisc_pkt_len(skb));
 +              unsigned int len = qdisc_pkt_len(skb);
 +
 +              if (cl->cl_flags & HFSC_RSC)
 +                      init_ed(cl, len);
 +              if (cl->cl_flags & HFSC_FSC)
 +                      init_vf(cl, len);
                /*
                 * If this is the first packet, isolate the head so an eventual
                 * head drop before the first dequeue operation has no chance
@@@ -1613,18 -1645,18 +1609,18 @@@ hfsc_dequeue(struct Qdisc *sch
        if (realtime)
                cl->cl_cumul += qdisc_pkt_len(skb);
  
 -      if (cl->qdisc->q.qlen != 0) {
 -              if (cl->cl_flags & HFSC_RSC) {
 +      if (cl->cl_flags & HFSC_RSC) {
 +              if (cl->qdisc->q.qlen != 0) {
                        /* update ed */
                        next_len = qdisc_peek_len(cl->qdisc);
                        if (realtime)
                                update_ed(cl, next_len);
                        else
                                update_d(cl, next_len);
 +              } else {
 +                      /* the class becomes passive */
 +                      eltree_remove(cl);
                }
 -      } else {
 -              /* the class becomes passive */
 -              set_passive(cl);
        }
  
        qdisc_bstats_update(sch, skb);
@@@ -1640,7 -1672,8 +1636,7 @@@ static const struct Qdisc_class_ops hfs
        .graft          = hfsc_graft_class,
        .leaf           = hfsc_class_leaf,
        .qlen_notify    = hfsc_qlen_notify,
 -      .get            = hfsc_get_class,
 -      .put            = hfsc_put_class,
 +      .find           = hfsc_search_class,
        .bind_tcf       = hfsc_bind_tcf,
        .unbind_tcf     = hfsc_unbind_tcf,
        .tcf_block      = hfsc_tcf_block,
diff --combined net/sched/sch_htb.c
index f955b59d3c7c4b3a1a765e0c1f2ec669c0bca247,5bf5177b2bd3f6aa1b0ba9e4e59a946e1c739e0a..7e148376ba528efabe5a53a09653f9161c264be7
@@@ -107,6 -107,7 +107,6 @@@ struct htb_class 
        struct tcf_proto __rcu  *filter_list;   /* class attached filters */
        struct tcf_block        *block;
        int                     filter_cnt;
 -      int                     refcnt;         /* usage count of this class */
  
        int                     level;          /* our level (see above) */
        unsigned int            children;
@@@ -192,10 -193,6 +192,10 @@@ static inline struct htb_class *htb_fin
        return container_of(clc, struct htb_class, common);
  }
  
 +static unsigned long htb_search(struct Qdisc *sch, u32 handle)
 +{
 +      return (unsigned long)htb_find(handle, sch);
 +}
  /**
   * htb_classify - classify a packet into class
   *
@@@ -1020,6 -1017,9 +1020,9 @@@ static int htb_init(struct Qdisc *sch, 
        int err;
        int i;
  
+       qdisc_watchdog_init(&q->watchdog, sch);
+       INIT_WORK(&q->work, htb_work_func);
        if (!opt)
                return -EINVAL;
  
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
                INIT_LIST_HEAD(q->drops + i);
  
-       qdisc_watchdog_init(&q->watchdog, sch);
-       INIT_WORK(&q->work, htb_work_func);
        qdisc_skb_head_init(&q->direct_queue);
  
        if (tb[TCA_HTB_DIRECT_QLEN])
@@@ -1189,7 -1187,16 +1190,7 @@@ static void htb_qlen_notify(struct Qdis
  {
        struct htb_class *cl = (struct htb_class *)arg;
  
 -      if (cl->un.leaf.q->q.qlen == 0)
 -              htb_deactivate(qdisc_priv(sch), cl);
 -}
 -
 -static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 -{
 -      struct htb_class *cl = htb_find(classid, sch);
 -      if (cl)
 -              cl->refcnt++;
 -      return (unsigned long)cl;
 +      htb_deactivate(qdisc_priv(sch), cl);
  }
  
  static inline int htb_parent_last_child(struct htb_class *cl)
@@@ -1311,10 -1318,22 +1312,10 @@@ static int htb_delete(struct Qdisc *sch
        if (last_child)
                htb_parent_to_leaf(q, cl, new_q);
  
 -      BUG_ON(--cl->refcnt == 0);
 -      /*
 -       * This shouldn't happen: we "hold" one cops->get() when called
 -       * from tc_ctl_tclass; the destroy method is done from cops->put().
 -       */
 -
        sch_tree_unlock(sch);
 -      return 0;
 -}
  
 -static void htb_put(struct Qdisc *sch, unsigned long arg)
 -{
 -      struct htb_class *cl = (struct htb_class *)arg;
 -
 -      if (--cl->refcnt == 0)
 -              htb_destroy_class(sch, cl);
 +      htb_destroy_class(sch, cl);
 +      return 0;
  }
  
  static int htb_change_class(struct Qdisc *sch, u32 classid,
                        }
                }
  
 -              cl->refcnt = 1;
                cl->children = 0;
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                RB_CLEAR_NODE(&cl->pq_node);
@@@ -1580,7 -1600,8 +1581,7 @@@ static const struct Qdisc_class_ops htb
        .graft          =       htb_graft,
        .leaf           =       htb_leaf,
        .qlen_notify    =       htb_qlen_notify,
 -      .get            =       htb_get,
 -      .put            =       htb_put,
 +      .find           =       htb_search,
        .change         =       htb_change_class,
        .delete         =       htb_delete,
        .walk           =       htb_walk,
diff --combined net/sched/sch_multiq.c
index a5df979b624811e78ce07f228832face6f2c13b8,9c454f5d6c38820512485cceecbc06c9fa86f634..ff4fc3e0facd7d98b504b5315d7cfe7d0ffdf68a
@@@ -257,12 -257,7 +257,7 @@@ static int multiq_init(struct Qdisc *sc
        for (i = 0; i < q->max_bands; i++)
                q->queues[i] = &noop_qdisc;
  
-       err = multiq_tune(sch, opt);
-       if (err)
-               kfree(q->queues);
-       return err;
+       return multiq_tune(sch, opt);
  }
  
  static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@@ -306,7 -301,7 +301,7 @@@ multiq_leaf(struct Qdisc *sch, unsigne
        return q->queues[band];
  }
  
 -static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
 +static unsigned long multiq_find(struct Qdisc *sch, u32 classid)
  {
        struct multiq_sched_data *q = qdisc_priv(sch);
        unsigned long band = TC_H_MIN(classid);
  static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
                                 u32 classid)
  {
 -      return multiq_get(sch, classid);
 +      return multiq_find(sch, classid);
  }
  
  
 -static void multiq_put(struct Qdisc *q, unsigned long cl)
 +static void multiq_unbind(struct Qdisc *q, unsigned long cl)
  {
  }
  
@@@ -385,11 -380,12 +380,11 @@@ static struct tcf_block *multiq_tcf_blo
  static const struct Qdisc_class_ops multiq_class_ops = {
        .graft          =       multiq_graft,
        .leaf           =       multiq_leaf,
 -      .get            =       multiq_get,
 -      .put            =       multiq_put,
 +      .find           =       multiq_find,
        .walk           =       multiq_walk,
        .tcf_block      =       multiq_tcf_block,
        .bind_tcf       =       multiq_bind,
 -      .unbind_tcf     =       multiq_put,
 +      .unbind_tcf     =       multiq_unbind,
        .dump           =       multiq_dump_class,
        .dump_stats     =       multiq_dump_class_stats,
  };
diff --combined net/sched/sch_netem.c
index cf5aad0aabfcc5ed7ea28402e25b3ee1a5c027b5,14d1724e0dc436f49da643be8606be273ce22ebd..b1266e75ca43cf5a66b951ecabccfc5b24069444
@@@ -933,11 -933,11 +933,11 @@@ static int netem_init(struct Qdisc *sch
        struct netem_sched_data *q = qdisc_priv(sch);
        int ret;
  
+       qdisc_watchdog_init(&q->watchdog, sch);
        if (!opt)
                return -EINVAL;
  
-       qdisc_watchdog_init(&q->watchdog, sch);
        q->loss_model = CLG_RANDOM;
        ret = netem_change(sch, opt);
        if (ret)
@@@ -1096,11 -1096,15 +1096,11 @@@ static struct Qdisc *netem_leaf(struct 
        return q->qdisc;
  }
  
 -static unsigned long netem_get(struct Qdisc *sch, u32 classid)
 +static unsigned long netem_find(struct Qdisc *sch, u32 classid)
  {
        return 1;
  }
  
 -static void netem_put(struct Qdisc *sch, unsigned long arg)
 -{
 -}
 -
  static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
  {
        if (!walker->stop) {
  static const struct Qdisc_class_ops netem_class_ops = {
        .graft          =       netem_graft,
        .leaf           =       netem_leaf,
 -      .get            =       netem_get,
 -      .put            =       netem_put,
 +      .find           =       netem_find,
        .walk           =       netem_walk,
        .dump           =       netem_dump_class,
  };
diff --combined net/sched/sch_sfq.c
index e0f029a887ac5dec95706624419c0a1354576baf,fc69fc5956e9d4d2dbfe645e4c25e83328517371..74ea863b824009acb94b956facc889dd80970edf
@@@ -292,7 -292,7 +292,7 @@@ static inline void slot_queue_add(struc
        slot->skblist_prev = skb;
  }
  
 -static unsigned int sfq_drop(struct Qdisc *sch)
 +static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
  {
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index x, d = q->cur_depth;
@@@ -310,8 -310,9 +310,8 @@@ drop
                slot->backlog -= len;
                sfq_dec(q, x);
                sch->q.qlen--;
 -              qdisc_qstats_drop(sch);
                qdisc_qstats_backlog_dec(sch, skb);
 -              kfree_skb(skb);
 +              qdisc_drop(skb, sch, to_free);
                return len;
        }
  
@@@ -359,7 -360,7 +359,7 @@@ sfq_enqueue(struct sk_buff *skb, struc
        if (hash == 0) {
                if (ret & __NET_XMIT_BYPASS)
                        qdisc_qstats_drop(sch);
 -              kfree_skb(skb);
 +              __qdisc_drop(skb, to_free);
                return ret;
        }
        hash--;
@@@ -464,7 -465,7 +464,7 @@@ enqueue
                return NET_XMIT_SUCCESS;
  
        qlen = slot->qlen;
 -      dropped = sfq_drop(sch);
 +      dropped = sfq_drop(sch, to_free);
        /* Return Congestion Notification only if we dropped a packet
         * from this flow.
         */
@@@ -627,8 -628,6 +627,8 @@@ static int sfq_change(struct Qdisc *sch
        struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
        unsigned int qlen, dropped = 0;
        struct red_parms *p = NULL;
 +      struct sk_buff *to_free = NULL;
 +      struct sk_buff *tail = NULL;
  
        if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
                return -EINVAL;
        }
  
        qlen = sch->q.qlen;
 -      while (sch->q.qlen > q->limit)
 -              dropped += sfq_drop(sch);
 +      while (sch->q.qlen > q->limit) {
 +              dropped += sfq_drop(sch, &to_free);
 +              if (!tail)
 +                      tail = to_free;
 +      }
 +
 +      rtnl_kfree_skbs(to_free, tail);
        qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
  
        del_timer(&q->perturb_timer);
@@@ -722,13 -716,13 +722,13 @@@ static int sfq_init(struct Qdisc *sch, 
        int i;
        int err;
  
+       setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
+                              (unsigned long)sch);
        err = tcf_block_get(&q->block, &q->filter_list);
        if (err)
                return err;
  
-       setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
-                              (unsigned long)sch);
        for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
                q->dep[i].next = i + SFQ_MAX_FLOWS;
                q->dep[i].prev = i + SFQ_MAX_FLOWS;
@@@ -814,7 -808,7 +814,7 @@@ static struct Qdisc *sfq_leaf(struct Qd
        return NULL;
  }
  
 -static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
 +static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
  {
        return 0;
  }
@@@ -827,7 -821,7 +827,7 @@@ static unsigned long sfq_bind(struct Qd
        return 0;
  }
  
 -static void sfq_put(struct Qdisc *q, unsigned long cl)
 +static void sfq_unbind(struct Qdisc *q, unsigned long cl)
  {
  }
  
@@@ -891,10 -885,11 +891,10 @@@ static void sfq_walk(struct Qdisc *sch
  
  static const struct Qdisc_class_ops sfq_class_ops = {
        .leaf           =       sfq_leaf,
 -      .get            =       sfq_get,
 -      .put            =       sfq_put,
 +      .find           =       sfq_find,
        .tcf_block      =       sfq_tcf_block,
        .bind_tcf       =       sfq_bind,
 -      .unbind_tcf     =       sfq_put,
 +      .unbind_tcf     =       sfq_unbind,
        .dump           =       sfq_dump_class,
        .dump_stats     =       sfq_dump_class_stats,
        .walk           =       sfq_walk,
diff --combined net/sched/sch_tbf.c
index d5dba972ab06842da7a72cf0359dbed5af4dc3ab,493270f0d5b055fa07d4dee2b35ec9d40bddc3d0..120f4f36596786746b89a2832c125e1814d6fd9b
@@@ -425,12 -425,13 +425,13 @@@ static int tbf_init(struct Qdisc *sch, 
  {
        struct tbf_sched_data *q = qdisc_priv(sch);
  
+       qdisc_watchdog_init(&q->watchdog, sch);
+       q->qdisc = &noop_qdisc;
        if (opt == NULL)
                return -EINVAL;
  
        q->t_c = ktime_get_ns();
-       qdisc_watchdog_init(&q->watchdog, sch);
-       q->qdisc = &noop_qdisc;
  
        return tbf_change(sch, opt);
  }
@@@ -510,11 -511,15 +511,11 @@@ static struct Qdisc *tbf_leaf(struct Qd
        return q->qdisc;
  }
  
 -static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
 +static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
  {
        return 1;
  }
  
 -static void tbf_put(struct Qdisc *sch, unsigned long arg)
 -{
 -}
 -
  static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
  {
        if (!walker->stop) {
  static const struct Qdisc_class_ops tbf_class_ops = {
        .graft          =       tbf_graft,
        .leaf           =       tbf_leaf,
 -      .get            =       tbf_get,
 -      .put            =       tbf_put,
 +      .find           =       tbf_find,
        .walk           =       tbf_walk,
        .dump           =       tbf_dump_class,
  };
diff --combined net/sctp/socket.c
index c01af72cc603c794204db4b63dff001ae15360ba,8d760863bc411023835b20383620f38d14ee2df1..1b00a1e09b93e4106a38b4f6d45df7175e27598b
@@@ -100,9 -100,8 +100,9 @@@ static int sctp_send_asconf(struct sctp
                            struct sctp_chunk *chunk);
  static int sctp_do_bind(struct sock *, union sctp_addr *, int);
  static int sctp_autobind(struct sock *sk);
 -static void sctp_sock_migrate(struct sock *, struct sock *,
 -                            struct sctp_association *, sctp_socket_type_t);
 +static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 +                            struct sctp_association *assoc,
 +                            enum sctp_socket_type type);
  
  static unsigned long sctp_memory_pressure;
  static atomic_long_t sctp_memory_allocated;
@@@ -1056,7 -1055,7 +1056,7 @@@ static int __sctp_connect(struct sock *
        struct sctp_association *asoc2;
        struct sctp_transport *transport;
        union sctp_addr to;
 -      sctp_scope_t scope;
 +      enum sctp_scope scope;
        long timeo;
        int err = 0;
        int addrcnt = 0;
@@@ -1594,8 -1593,7 +1594,8 @@@ static int sctp_error(struct sock *sk, 
   */
  /* BUG:  We do not implement the equivalent of sk_stream_wait_memory(). */
  
 -static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
 +static int sctp_msghdr_parse(const struct msghdr *msg,
 +                           struct sctp_cmsgs *cmsgs);
  
  static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
  {
        struct sctp_sndrcvinfo *sinfo;
        struct sctp_initmsg *sinit;
        sctp_assoc_t associd = 0;
 -      sctp_cmsgs_t cmsgs = { NULL };
 -      sctp_scope_t scope;
 +      struct sctp_cmsgs cmsgs = { NULL };
 +      enum sctp_scope scope;
        bool fill_sinfo_ttl = false, wait_connect = false;
        struct sctp_datamsg *datamsg;
        int msg_flags = msg->msg_flags;
@@@ -4540,8 -4538,7 +4540,7 @@@ int sctp_get_sctp_info(struct sock *sk
        info->sctpi_ictrlchunks = asoc->stats.ictrlchunks;
  
        prim = asoc->peer.primary_path;
-       memcpy(&info->sctpi_p_address, &prim->ipaddr,
-              sizeof(struct sockaddr_storage));
+       memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr));
        info->sctpi_p_state = prim->state;
        info->sctpi_p_cwnd = prim->cwnd;
        info->sctpi_p_srtt = prim->srtt;
@@@ -7447,10 -7444,10 +7446,10 @@@ static int sctp_autobind(struct sock *s
   * msg_control
   * points here
   */
 -static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 +static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
  {
 -      struct cmsghdr *cmsg;
        struct msghdr *my_msg = (struct msghdr *)msg;
 +      struct cmsghdr *cmsg;
  
        for_each_cmsghdr(cmsg, my_msg) {
                if (!CMSG_OK(my_msg, cmsg))
@@@ -8087,7 -8084,7 +8086,7 @@@ static inline void sctp_copy_descendant
   */
  static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
                              struct sctp_association *assoc,
 -                            sctp_socket_type_t type)
 +                            enum sctp_socket_type type)
  {
        struct sctp_sock *oldsp = sctp_sk(oldsk);
        struct sctp_sock *newsp = sctp_sk(newsk);
diff --combined net/tipc/bearer.c
index d49598f6002bc154182f4eb7c120942d67f11de0,89cd061c4468247cf761541ff1a2ca27f0836d6f..ac1d66d7e1fdddcfc53e2251e542c0c8e28ef6d8
@@@ -65,6 -65,8 +65,8 @@@ static struct tipc_bearer *bearer_get(s
  }
  
  static void bearer_disable(struct net *net, struct tipc_bearer *b);
+ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
+                          struct packet_type *pt, struct net_device *orig_dev);
  
  /**
   * tipc_media_find - locates specified media object by name
@@@ -365,6 -367,30 +367,6 @@@ static int tipc_reset_bearer(struct ne
        return 0;
  }
  
 -/* tipc_bearer_reset_all - reset all links on all bearers
 - */
 -void tipc_bearer_reset_all(struct net *net)
 -{
 -      struct tipc_bearer *b;
 -      int i;
 -
 -      for (i = 0; i < MAX_BEARERS; i++) {
 -              b = bearer_get(net, i);
 -              if (b)
 -                      clear_bit_unlock(0, &b->up);
 -      }
 -      for (i = 0; i < MAX_BEARERS; i++) {
 -              b = bearer_get(net, i);
 -              if (b)
 -                      tipc_reset_bearer(net, b);
 -      }
 -      for (i = 0; i < MAX_BEARERS; i++) {
 -              b = bearer_get(net, i);
 -              if (b)
 -                      test_and_set_bit_lock(0, &b->up);
 -      }
 -}
 -
  /**
   * bearer_disable
   *
@@@ -404,6 -430,10 +406,10 @@@ int tipc_enable_l2_media(struct net *ne
  
        /* Associate TIPC bearer with L2 bearer */
        rcu_assign_pointer(b->media_ptr, dev);
+       b->pt.dev = dev;
+       b->pt.type = htons(ETH_P_TIPC);
+       b->pt.func = tipc_l2_rcv_msg;
+       dev_add_pack(&b->pt);
        memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
        memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
        b->bcast_addr.media_id = b->media->type_id;
@@@ -423,6 -453,7 +429,7 @@@ void tipc_disable_l2_media(struct tipc_
        struct net_device *dev;
  
        dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+       dev_remove_pack(&b->pt);
        RCU_INIT_POINTER(dev->tipc_ptr, NULL);
        synchronize_net();
        dev_put(dev);
@@@ -570,11 -601,12 +577,12 @@@ static int tipc_l2_rcv_msg(struct sk_bu
        struct tipc_bearer *b;
  
        rcu_read_lock();
-       b = rcu_dereference_rtnl(dev->tipc_ptr);
+       b = rcu_dereference_rtnl(dev->tipc_ptr) ?:
+               rcu_dereference_rtnl(orig_dev->tipc_ptr);
        if (likely(b && test_bit(0, &b->up) &&
                   (skb->pkt_type <= PACKET_MULTICAST))) {
                skb->next = NULL;
-               tipc_rcv(dev_net(dev), skb, b);
+               tipc_rcv(dev_net(b->pt.dev), skb, b);
                rcu_read_unlock();
                return NET_RX_SUCCESS;
        }
@@@ -635,11 -667,6 +643,6 @@@ static int tipc_l2_device_event(struct 
        return NOTIFY_OK;
  }
  
- static struct packet_type tipc_packet_type __read_mostly = {
-       .type = htons(ETH_P_TIPC),
-       .func = tipc_l2_rcv_msg,
- };
  static struct notifier_block notifier = {
        .notifier_call  = tipc_l2_device_event,
        .priority       = 0,
  
  int tipc_bearer_setup(void)
  {
-       int err;
-       err = register_netdevice_notifier(&notifier);
-       if (err)
-               return err;
-       dev_add_pack(&tipc_packet_type);
-       return 0;
+       return register_netdevice_notifier(&notifier);
  }
  
  void tipc_bearer_cleanup(void)
  {
        unregister_netdevice_notifier(&notifier);
-       dev_remove_pack(&tipc_packet_type);
  }
  
  void tipc_bearer_stop(struct net *net)
diff --combined net/tipc/bearer.h
index 865cb0901a20094a9abbf8eb66da4faac5539950,e07a55a80c18ba0f3c4f1187b7544faeca395a4e..42d6eeeb646ddca457aec269de1650b1269cb411
@@@ -131,6 -131,7 +131,7 @@@ struct tipc_media 
   * @name: bearer name (format = media:interface)
   * @media: ptr to media structure associated with bearer
   * @bcast_addr: media address used in broadcasting
+  * @pt: packet type for bearer
   * @rcu: rcu struct for tipc_bearer
   * @priority: default link priority for bearer
   * @window: default window size for bearer
@@@ -151,6 -152,7 +152,7 @@@ struct tipc_bearer 
        char name[TIPC_MAX_BEARER_NAME];
        struct tipc_media *media;
        struct tipc_media_addr bcast_addr;
+       struct packet_type pt;
        struct rcu_head rcu;
        u32 priority;
        u32 window;
@@@ -210,6 -212,7 +212,6 @@@ void tipc_bearer_remove_dest(struct ne
  struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
  int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
  struct tipc_media *tipc_media_find(const char *name);
 -void tipc_bearer_reset_all(struct net *net);
  int tipc_bearer_setup(void);
  void tipc_bearer_cleanup(void);
  void tipc_bearer_stop(struct net *net);
diff --combined net/tipc/node.c
index eb728397c810af8c63a1d5501226f04a40e6a65d,7dd22330a6b4bf9113e189c613a863fce13425a2..198dbc7adbe126cdb00d8e4508ff47a250f5b2f4
@@@ -1126,8 -1126,8 +1126,8 @@@ int tipc_node_get_linkname(struct net *
                strncpy(linkname, tipc_link_name(link), len);
                err = 0;
        }
- exit:
        tipc_node_read_unlock(node);
+ exit:
        tipc_node_put(node);
        return err;
  }
@@@ -1284,7 -1284,7 +1284,7 @@@ static void tipc_node_bc_sync_rcv(struc
        rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
  
        if (rc & TIPC_LINK_DOWN_EVT) {
 -              tipc_bearer_reset_all(n->net);
 +              tipc_node_reset_links(n);
                return;
        }
  
@@@ -1351,9 -1351,15 +1351,9 @@@ static void tipc_node_bc_rcv(struct ne
        if (!skb_queue_empty(&be->inputq1))
                tipc_node_mcast_rcv(n);
  
 -      if (rc & TIPC_LINK_DOWN_EVT) {
 -              /* Reception reassembly failure => reset all links to peer */
 -              if (!tipc_link_is_up(be->link))
 -                      tipc_node_reset_links(n);
 -
 -              /* Retransmission failure => reset all links to all peers */
 -              if (!tipc_link_is_up(tipc_bc_sndlink(net)))
 -                      tipc_bearer_reset_all(net);
 -      }
 +      /* If reassembly or retransmission fails => reset all links to peer */
 +      if (rc & TIPC_LINK_DOWN_EVT)
 +              tipc_node_reset_links(n);
  
        tipc_node_put(n);
  }
@@@ -1551,6 -1557,8 +1551,8 @@@ void tipc_rcv(struct net *net, struct s
  
        /* Check/update node state before receiving */
        if (unlikely(skb)) {
+               if (unlikely(skb_linearize(skb)))
+                       goto discard;
                tipc_node_write_lock(n);
                if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
                        if (le->link) {
diff --combined net/xfrm/xfrm_policy.c
index cc0d783ccbad0b52713dae77d564468b966b9b3c,69b16ee327d9958769f09c66d54ace50889d6665..f06253969972aa3489e557faf1ef76f54b1eb3d3
@@@ -24,7 -24,6 +24,7 @@@
  #include <linux/netfilter.h>
  #include <linux/module.h>
  #include <linux/cache.h>
 +#include <linux/cpu.h>
  #include <linux/audit.h>
  #include <net/dst.h>
  #include <net/flow.h>
@@@ -45,8 -44,6 +45,8 @@@ struct xfrm_flo 
        u8 flags;
  };
  
 +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
 +static struct work_struct *xfrm_pcpu_work __read_mostly;
  static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
  static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                __read_mostly;
@@@ -122,7 -119,7 +122,7 @@@ static const struct xfrm_policy_afinfo 
  struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
                                    const xfrm_address_t *saddr,
                                    const xfrm_address_t *daddr,
 -                                  int family)
 +                                  int family, u32 mark)
  {
        const struct xfrm_policy_afinfo *afinfo;
        struct dst_entry *dst;
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EAFNOSUPPORT);
  
 -      dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
 +      dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
  
        rcu_read_unlock();
  
@@@ -143,7 -140,7 +143,7 @@@ static inline struct dst_entry *xfrm_ds
                                                int tos, int oif,
                                                xfrm_address_t *prev_saddr,
                                                xfrm_address_t *prev_daddr,
 -                                              int family)
 +                                              int family, u32 mark)
  {
        struct net *net = xs_net(x);
        xfrm_address_t *saddr = &x->props.saddr;
                daddr = x->coaddr;
        }
  
 -      dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
 +      dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
  
        if (!IS_ERR(dst)) {
                if (prev_saddr != saddr)
@@@ -249,6 -246,36 +249,6 @@@ expired
        xfrm_pol_put(xp);
  }
  
 -static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
 -{
 -      struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
 -
 -      if (unlikely(pol->walk.dead))
 -              flo = NULL;
 -      else
 -              xfrm_pol_hold(pol);
 -
 -      return flo;
 -}
 -
 -static int xfrm_policy_flo_check(struct flow_cache_object *flo)
 -{
 -      struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
 -
 -      return !pol->walk.dead;
 -}
 -
 -static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
 -{
 -      xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
 -}
 -
 -static const struct flow_cache_ops xfrm_policy_fc_ops = {
 -      .get = xfrm_policy_flo_get,
 -      .check = xfrm_policy_flo_check,
 -      .delete = xfrm_policy_flo_delete,
 -};
 -
  /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
   * SPD calls.
   */
@@@ -271,6 -298,7 +271,6 @@@ struct xfrm_policy *xfrm_policy_alloc(s
                                (unsigned long)policy);
                setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
                            (unsigned long)policy);
 -              policy->flo.ops = &xfrm_policy_fc_ops;
        }
        return policy;
  }
@@@ -770,6 -798,7 +770,6 @@@ int xfrm_policy_insert(int dir, struct 
        else
                hlist_add_head(&policy->bydst, chain);
        __xfrm_policy_link(policy, dir);
 -      atomic_inc(&net->xfrm.flow_cache_genid);
  
        /* After previous checking, family can either be AF_INET or AF_INET6 */
        if (policy->family == AF_INET)
@@@ -975,8 -1004,6 +975,8 @@@ int xfrm_policy_flush(struct net *net, 
        }
        if (!cnt)
                err = -ESRCH;
 +      else
 +              xfrm_policy_cache_flush();
  out:
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return err;
@@@ -1148,7 -1175,7 +1148,7 @@@ fail
  }
  
  static struct xfrm_policy *
 -__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
  {
  #ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_policy *pol;
        return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
  }
  
 -static int flow_to_policy_dir(int dir)
 -{
 -      if (XFRM_POLICY_IN == FLOW_DIR_IN &&
 -          XFRM_POLICY_OUT == FLOW_DIR_OUT &&
 -          XFRM_POLICY_FWD == FLOW_DIR_FWD)
 -              return dir;
 -
 -      switch (dir) {
 -      default:
 -      case FLOW_DIR_IN:
 -              return XFRM_POLICY_IN;
 -      case FLOW_DIR_OUT:
 -              return XFRM_POLICY_OUT;
 -      case FLOW_DIR_FWD:
 -              return XFRM_POLICY_FWD;
 -      }
 -}
 -
 -static struct flow_cache_object *
 -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 -                 u8 dir, struct flow_cache_object *old_obj, void *ctx)
 -{
 -      struct xfrm_policy *pol;
 -
 -      if (old_obj)
 -              xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 -
 -      pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
 -      if (IS_ERR_OR_NULL(pol))
 -              return ERR_CAST(pol);
 -
 -      /* Resolver returns two references:
 -       * one for cache and one for caller of flow_cache_lookup() */
 -      xfrm_pol_hold(pol);
 -
 -      return &pol->flo;
 -}
 -
 -static inline int policy_to_flow_dir(int dir)
 -{
 -      if (XFRM_POLICY_IN == FLOW_DIR_IN &&
 -          XFRM_POLICY_OUT == FLOW_DIR_OUT &&
 -          XFRM_POLICY_FWD == FLOW_DIR_FWD)
 -              return dir;
 -      switch (dir) {
 -      default:
 -      case XFRM_POLICY_IN:
 -              return FLOW_DIR_IN;
 -      case XFRM_POLICY_OUT:
 -              return FLOW_DIR_OUT;
 -      case XFRM_POLICY_FWD:
 -              return FLOW_DIR_FWD;
 -      }
 -}
 -
  static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                                                 const struct flowi *fl, u16 family)
  {
                        }
                        err = security_xfrm_policy_lookup(pol->security,
                                                      fl->flowi_secid,
 -                                                    policy_to_flow_dir(dir));
 +                                                    dir);
                        if (!err) {
                                if (!xfrm_pol_hold_rcu(pol))
                                        goto again;
@@@ -1340,14 -1422,14 +1340,14 @@@ int __xfrm_sk_clone_policy(struct sock 
  
  static int
  xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
 -             xfrm_address_t *remote, unsigned short family)
 +             xfrm_address_t *remote, unsigned short family, u32 mark)
  {
        int err;
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
  
        if (unlikely(afinfo == NULL))
                return -EINVAL;
 -      err = afinfo->get_saddr(net, oif, local, remote);
 +      err = afinfo->get_saddr(net, oif, local, remote, mark);
        rcu_read_unlock();
        return err;
  }
@@@ -1378,7 -1460,7 +1378,7 @@@ xfrm_tmpl_resolve_one(struct xfrm_polic
                        if (xfrm_addr_any(local, tmpl->encap_family)) {
                                error = xfrm_get_saddr(net, fl->flowi_oif,
                                                       &tmp, remote,
 -                                                     tmpl->encap_family);
 +                                                     tmpl->encap_family, 0);
                                if (error)
                                        goto fail;
                                local = &tmp;
@@@ -1463,6 -1545,58 +1463,6 @@@ static int xfrm_get_tos(const struct fl
        return tos;
  }
  
 -static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
 -{
 -      struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
 -      struct dst_entry *dst = &xdst->u.dst;
 -
 -      if (xdst->route == NULL) {
 -              /* Dummy bundle - if it has xfrms we were not
 -               * able to build bundle as template resolution failed.
 -               * It means we need to try again resolving. */
 -              if (xdst->num_xfrms > 0)
 -                      return NULL;
 -      } else if (dst->flags & DST_XFRM_QUEUE) {
 -              return NULL;
 -      } else {
 -              /* Real bundle */
 -              if (stale_bundle(dst))
 -                      return NULL;
 -      }
 -
 -      dst_hold(dst);
 -      return flo;
 -}
 -
 -static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
 -{
 -      struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
 -      struct dst_entry *dst = &xdst->u.dst;
 -
 -      if (!xdst->route)
 -              return 0;
 -      if (stale_bundle(dst))
 -              return 0;
 -
 -      return 1;
 -}
 -
 -static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
 -{
 -      struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
 -      struct dst_entry *dst = &xdst->u.dst;
 -
 -      /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
 -      dst->obsolete = DST_OBSOLETE_DEAD;
 -      dst_release_immediate(dst);
 -}
 -
 -static const struct flow_cache_ops xfrm_bundle_fc_ops = {
 -      .get = xfrm_bundle_flo_get,
 -      .check = xfrm_bundle_flo_check,
 -      .delete = xfrm_bundle_flo_delete,
 -};
 -
  static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
  {
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
                struct dst_entry *dst = &xdst->u.dst;
  
                memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
 -              xdst->flo.ops = &xfrm_bundle_fc_ops;
        } else
                xdst = ERR_PTR(-ENOBUFS);
  
@@@ -1598,8 -1733,7 +1598,8 @@@ static struct dst_entry *xfrm_bundle_cr
                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
                        family = xfrm[i]->props.family;
                        dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
 -                                            &saddr, &daddr, family);
 +                                            &saddr, &daddr, family,
 +                                            xfrm[i]->props.output_mark);
                        err = PTR_ERR(dst);
                        if (IS_ERR(dst))
                                goto put_states;
@@@ -1706,102 -1840,6 +1706,102 @@@ static int xfrm_expand_policies(const s
  
  }
  
 +static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
 +{
 +      this_cpu_write(xfrm_last_dst, xdst);
 +      if (old)
 +              dst_release(&old->u.dst);
 +}
 +
 +static void __xfrm_pcpu_work_fn(void)
 +{
 +      struct xfrm_dst *old;
 +
 +      old = this_cpu_read(xfrm_last_dst);
 +      if (old && !xfrm_bundle_ok(old))
 +              xfrm_last_dst_update(NULL, old);
 +}
 +
 +static void xfrm_pcpu_work_fn(struct work_struct *work)
 +{
 +      local_bh_disable();
 +      rcu_read_lock();
 +      __xfrm_pcpu_work_fn();
 +      rcu_read_unlock();
 +      local_bh_enable();
 +}
 +
 +void xfrm_policy_cache_flush(void)
 +{
 +      struct xfrm_dst *old;
 +      bool found = false;
 +      int cpu;
 +
 +      local_bh_disable();
 +      rcu_read_lock();
 +      for_each_possible_cpu(cpu) {
 +              old = per_cpu(xfrm_last_dst, cpu);
 +              if (old && !xfrm_bundle_ok(old)) {
 +                      if (smp_processor_id() == cpu) {
 +                              __xfrm_pcpu_work_fn();
 +                              continue;
 +                      }
 +                      found = true;
 +                      break;
 +              }
 +      }
 +
 +      rcu_read_unlock();
 +      local_bh_enable();
 +
 +      if (!found)
 +              return;
 +
 +      get_online_cpus();
 +
 +      for_each_possible_cpu(cpu) {
 +              bool bundle_release;
 +
 +              rcu_read_lock();
 +              old = per_cpu(xfrm_last_dst, cpu);
 +              bundle_release = old && !xfrm_bundle_ok(old);
 +              rcu_read_unlock();
 +
 +              if (!bundle_release)
 +                      continue;
 +
 +              if (cpu_online(cpu)) {
 +                      schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
 +                      continue;
 +              }
 +
 +              rcu_read_lock();
 +              old = per_cpu(xfrm_last_dst, cpu);
 +              if (old && !xfrm_bundle_ok(old)) {
 +                      per_cpu(xfrm_last_dst, cpu) = NULL;
 +                      dst_release(&old->u.dst);
 +              }
 +              rcu_read_unlock();
 +      }
 +
 +      put_online_cpus();
 +}
 +
 +static bool xfrm_pol_dead(struct xfrm_dst *xdst)
 +{
 +      unsigned int num_pols = xdst->num_pols;
 +      unsigned int pol_dead = 0, i;
 +
 +      for (i = 0; i < num_pols; i++)
 +              pol_dead |= xdst->pols[i]->walk.dead;
 +
 +      /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
 +      if (pol_dead)
 +              xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
 +
 +      return pol_dead;
 +}
 +
  static struct xfrm_dst *
  xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                               const struct flowi *fl, u16 family,
  {
        struct net *net = xp_net(pols[0]);
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 +      struct xfrm_dst *xdst, *old;
        struct dst_entry *dst;
 -      struct xfrm_dst *xdst;
        int err;
  
 +      xdst = this_cpu_read(xfrm_last_dst);
 +      if (xdst &&
 +          xdst->u.dst.dev == dst_orig->dev &&
 +          xdst->num_pols == num_pols &&
 +          !xfrm_pol_dead(xdst) &&
 +          memcmp(xdst->pols, pols,
 +                 sizeof(struct xfrm_policy *) * num_pols) == 0 &&
 +          xfrm_bundle_ok(xdst)) {
 +              dst_hold(&xdst->u.dst);
 +              return xdst;
 +      }
 +
 +      old = xdst;
        /* Try to instantiate a bundle */
        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
        if (err <= 0) {
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
        xdst->policy_genid = atomic_read(&pols[0]->genid);
  
 +      atomic_set(&xdst->u.dst.__refcnt, 2);
 +      xfrm_last_dst_update(xdst, old);
 +
        return xdst;
  }
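In xfrm_resolve_and_create_bundle() the cached entry is reused only when the output device, the policy count and the policy pointers themselves all match and both the policies and the bundle are still valid; otherwise a new bundle is built and installed with a reference count of 2 (one for the caller, one kept by the per-CPU cache). A sketch of just the comparison step, with a hypothetical struct cached layout standing in for struct xfrm_dst:

#include <stdbool.h>
#include <string.h>

#define MAX_POLS 8

struct cached {
    const void *dev;
    int num_pols;
    const void *pols[MAX_POLS];
    bool valid;
};

/* Reuse the cached entry only when it describes exactly the same request. */
static bool cache_matches(const struct cached *c, const void *dev,
                          const void *const *pols, int num_pols)
{
    return c && c->valid &&
           c->dev == dev &&
           c->num_pols == num_pols &&
           memcmp(c->pols, pols, sizeof(pols[0]) * num_pols) == 0;
}

Note that memcmp() compares the policy pointers themselves, exactly as the kernel code does.
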
  
@@@ -2029,39 -2051,86 +2029,39 @@@ free_dst
        goto out;
  }
  
 -static struct flow_cache_object *
 -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 -                 struct flow_cache_object *oldflo, void *ctx)
 +static struct xfrm_dst *
 +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
  {
 -      struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 -      struct xfrm_dst *xdst, *new_xdst;
 -      int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
 -
 -      /* Check if the policies from old bundle are usable */
 -      xdst = NULL;
 -      if (oldflo) {
 -              xdst = container_of(oldflo, struct xfrm_dst, flo);
 -              num_pols = xdst->num_pols;
 -              num_xfrms = xdst->num_xfrms;
 -              pol_dead = 0;
 -              for (i = 0; i < num_pols; i++) {
 -                      pols[i] = xdst->pols[i];
 -                      pol_dead |= pols[i]->walk.dead;
 -              }
 -              if (pol_dead) {
 -                      /* Mark DST_OBSOLETE_DEAD to fail the next
 -                       * xfrm_dst_check()
 -                       */
 -                      xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
 -                      dst_release_immediate(&xdst->u.dst);
 -                      xdst = NULL;
 -                      num_pols = 0;
 -                      num_xfrms = 0;
 -                      oldflo = NULL;
 -              }
 -      }
 +      int num_pols = 0, num_xfrms = 0, err;
 +      struct xfrm_dst *xdst;
  
        /* Resolve policies to use if we couldn't get them from
         * previous cache entry */
 -      if (xdst == NULL) {
 -              num_pols = 1;
 -              pols[0] = __xfrm_policy_lookup(net, fl, family,
 -                                             flow_to_policy_dir(dir));
 -              err = xfrm_expand_policies(fl, family, pols,
 +      num_pols = 1;
 +      pols[0] = xfrm_policy_lookup(net, fl, family, dir);
 +      err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
 -              if (err < 0)
 -                      goto inc_error;
 -              if (num_pols == 0)
 -                      return NULL;
 -              if (num_xfrms <= 0)
 -                      goto make_dummy_bundle;
 -      }
 +      if (err < 0)
 +              goto inc_error;
 +      if (num_pols == 0)
 +              return NULL;
 +      if (num_xfrms <= 0)
 +              goto make_dummy_bundle;
  
 -      new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 +      xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
                                                  xflo->dst_orig);
 -      if (IS_ERR(new_xdst)) {
 -              err = PTR_ERR(new_xdst);
 +      if (IS_ERR(xdst)) {
 +              err = PTR_ERR(xdst);
                if (err != -EAGAIN)
                        goto error;
 -              if (oldflo == NULL)
 -                      goto make_dummy_bundle;
 -              dst_hold(&xdst->u.dst);
 -              return oldflo;
 -      } else if (new_xdst == NULL) {
 +              goto make_dummy_bundle;
 +      } else if (xdst == NULL) {
                num_xfrms = 0;
 -              if (oldflo == NULL)
 -                      goto make_dummy_bundle;
 -              xdst->num_xfrms = 0;
 -              dst_hold(&xdst->u.dst);
 -              return oldflo;
 -      }
 -
 -      /* Kill the previous bundle */
 -      if (xdst) {
 -              /* The policies were stolen for newly generated bundle */
 -              xdst->num_pols = 0;
 -              /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
 -              xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
 -              dst_release_immediate(&xdst->u.dst);
 +              goto make_dummy_bundle;
        }
  
 -      /* We do need to return one reference for original caller */
 -      dst_hold(&new_xdst->u.dst);
 -      return &new_xdst->flo;
 +      return xdst;
  
  make_dummy_bundle:
        /* We found policies, but there's no bundles to instantiate:
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
  
        dst_hold(&xdst->u.dst);
 -      return &xdst->flo;
 +      return xdst;
  
  inc_error:
        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
  error:
 -      if (xdst != NULL) {
 -              /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
 -              xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
 -              dst_release_immediate(&xdst->u.dst);
 -      } else
 -              xfrm_pols_put(pols, num_pols);
 +      xfrm_pols_put(pols, num_pols);
        return ERR_PTR(err);
  }
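With the flow cache gone, xfrm_bundle_lookup() no longer inspects a previously cached object: it resolves the policies for the flow and then either returns a real bundle, falls back to a dummy bundle when policies exist but no states can be resolved yet, returns NULL when there is no policy at all, or propagates an error. Callers such as xfrm_lookup() distinguish these outcomes with IS_ERR()/PTR_ERR() on the returned pointer. That pointer-encoded error convention can be reproduced in a few lines of userspace C; this is a simplified rendering of the idea, not the kernel's err.h:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Encode a small negative errno value in an otherwise invalid pointer. */
static inline void *ERR_PTR_(long error) { return (void *)error; }
static inline long PTR_ERR_(const void *ptr) { return (long)ptr; }
static inline int IS_ERR_(const void *ptr)
{
    return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

int main(void)
{
    void *p = ERR_PTR_(-ENOBUFS);

    if (IS_ERR_(p))
        printf("lookup failed: %ld\n", PTR_ERR_(p));   /* -105 on Linux */
    return 0;
}
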
  
@@@ -2113,10 -2187,11 +2113,10 @@@ struct dst_entry *xfrm_lookup(struct ne
                              const struct sock *sk, int flags)
  {
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 -      struct flow_cache_object *flo;
        struct xfrm_dst *xdst;
        struct dst_entry *dst, *route;
        u16 family = dst_orig->ops->family;
 -      u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
 +      u8 dir = XFRM_POLICY_OUT;
        int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
  
        dst = NULL;
                                goto no_transform;
                        }
  
-                       dst_hold(&xdst->u.dst);
                        route = xdst->route;
                }
        }
                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
                        goto nopol;
  
 -              flo = flow_cache_lookup(net, fl, family, dir,
 -                                      xfrm_bundle_lookup, &xflo);
 -              if (flo == NULL)
 +              xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
 +              if (xdst == NULL)
                        goto nopol;
 -              if (IS_ERR(flo)) {
 -                      err = PTR_ERR(flo);
 +              if (IS_ERR(xdst)) {
 +                      err = PTR_ERR(xdst);
                        goto dropdst;
                }
 -              xdst = container_of(flo, struct xfrm_dst, flo);
  
                num_pols = xdst->num_pols;
                num_xfrms = xdst->num_xfrms;
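Previously xfrm_lookup() received a struct flow_cache_object embedded inside the bundle and had to recover the enclosing struct xfrm_dst with container_of(); returning the xfrm_dst directly removes that step. For reference, container_of() can be written portably as below (simplified, without the type checking the kernel macro adds):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct flo { int dummy; };
struct xdst_like {
    int num_pols;
    struct flo flo;             /* embedded member, as the old xfrm_dst embedded flo */
};

int main(void)
{
    struct xdst_like x = { .num_pols = 2 };
    struct flo *inner = &x.flo;
    struct xdst_like *outer = container_of(inner, struct xdst_like, flo);

    printf("num_pols = %d\n", outer->num_pols);   /* 2 */
    return 0;
}
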
@@@ -2372,10 -2448,12 +2371,10 @@@ int __xfrm_policy_check(struct sock *sk
        int pi;
        int reverse;
        struct flowi fl;
 -      u8 fl_dir;
        int xerr_idx = -1;
  
        reverse = dir & ~XFRM_POLICY_MASK;
        dir &= XFRM_POLICY_MASK;
 -      fl_dir = policy_to_flow_dir(dir);
  
        if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
                }
        }
  
 -      if (!pol) {
 -              struct flow_cache_object *flo;
 -
 -              flo = flow_cache_lookup(net, &fl, family, fl_dir,
 -                                      xfrm_policy_lookup, NULL);
 -              if (IS_ERR_OR_NULL(flo))
 -                      pol = ERR_CAST(flo);
 -              else
 -                      pol = container_of(flo, struct xfrm_policy, flo);
 -      }
 +      if (!pol)
 +              pol = xfrm_policy_lookup(net, &fl, family, dir);
  
        if (IS_ERR(pol)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@@ -2554,9 -2640,11 +2553,9 @@@ static struct dst_entry *xfrm_dst_check
         * notice.  That's what we are validating here via the
         * stale_bundle() check.
         *
 -       * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
 -       * be marked on it.
         * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
         * be marked on it.
 -       * Both will force stable_bundle() to fail on any xdst bundle with
 +       * This will force stale_bundle() to fail on any xdst bundle with
         * this dst linked in it.
         */
        if (dst->obsolete < 0 && !stale_bundle(dst))
@@@ -2596,6 -2684,18 +2595,6 @@@ static struct dst_entry *xfrm_negative_
        return dst;
  }
  
 -void xfrm_garbage_collect(struct net *net)
 -{
 -      flow_cache_flush(net);
 -}
 -EXPORT_SYMBOL(xfrm_garbage_collect);
 -
 -void xfrm_garbage_collect_deferred(struct net *net)
 -{
 -      flow_cache_flush_deferred(net);
 -}
 -EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
 -
  static void xfrm_init_pmtu(struct dst_entry *dst)
  {
        do {
@@@ -2933,9 -3033,14 +2932,9 @@@ static int __net_init xfrm_net_init(str
        rv = xfrm_sysctl_init(net);
        if (rv < 0)
                goto out_sysctl;
 -      rv = flow_cache_init(net);
 -      if (rv < 0)
 -              goto out;
  
        return 0;
  
 -out:
 -      xfrm_sysctl_fini(net);
  out_sysctl:
        xfrm_policy_fini(net);
  out_policy:
@@@ -2948,6 -3053,7 +2947,6 @@@ out_statistics
  
  static void __net_exit xfrm_net_exit(struct net *net)
  {
 -      flow_cache_fini(net);
        xfrm_sysctl_fini(net);
        xfrm_policy_fini(net);
        xfrm_state_fini(net);
@@@ -2961,15 -3067,7 +2960,15 @@@ static struct pernet_operations __net_i
  
  void __init xfrm_init(void)
  {
 -      flow_cache_hp_init();
 +      int i;
 +
 +      xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
 +                                     GFP_KERNEL);
 +      BUG_ON(!xfrm_pcpu_work);
 +
 +      for (i = 0; i < NR_CPUS; i++)
 +              INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
 +
        register_pernet_subsys(&xfrm_net_ops);
        seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
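xfrm_init() now allocates one work item per possible CPU up front and binds each to xfrm_pcpu_work_fn(), so xfrm_policy_cache_flush() can later schedule the flush on the CPU that owns the cache entry; this replaces the flow cache hotplug initialization. kmalloc_array() is used rather than an open-coded kmalloc(n * size) because it rejects multiplications that would overflow. The same guard can be expressed in userspace with __builtin_mul_overflow (a GCC/Clang builtin); the helper and struct below are illustrative only:

#include <stdio.h>
#include <stdlib.h>

/* Array allocation that refuses to allocate on size_t overflow. */
static void *alloc_array(size_t n, size_t size)
{
    size_t bytes;

    if (__builtin_mul_overflow(n, size, &bytes))
        return NULL;
    return malloc(bytes);
}

struct work { void (*fn)(struct work *); };

static void flush_fn(struct work *w) { (void)w; }

int main(void)
{
    size_t ncpus = 8;                           /* stands in for NR_CPUS */
    struct work *w = alloc_array(ncpus, sizeof(*w));

    if (!w)
        return 1;
    for (size_t i = 0; i < ncpus; i++)          /* stands in for INIT_WORK() */
        w[i].fn = flush_fn;
    printf("initialized %zu work items\n", ncpus);
    free(w);
    return 0;
}
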
@@@ -3209,9 -3307,15 +3208,15 @@@ int xfrm_migrate(const struct xfrm_sele
        struct xfrm_state *x_new[XFRM_MAX_DEPTH];
        struct xfrm_migrate *mp;
  
+       /* Stage 0 - sanity checks */
        if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
                goto out;
  
+       if (dir >= XFRM_POLICY_MAX) {
+               err = -EINVAL;
+               goto out;
+       }
        /* Stage 1 - find policy */
        if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
                err = -ENOENT;
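The added stage-0 check rejects a direction of XFRM_POLICY_MAX or higher before it is used to select a policy table; the value arrives with the migrate request and must be treated as untrusted input. The pattern is simply "validate the index before indexing", for example:

#include <errno.h>
#include <stdio.h>

#define POLICY_MAX 3                    /* stands in for XFRM_POLICY_MAX */

static const char *tables[POLICY_MAX] = { "in", "out", "fwd" };

static int lookup_table(unsigned int dir, const char **out)
{
    if (dir >= POLICY_MAX)              /* reject out-of-range input */
        return -EINVAL;
    *out = tables[dir];
    return 0;
}

int main(void)
{
    const char *name;

    printf("dir 7 -> %d\n", lookup_table(7, &name));   /* -22 (-EINVAL) */
    if (lookup_table(1, &name) == 0)
        printf("dir 1 -> %s\n", name);                 /* "out" */
    return 0;
}
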
diff --combined net/xfrm/xfrm_state.c
index a41e2ef789c025ce5b4612208223a7fd65d5e9d4,a792effdb0b5d51fb88835349a44a756d3a9e5e7..0dab1cd79ce4d1afe84ba9422a740689a9ebdf71
@@@ -296,14 -296,12 +296,14 @@@ int xfrm_unregister_type_offload(const 
  }
  EXPORT_SYMBOL(xfrm_unregister_type_offload);
  
 -static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
 +static const struct xfrm_type_offload *
 +xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
  {
        struct xfrm_state_afinfo *afinfo;
        const struct xfrm_type_offload **typemap;
        const struct xfrm_type_offload *type;
  
 +retry:
        afinfo = xfrm_state_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return NULL;
        if ((type && !try_module_get(type->owner)))
                type = NULL;
  
 +      if (!type && try_load) {
 +              request_module("xfrm-offload-%d-%d", family, proto);
 +              try_load = 0;
 +              goto retry;
 +      }
 +
        rcu_read_unlock();
        return type;
  }
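When no offload type is registered for the (family, proto) pair and the caller asked for auto-loading, the function requests the "xfrm-offload-%d-%d" module alias, clears try_load so at most one retry happens, and repeats the lookup. The "look up, lazily load once, retry" shape looks like this in isolation (load_provider() below is a stand-in, not request_module()):

#include <stdbool.h>
#include <stdio.h>

static const char *registered;          /* stands in for the typemap entry */

static void load_provider(void)         /* stands in for request_module() */
{
    registered = "esp-offload";
}

static const char *get_offload(bool try_load)
{
    const char *type;
retry:
    type = registered;
    if (!type && try_load) {
        load_provider();
        try_load = false;               /* retry at most once */
        goto retry;
    }
    return type;
}

int main(void)
{
    printf("first lookup: %s\n", get_offload(true));   /* loads, then finds it */
    return 0;
}
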
@@@ -732,10 -724,9 +732,10 @@@ restart
                        }
                }
        }
 -      if (cnt)
 +      if (cnt) {
                err = 0;
 -
 +              xfrm_policy_cache_flush();
 +      }
  out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
        return err;
@@@ -1629,6 -1620,7 +1629,7 @@@ in
  xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
               unsigned short family, struct net *net)
  {
+       int i;
        int err = 0;
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
        spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
        if (afinfo->tmpl_sort)
                err = afinfo->tmpl_sort(dst, src, n);
+       else
+               for (i = 0; i < n; i++)
+                       dst[i] = src[i];
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
        rcu_read_unlock();
        return err;
@@@ -1647,6 -1642,7 +1651,7 @@@ in
  xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
                unsigned short family)
  {
+       int i;
        int err = 0;
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        struct net *net = xs_net(*src);
        spin_lock_bh(&net->xfrm.xfrm_state_lock);
        if (afinfo->state_sort)
                err = afinfo->state_sort(dst, src, n);
+       else
+               for (i = 0; i < n; i++)
+                       dst[i] = src[i];
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
        rcu_read_unlock();
        return err;
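Both sort helpers previously left dst untouched when the address family provided no sort callback, so callers could go on to use an uninitialized array; the new else branch makes the fallback an identity copy of the caller's order. In isolation the fallback is just:

#include <stdio.h>

struct state { int id; };

/* Fallback when no family-specific sort callback exists: keep the caller's order. */
static void copy_unsorted(struct state **dst, struct state **src, int n)
{
    for (int i = 0; i < n; i++)
        dst[i] = src[i];
}

int main(void)
{
    struct state a = { 1 }, b = { 2 };
    struct state *src[] = { &a, &b };
    struct state *dst[2];

    copy_unsorted(dst, src, 2);
    printf("%d %d\n", dst[0]->id, dst[1]->id);   /* 1 2 */
    return 0;
}
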
@@@ -2173,7 -2172,7 +2181,7 @@@ int xfrm_state_mtu(struct xfrm_state *x
        return mtu - x->props.header_len;
  }
  
 -int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
  {
        struct xfrm_state_afinfo *afinfo;
        struct xfrm_mode *inner_mode;
        if (x->type == NULL)
                goto error;
  
 -      x->type_offload = xfrm_get_type_offload(x->id.proto, family);
 +      x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
  
        err = x->type->init_state(x);
        if (err)
@@@ -2266,7 -2265,7 +2274,7 @@@ EXPORT_SYMBOL(__xfrm_init_state)
  
  int xfrm_init_state(struct xfrm_state *x)
  {
 -      return __xfrm_init_state(x, true);
 +      return __xfrm_init_state(x, true, false);
  }
  
  EXPORT_SYMBOL(xfrm_init_state);
diff --combined net/xfrm/xfrm_user.c
index 490132d6dc36dd4c1060b8848ff7d7c1a362f5ae,9391ced0525986ce72938a9ed59c27ea124f7ba5..2bfbd9121e3b21b0eb793d2d3a685bd4cebde22b
@@@ -584,10 -584,7 +584,10 @@@ static struct xfrm_state *xfrm_state_co
  
        xfrm_mark_get(attrs, &x->mark);
  
 -      err = __xfrm_init_state(x, false);
 +      if (attrs[XFRMA_OUTPUT_MARK])
 +              x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
 +
 +      err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
        if (err)
                goto error;
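Here the presence of the XFRMA_OFFLOAD_DEV attribute (a struct nlattr pointer) is passed straight into the new bool parameter of __xfrm_init_state(), relying on C's rule that any non-null scalar converts to true, so offload module auto-loading only happens when the request actually carried an offload attribute. A two-line illustration of that conversion (names below are made up):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    int present = 0;
    void *attrs[2] = { NULL, &present };        /* [0] absent, [1] supplied */
    bool try_load0 = attrs[0];                  /* false: attribute missing */
    bool try_load1 = attrs[1];                  /* true: attribute supplied */

    printf("%d %d\n", try_load0, try_load1);    /* 0 1 */
    return 0;
}
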
  
@@@ -799,7 -796,7 +799,7 @@@ static int copy_user_offload(struct xfr
                return -EMSGSIZE;
  
        xuo = nla_data(attr);
+       memset(xuo, 0, sizeof(*xuo));
        xuo->ifindex = xso->dev->ifindex;
        xuo->flags = xso->flags;
  
@@@ -900,11 -897,6 +900,11 @@@ static int copy_to_user_state_extra(str
                ret = copy_user_offload(&x->xso, skb);
        if (ret)
                goto out;
 +      if (x->props.output_mark) {
 +              ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
 +              if (ret)
 +                      goto out;
 +      }
        if (x->security)
                ret = copy_sec_ctx(x->security, skb);
  out:
@@@ -1823,6 -1815,8 +1823,6 @@@ static int xfrm_get_policy(struct sk_bu
  
  out:
        xfrm_pol_put(xp);
 -      if (delete && err == 0)
 -              xfrm_garbage_collect(net);
        return err;
  }
  
@@@ -1875,6 -1869,7 +1875,7 @@@ static int build_aevent(struct sk_buff 
                return -EMSGSIZE;
  
        id = nlmsg_data(nlh);
+       memset(&id->sa_id, 0, sizeof(id->sa_id));
        memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr));
        id->sa_id.spi = x->id.spi;
        id->sa_id.family = x->props.family;
@@@ -2033,6 -2028,7 +2034,6 @@@ static int xfrm_flush_policy(struct sk_
                        return 0;
                return err;
        }
 -      xfrm_garbage_collect(net);
  
        c.data.type = type;
        c.event = nlh->nlmsg_type;
@@@ -2462,7 -2458,6 +2463,7 @@@ static const struct nla_policy xfrma_po
        [XFRMA_PROTO]           = { .type = NLA_U8 },
        [XFRMA_ADDRESS_FILTER]  = { .len = sizeof(struct xfrm_address_filter) },
        [XFRMA_OFFLOAD_DEV]     = { .len = sizeof(struct xfrm_user_offload) },
 +      [XFRMA_OUTPUT_MARK]     = { .len = NLA_U32 },
  };
  
  static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@@ -2584,6 -2579,8 +2585,8 @@@ static int build_expire(struct sk_buff 
        ue = nlmsg_data(nlh);
        copy_to_user_state(x, &ue->state);
        ue->hard = (c->data.hard != 0) ? 1 : 0;
+       /* clear the padding bytes */
+       memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard));
  
        err = xfrm_mark_put(skb, &x->mark);
        if (err)
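Several hunks in this file zero the netlink payload, or just its trailing padding, before filling in fields, so that compiler-inserted padding bytes never carry stale kernel memory out to userspace; build_expire() clears exactly the bytes between the end of 'hard' and the end of the structure. offsetofend() is not standard C, but it is a one-line macro over offsetof(); a userspace demonstration with a made-up message layout:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define offsetofend(type, member) \
    (offsetof(type, member) + sizeof(((type *)0)->member))

struct expire_msg {
    long long state;            /* stand-in for the copied state */
    unsigned char hard;         /* last explicitly written field */
    /* the compiler typically inserts padding bytes here */
};

int main(void)
{
    struct expire_msg ue;

    memset(&ue, 0xAA, sizeof(ue));      /* simulate stale memory */
    ue.state = 42;
    ue.hard = 1;
    /* clear the padding after 'hard', as build_expire() now does */
    memset(&ue.hard + 1, 0, sizeof(ue) - offsetofend(struct expire_msg, hard));
    printf("last byte: %u\n", ((unsigned char *)&ue)[sizeof(ue) - 1]);   /* 0 */
    return 0;
}
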
@@@ -2682,8 -2679,6 +2685,8 @@@ static inline size_t xfrm_sa_len(struc
                l += nla_total_size(sizeof(x->props.extra_flags));
        if (x->xso.dev)
                 l += nla_total_size(sizeof(x->xso));
 +      if (x->props.output_mark)
 +              l += nla_total_size(sizeof(x->props.output_mark));
  
        /* Must count x->lastused as it may become non-zero behind our back. */
        l += nla_total_size_64bit(sizeof(u64));
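The optional XFRMA_OUTPUT_MARK attribute is only counted in the size estimate when the mark is nonzero, and copy_to_user_state_extra() emits it under exactly the same condition, so the reserved message space and the bytes actually written stay in step. A toy TLV writer showing why the size pass and the fill pass must share the condition (the 4-byte header format below is invented for the example, not the netlink attribute layout):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy TLV attribute: 2-byte type, 2-byte length, then the payload. */
static size_t tlv_size(size_t payload) { return 4 + payload; }

static size_t tlv_put_u32(unsigned char *buf, uint16_t type, uint32_t value)
{
    uint16_t len = (uint16_t)tlv_size(sizeof(value));

    memcpy(buf, &type, 2);
    memcpy(buf + 2, &len, 2);
    memcpy(buf + 4, &value, 4);
    return len;
}

int main(void)
{
    uint32_t output_mark = 0x17;        /* illustrative value */
    unsigned char msg[64];
    size_t need = 0, used = 0;

    /* Size pass and fill pass gate on the same condition, as xfrm_sa_len()
     * and copy_to_user_state_extra() do for the output mark. */
    if (output_mark)
        need += tlv_size(sizeof(output_mark));
    if (output_mark)
        used += tlv_put_u32(msg + used, 1, output_mark);

    assert(used <= sizeof(msg) && used == need);
    printf("need=%zu used=%zu\n", need, used);
    return 0;
}
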
@@@ -2723,6 -2718,7 +2726,7 @@@ static int xfrm_notify_sa(struct xfrm_s
                struct nlattr *attr;
  
                id = nlmsg_data(nlh);
+               memset(id, 0, sizeof(*id));
                memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
                id->spi = x->id.spi;
                id->family = x->props.family;