Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next...

author David S. Miller <davem@davemloft.net>

Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)

committer David S. Miller <davem@davemloft.net>

Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
author David S. Miller <davem@davemloft.net>
Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h

index ff0f4c503f534a792e22114af9395a81d896d4ef..67cce2736806d1054d75ab87ac211f93d210e273 100644 (file)
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -101,6 +101,7 @@
  #define MVPP2_CLS_FLOW_TBL1_REG                        0x1828
  #define     MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK  0x7
  #define     MVPP2_CLS_FLOW_TBL1_N_FIELDS(x)    (x)
+#define     MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu)    (((lu) & 0x3f) << 3)
  #define     MVPP2_CLS_FLOW_TBL1_PRIO_MASK      0x3f
  #define     MVPP2_CLS_FLOW_TBL1_PRIO(x)                ((x) << 9)
  #define     MVPP2_CLS_FLOW_TBL1_SEQ_MASK       0x7
@@ -123,7 +124,10 @@
  #define MVPP22_CLS_C2_TCAM_DATA2               0x1b18
  #define MVPP22_CLS_C2_TCAM_DATA3               0x1b1c
  #define MVPP22_CLS_C2_TCAM_DATA4               0x1b20
+#define     MVPP22_CLS_C2_LU_TYPE(lu)          ((lu) & 0x3f)
  #define     MVPP22_CLS_C2_PORT_ID(port)                ((port) << 8)
+#define MVPP22_CLS_C2_TCAM_INV                 0x1b24
+#define     MVPP22_CLS_C2_TCAM_INV_BIT         BIT(31)
  #define MVPP22_CLS_C2_HIT_CTR                  0x1b50
  #define MVPP22_CLS_C2_ACT                      0x1b60
  #define     MVPP22_CLS_C2_ACT_RSS_EN(act)      (((act) & 0x3) << 19)
@@ -610,6 +614,8 @@
  #define MVPP2_BIT_TO_WORD(bit)         ((bit) / 32)
  #define MVPP2_BIT_IN_WORD(bit)         ((bit) % 32)
  
+#define MVPP2_N_PRS_FLOWS              52
+
  /* RSS constants */
  #define MVPP22_RSS_TABLE_ENTRIES       32
  
@@ -710,6 +716,7 @@ enum mvpp2_prs_l3_cast {
  #define MVPP2_DESC_DMA_MASK    DMA_BIT_MASK(40)
  
  /* Definitions */
+struct mvpp2_dbgfs_entries;
  
  /* Shared Packet Processor resources */
  struct mvpp2 {
@@ -771,6 +778,9 @@ struct mvpp2 {
  
         /* Debugfs root entry */
         struct dentry *dbgfs_dir;
+
+       /* Debugfs entries private data */
+       struct mvpp2_dbgfs_entries *dbgfs_entries;
  };
  
  struct mvpp2_pcpu_stats {
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c

index efdb7a65683576a84806639630fce4d0928defcd..1087974d3b98aefc295b14fe8c4e8546beb5421d 100644 (file)
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -22,7 +22,7 @@
         }                                                       \
  }
  
-static struct mvpp2_cls_flow cls_flows[MVPP2_N_FLOWS] = {
+static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
         /* TCP over IPv4 flows, Not fragmented, no vlan tag */
         MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
                        MVPP22_CLS_HEK_IP4_5T,
@@ -429,12 +429,6 @@ static void mvpp2_cls_flow_port_id_sel(struct mvpp2_cls_flow_entry *fe,
                 fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
  }
  
-static void mvpp2_cls_flow_seq_set(struct mvpp2_cls_flow_entry *fe, u32 seq)
-{
-       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_SEQ(MVPP2_CLS_FLOW_TBL1_SEQ_MASK);
-       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_SEQ(seq);
-}
-
  static void mvpp2_cls_flow_last_set(struct mvpp2_cls_flow_entry *fe,
                                     bool is_last)
  {
@@ -454,9 +448,16 @@ static void mvpp2_cls_flow_port_add(struct mvpp2_cls_flow_entry *fe,
         fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID(port);
  }
  
+static void mvpp2_cls_flow_lu_type_set(struct mvpp2_cls_flow_entry *fe,
+                                      u8 lu_type)
+{      /* Replace the lookup-type field in word 1 of the in-memory entry */
+       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK); /* clear bits 3-8 */
+       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu_type); /* macro masks lu_type to 6 bits */
+}
+
  /* Initialize the parser entry for the given flow */
  static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
-                                   struct mvpp2_cls_flow *flow)
+                                   const struct mvpp2_cls_flow *flow)
  {
         mvpp2_prs_add_flow(priv, flow->flow_id, flow->prs_ri.ri,
                            flow->prs_ri.ri_mask);
@@ -464,7 +465,7 @@ static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
  
  /* Initialize the Lookup Id table entry for the given flow */
  static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
-                                   struct mvpp2_cls_flow *flow)
+                                   const struct mvpp2_cls_flow *flow)
  {
         struct mvpp2_cls_lookup_entry le;
  
@@ -477,7 +478,7 @@ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
         /* We point on the first lookup in the sequence for the flow, that is
          * the C2 lookup.
          */
-       le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_FLOW_C2_ENTRY(flow->flow_id));
+       le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_CLS_FLT_FIRST(flow->flow_id));
  
         /* CLS is always enabled, RSS is enabled/disabled in C2 lookup */
         le.data |= MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK;
@@ -485,21 +486,86 @@ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
         mvpp2_cls_lookup_write(priv, &le);
  }
  
+static void mvpp2_cls_c2_write(struct mvpp2 *priv,
+                              struct mvpp2_cls_c2_entry *c2)
+{      /* Program one C2 entry from *c2: index, INV bit, action, attributes, TCAM data */
+       u32 val;
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index); /* presumably selects the entry accessed below */
+
+       val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV); /* read-modify-write: only the INV bit changes */
+       if (c2->valid)
+               val &= ~MVPP22_CLS_C2_TCAM_INV_BIT;
+       else
+               val |= MVPP22_CLS_C2_TCAM_INV_BIT; /* INV bit set <=> entry not valid */
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_INV, val);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
+       /* Writing TCAM_DATA4 flushes TCAM_DATA0-4 and INV to HW: keep this write last */
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
+}
+
+void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
+                      struct mvpp2_cls_c2_entry *c2)
+{      /* Fill *c2 (index, tcam[0..4], act, attr[0..3], valid) from the C2 registers */
+       u32 val;
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index); /* presumably selects the entry read below */
+
+       c2->index = index;
+
+       c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
+       c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
+       c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
+       c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
+       c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
+
+       c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
+
+       c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
+       c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
+       c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
+       c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
+
+       val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV);
+       c2->valid = !(val & MVPP22_CLS_C2_TCAM_INV_BIT); /* valid is the inverse of the INV bit */
+}
+
  /* Initialize the flow table entries for the given flow */
-static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
+static void mvpp2_cls_flow_init(struct mvpp2 *priv,
+                               const struct mvpp2_cls_flow *flow)
  {
         struct mvpp2_cls_flow_entry fe;
-       int i;
+       int i, pri = 0;
+
+       /* Assign default values to all entries in the flow */
+       for (i = MVPP2_CLS_FLT_FIRST(flow->flow_id);
+            i <= MVPP2_CLS_FLT_LAST(flow->flow_id); i++) {
+               memset(&fe, 0, sizeof(fe));
+               fe.index = i;
+               mvpp2_cls_flow_pri_set(&fe, pri++);
  
-       /* C2 lookup */
-       memset(&fe, 0, sizeof(fe));
-       fe.index = MVPP2_FLOW_C2_ENTRY(flow->flow_id);
+               if (i == MVPP2_CLS_FLT_LAST(flow->flow_id))
+                       mvpp2_cls_flow_last_set(&fe, 1);
+
+               mvpp2_cls_flow_write(priv, &fe);
+       }
+
+       /* RSS config C2 lookup */
+       mvpp2_cls_flow_read(priv, MVPP2_CLS_FLT_C2_RSS_ENTRY(flow->flow_id),
+                           &fe);
  
         mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
         mvpp2_cls_flow_port_id_sel(&fe, true);
-       mvpp2_cls_flow_last_set(&fe, 0);
-       mvpp2_cls_flow_pri_set(&fe, 0);
-       mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_FIRST1);
+       mvpp2_cls_flow_lu_type_set(&fe, MVPP2_CLS_LU_ALL);
  
         /* Add all ports */
         for (i = 0; i < MVPP2_MAX_PORTS; i++)
@@ -509,22 +575,19 @@ static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
  
         /* C3Hx lookups */
         for (i = 0; i < MVPP2_MAX_PORTS; i++) {
-               memset(&fe, 0, sizeof(fe));
-               fe.index = MVPP2_PORT_FLOW_HASH_ENTRY(i, flow->flow_id);
+               mvpp2_cls_flow_read(priv,
+                                   MVPP2_CLS_FLT_HASH_ENTRY(i, flow->flow_id),
+                                   &fe);
  
+               /* Set a default engine. Will be overwritten when setting the
+                * real HEK parameters
+                */
+               mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C3HA);
                 mvpp2_cls_flow_port_id_sel(&fe, true);
-               mvpp2_cls_flow_pri_set(&fe, i + 1);
-               mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_MIDDLE);
                 mvpp2_cls_flow_port_add(&fe, BIT(i));
  
                 mvpp2_cls_flow_write(priv, &fe);
         }
-
-       /* Update the last entry */
-       mvpp2_cls_flow_last_set(&fe, 1);
-       mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_LAST);
-
-       mvpp2_cls_flow_write(priv, &fe);
  }
  
  /* Adds a field to the Header Extracted Key generation parameters*/
@@ -555,6 +618,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
  
         for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
                 switch (BIT(i)) {
+               case MVPP22_CLS_HEK_OPT_MAC_DA:
+                       field_id = MVPP22_CLS_FIELD_MAC_DA;
+                       break;
                 case MVPP22_CLS_HEK_OPT_VLAN:
                         field_id = MVPP22_CLS_FIELD_VLAN;
                         break;
@@ -586,9 +652,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
         return 0;
  }
  
-struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
+const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
  {
-       if (flow >= MVPP2_N_FLOWS)
+       if (flow >= MVPP2_N_PRS_FLOWS)
                 return NULL;
  
         return &cls_flows[flow];
@@ -608,21 +674,17 @@ struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
  static int mvpp2_port_rss_hash_opts_set(struct mvpp2_port *port, int flow_type,
                                         u16 requested_opts)
  {
+       const struct mvpp2_cls_flow *flow;
         struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *flow;
         int i, engine, flow_index;
         u16 hash_opts;
  
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for_each_cls_flow_id_with_type(i, flow_type) {
                 flow = mvpp2_cls_flow_get(i);
                 if (!flow)
                         return -EINVAL;
  
-               if (flow->flow_type != flow_type)
-                       continue;
-
-               flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
-                                                       flow->flow_id);
+               flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id);
  
                 mvpp2_cls_flow_read(port->priv, flow_index, &fe);
  
@@ -697,21 +759,17 @@ u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe)
   */
  static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
  {
+       const struct mvpp2_cls_flow *flow;
         struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *flow;
         int i, flow_index;
         u16 hash_opts = 0;
  
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for_each_cls_flow_id_with_type(i, flow_type) {
                 flow = mvpp2_cls_flow_get(i);
                 if (!flow)
                         return 0;
  
-               if (flow->flow_type != flow_type)
-                       continue;
-
-               flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
-                                                       flow->flow_id);
+               flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id);
  
                 mvpp2_cls_flow_read(port->priv, flow_index, &fe);
  
@@ -723,10 +781,10 @@ static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
  
  static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
  {
-       struct mvpp2_cls_flow *flow;
+       const struct mvpp2_cls_flow *flow;
         int i;
  
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) {
                 flow = mvpp2_cls_flow_get(i);
                 if (!flow)
                         break;
@@ -737,47 +795,6 @@ static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
         }
  }
  
-static void mvpp2_cls_c2_write(struct mvpp2 *priv,
-                              struct mvpp2_cls_c2_entry *c2)
-{
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
-
-       /* Write TCAM */
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
-
-       mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
-
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
-}
-
-void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
-                      struct mvpp2_cls_c2_entry *c2)
-{
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
-
-       c2->index = index;
-
-       c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
-       c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
-       c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
-       c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
-       c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
-
-       c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
-
-       c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
-       c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
-       c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
-       c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
-}
-
  static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
  {
         struct mvpp2_cls_c2_entry c2;
@@ -791,6 +808,10 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
         c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap);
         c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap));
  
+       /* Match on Lookup Type */
+       c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK));
+       c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_ALL);
+
         /* Update RSS status after matching this entry */
         c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
  
@@ -809,6 +830,8 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
         c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
                       MVPP22_CLS_C2_ATTR0_QLOW(ql);
  
+       c2.valid = true;
+
         mvpp2_cls_c2_write(port->priv, &c2);
  }
  
@@ -817,6 +840,7 @@ void mvpp2_cls_init(struct mvpp2 *priv)
  {
         struct mvpp2_cls_lookup_entry le;
         struct mvpp2_cls_flow_entry fe;
+       struct mvpp2_cls_c2_entry c2;
         int index;
  
         /* Enable classifier */
@@ -840,6 +864,14 @@ void mvpp2_cls_init(struct mvpp2 *priv)
                 mvpp2_cls_lookup_write(priv, &le);
         }
  
+       /* Clear C2 TCAM engine table */
+       memset(&c2, 0, sizeof(c2));
+       c2.valid = false;
+       for (index = 0; index < MVPP22_CLS_C2_N_ENTRIES; index++) {
+               c2.index = index;
+               mvpp2_cls_c2_write(priv, &c2);
+       }
+
         mvpp2_cls_port_init_flows(priv);
  }
  
@@ -902,12 +934,12 @@ static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
         mvpp2_cls_c2_write(port->priv, &c2);
  }
  
-void mvpp22_rss_enable(struct mvpp2_port *port)
+void mvpp22_port_rss_enable(struct mvpp2_port *port)
  {
         mvpp2_rss_port_c2_enable(port);
  }
  
-void mvpp22_rss_disable(struct mvpp2_port *port)
+void mvpp22_port_rss_disable(struct mvpp2_port *port)
  {
         mvpp2_rss_port_c2_disable(port);
  }
@@ -1037,7 +1069,7 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
         return 0;
  }
  
-void mvpp22_rss_port_init(struct mvpp2_port *port)
+void mvpp22_port_rss_init(struct mvpp2_port *port)
  {
         struct mvpp2 *priv = port->priv;
         int i;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h

index 089f05f298917ae394b9245cdb255259d269b3c7..96304ffc5d49efd8f6c25d245700252931486262 100644 (file)
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -71,14 +71,6 @@ enum mvpp2_cls_field_id {
         MVPP22_CLS_FIELD_L4DIP = 0x1e,
  };
  
-enum mvpp2_cls_flow_seq {
-       MVPP2_CLS_FLOW_SEQ_NORMAL = 0,
-       MVPP2_CLS_FLOW_SEQ_FIRST1,
-       MVPP2_CLS_FLOW_SEQ_FIRST2,
-       MVPP2_CLS_FLOW_SEQ_LAST,
-       MVPP2_CLS_FLOW_SEQ_MIDDLE
-};
-
  /* Classifier C2 engine constants */
  #define MVPP22_CLS_C2_TCAM_EN(data)            ((data) << 16)
  
@@ -105,34 +97,25 @@ enum mvpp22_cls_c2_fwd_action {
  
  struct mvpp2_cls_c2_entry {
         u32 index;
+       /* TCAM lookup key */
         u32 tcam[MVPP2_CLS_C2_TCAM_WORDS];
+       /* Actions to perform upon TCAM match */
         u32 act;
+       /* Attributes relative to the actions to perform */
         u32 attr[MVPP2_CLS_C2_ATTR_WORDS];
+       /* Entry validity */
+       u8 valid;
  };
  
  /* Classifier C2 engine entries */
-#define MVPP22_CLS_C2_RSS_ENTRY(port)  (port)
-#define MVPP22_CLS_C2_N_ENTRIES                MVPP2_MAX_PORTS
+#define MVPP22_CLS_C2_N_ENTRIES                256
  
-/* RSS flow entries in the flow table. We have 2 entries per port for RSS.
- *
- * The first performs a lookup using the C2 TCAM engine, to tag the
- * packet for software forwarding (needed for RSS), enable or disable RSS, and
- * assign the default rx queue.
- *
- * The second configures the hash generation, by specifying which fields of the
- * packet header are used to generate the hash, and specifies the relevant hash
- * engine to use.
- */
-#define MVPP22_RSS_FLOW_C2_OFFS                0
-#define MVPP22_RSS_FLOW_HASH_OFFS      1
-#define MVPP22_RSS_FLOW_SIZE           (MVPP22_RSS_FLOW_HASH_OFFS + 1)
+/* Number of per-port dedicated entries in the C2 TCAM */
+#define MVPP22_CLS_C2_PORT_RANGE       8
  
-#define MVPP22_RSS_FLOW_C2(port)       ((port) * MVPP22_RSS_FLOW_SIZE + \
-                                        MVPP22_RSS_FLOW_C2_OFFS)
-#define MVPP22_RSS_FLOW_HASH(port)     ((port) * MVPP22_RSS_FLOW_SIZE + \
-                                        MVPP22_RSS_FLOW_HASH_OFFS)
-#define MVPP22_RSS_FLOW_FIRST(port)    MVPP22_RSS_FLOW_C2(port)
+#define MVPP22_CLS_C2_PORT_FIRST(p)    (MVPP22_CLS_C2_N_ENTRIES - \
+                                       ((p) * MVPP22_CLS_C2_PORT_RANGE)) /* 256 for p == 0: one past the last index */
+#define MVPP22_CLS_C2_RSS_ENTRY(p)     (MVPP22_CLS_C2_PORT_FIRST(p) - 1) /* port 0 -> 255, port 1 -> 247, ... */
  
  /* Packet flow ID */
  enum mvpp2_prs_flow {
@@ -162,6 +145,15 @@ enum mvpp2_prs_flow {
         MVPP2_FL_LAST,
  };
  
+enum mvpp2_cls_lu_type {
+       MVPP2_CLS_LU_ALL = 0,
+};
+
+/* LU Type defined for all engines, and specified in the flow table */
+#define MVPP2_CLS_LU_TYPE_MASK                 0x3f
+
+#define MVPP2_N_FLOWS          (MVPP2_FL_LAST - MVPP2_FL_START)
+
  struct mvpp2_cls_flow {
         /* The L2-L4 traffic flow type */
         int flow_type;
@@ -176,12 +168,37 @@ struct mvpp2_cls_flow {
         struct mvpp2_prs_result_info prs_ri;
  };
  
-#define MVPP2_N_FLOWS  52
+#define MVPP2_CLS_FLT_ENTRIES_PER_FLOW         (MVPP2_MAX_PORTS + 1) /* 1 C2 RSS entry + 1 hash entry per port */
+#define MVPP2_CLS_FLT_FIRST(id)                        (((id) - MVPP2_FL_START) * \
+                                                MVPP2_CLS_FLT_ENTRIES_PER_FLOW) /* id is a flow_id, not a cls_flows index */
+#define MVPP2_CLS_FLT_C2_RSS_ENTRY(id)         (MVPP2_CLS_FLT_FIRST(id)) /* first entry of the flow's range */
+#define MVPP2_CLS_FLT_HASH_ENTRY(port, id)     (MVPP2_CLS_FLT_C2_RSS_ENTRY(id) + (port) + 1) /* one per port, after the C2 entry */
+#define MVPP2_CLS_FLT_LAST(id)                 (MVPP2_CLS_FLT_FIRST(id) + \
+                                                MVPP2_CLS_FLT_ENTRIES_PER_FLOW - 1) /* inclusive upper bound */
+
+/* Iterate on each classifier flow id. Sets 'i' to be the index of the first
+ * entry in the cls_flows table for each different flow_id.
+ * This relies on entries having the same flow_id in the cls_flows table being
+ * contiguous.
+ */
+#define for_each_cls_flow_id(i)                                                      \
+       for ((i) = 0; (i) < MVPP2_N_PRS_FLOWS; (i)++)                         \
+               if ((i) > 0 &&                                                \
+                   cls_flows[(i)].flow_id == cls_flows[(i) - 1].flow_id)       \
+                       continue;                                             \
+               else
+
+/* Iterate on each classifier flow that has a given flow_type. Sets 'i' to be
+ * the index of the first entry in the cls_flows table for each different
+ * flow_id that has the given flow_type. This allows operating on all flows
+ * that match a given ethtool flow type.
+ */
+#define for_each_cls_flow_id_with_type(i, type)                                      \
+       for_each_cls_flow_id((i))                                             \
+               if (cls_flows[(i)].flow_type != (type))                       \
+                       continue;                                             \
+               else
  
-#define MVPP2_ENTRIES_PER_FLOW                 (MVPP2_MAX_PORTS + 1)
-#define MVPP2_FLOW_C2_ENTRY(id)                        ((id) * MVPP2_ENTRIES_PER_FLOW)
-#define MVPP2_PORT_FLOW_HASH_ENTRY(port, id)   ((id) * MVPP2_ENTRIES_PER_FLOW + \
-                                               (port) + 1)
  struct mvpp2_cls_flow_entry {
         u32 index;
         u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS];
@@ -194,11 +211,10 @@ struct mvpp2_cls_lookup_entry {
  };
  
  void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
+void mvpp22_port_rss_init(struct mvpp2_port *port);
  
-void mvpp22_rss_port_init(struct mvpp2_port *port);
-
-void mvpp22_rss_enable(struct mvpp2_port *port);
-void mvpp22_rss_disable(struct mvpp2_port *port);
+void mvpp22_port_rss_enable(struct mvpp2_port *port);
+void mvpp22_port_rss_disable(struct mvpp2_port *port);
  
  int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
  int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
@@ -213,7 +229,7 @@ int mvpp2_cls_flow_eng_get(struct mvpp2_cls_flow_entry *fe);
  
  u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe);
  
-struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
+const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
  
  u32 mvpp2_cls_flow_hits(struct mvpp2 *priv, int index);
  
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c

index f9744a61e5dd6fc6282eddca38b2718dac899e12..0ee39ea47b6b63be44de58f5897eb44d054b1477 100644 (file)
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
@@ -18,22 +18,48 @@ struct mvpp2_dbgfs_prs_entry {
         struct mvpp2 *priv;
  };
  
+struct mvpp2_dbgfs_c2_entry {
+       int id;
+       struct mvpp2 *priv;
+};
+
  struct mvpp2_dbgfs_flow_entry {
         int flow;
         struct mvpp2 *priv;
  };
  
+struct mvpp2_dbgfs_flow_tbl_entry {
+       int id;
+       struct mvpp2 *priv;
+};
+
  struct mvpp2_dbgfs_port_flow_entry {
         struct mvpp2_port *port;
         struct mvpp2_dbgfs_flow_entry *dbg_fe;
  };
  
+struct mvpp2_dbgfs_entries { /* storage for debugfs file private data, reached via mvpp2.dbgfs_entries */
+       /* Entries for Header Parser debug info */
+       struct mvpp2_dbgfs_prs_entry prs_entries[MVPP2_PRS_TCAM_SRAM_SIZE];
+
+       /* Entries for Classifier C2 engine debug info */
+       struct mvpp2_dbgfs_c2_entry c2_entries[MVPP22_CLS_C2_N_ENTRIES];
+
+       /* Entries for Classifier Flow Table debug info */
+       struct mvpp2_dbgfs_flow_tbl_entry flt_entries[MVPP2_CLS_FLOWS_TBL_SIZE];
+
+       /* Entries for Classifier flows debug info, indexed like cls_flows */
+       struct mvpp2_dbgfs_flow_entry flow_entries[MVPP2_N_PRS_FLOWS];
+
+       /* Per-port flows info, slot = port->id. NOTE(review): dbg_fe is per flow, may be overwritten */
+       struct mvpp2_dbgfs_port_flow_entry port_flow_entries[MVPP2_MAX_PORTS];
+};
+
  static int mvpp2_dbgfs_flow_flt_hits_show(struct seq_file *s, void *unused)
  {
-       struct mvpp2_dbgfs_flow_entry *entry = s->private;
-       int id = MVPP2_FLOW_C2_ENTRY(entry->flow);
+       struct mvpp2_dbgfs_flow_tbl_entry *entry = s->private;
  
-       u32 hits = mvpp2_cls_flow_hits(entry->priv, id);
+       u32 hits = mvpp2_cls_flow_hits(entry->priv, entry->id);
  
         seq_printf(s, "%u\n", hits);
  
@@ -58,7 +84,7 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_dec_hits);
  static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused)
  {
         struct mvpp2_dbgfs_flow_entry *entry = s->private;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_cls_flow *f;
         const char *flow_name;
  
         f = mvpp2_cls_flow_get(entry->flow);
@@ -93,30 +119,12 @@ static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused)
         return 0;
  }
  
-static int mvpp2_dbgfs_flow_type_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, mvpp2_dbgfs_flow_type_show, inode->i_private);
-}
-
-static int mvpp2_dbgfs_flow_type_release(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq = file->private_data;
-       struct mvpp2_dbgfs_flow_entry *flow_entry = seq->private;
-
-       kfree(flow_entry);
-       return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_flow_type_fops = {
-       .open = mvpp2_dbgfs_flow_type_open,
-       .read = seq_read,
-       .release = mvpp2_dbgfs_flow_type_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_type);
  
  static int mvpp2_dbgfs_flow_id_show(struct seq_file *s, void *unused)
  {
-       struct mvpp2_dbgfs_flow_entry *entry = s->private;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_dbgfs_flow_entry *entry = s->private;
+       const struct mvpp2_cls_flow *f;
  
         f = mvpp2_cls_flow_get(entry->flow);
         if (!f)
@@ -134,7 +142,7 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
         struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
         struct mvpp2_port *port = entry->port;
         struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_cls_flow *f;
         int flow_index;
         u16 hash_opts;
  
@@ -142,7 +150,7 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
         if (!f)
                 return -EINVAL;
  
-       flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+       flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id);
  
         mvpp2_cls_flow_read(port->priv, flow_index, &fe);
  
@@ -153,42 +161,21 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
         return 0;
  }
  
-static int mvpp2_dbgfs_port_flow_hash_opt_open(struct inode *inode,
-                                              struct file *file)
-{
-       return single_open(file, mvpp2_dbgfs_port_flow_hash_opt_show,
-                          inode->i_private);
-}
-
-static int mvpp2_dbgfs_port_flow_hash_opt_release(struct inode *inode,
-                                                 struct file *file)
-{
-       struct seq_file *seq = file->private_data;
-       struct mvpp2_dbgfs_port_flow_entry *flow_entry = seq->private;
-
-       kfree(flow_entry);
-       return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_port_flow_hash_opt_fops = {
-       .open = mvpp2_dbgfs_port_flow_hash_opt_open,
-       .read = seq_read,
-       .release = mvpp2_dbgfs_port_flow_hash_opt_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_hash_opt);
  
  static int mvpp2_dbgfs_port_flow_engine_show(struct seq_file *s, void *unused)
  {
         struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
         struct mvpp2_port *port = entry->port;
         struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_cls_flow *f;
         int flow_index, engine;
  
         f = mvpp2_cls_flow_get(entry->dbg_fe->flow);
         if (!f)
                 return -EINVAL;
  
-       flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+       flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id);
  
         mvpp2_cls_flow_read(port->priv, flow_index, &fe);
  
@@ -203,11 +190,10 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_engine);
  
  static int mvpp2_dbgfs_flow_c2_hits_show(struct seq_file *s, void *unused)
  {
-       struct mvpp2_port *port = s->private;
+       struct mvpp2_dbgfs_c2_entry *entry = s->private;
         u32 hits;
  
-       hits = mvpp2_cls_c2_hit_count(port->priv,
-                                     MVPP22_CLS_C2_RSS_ENTRY(port->id));
+       hits = mvpp2_cls_c2_hit_count(entry->priv, entry->id);
  
         seq_printf(s, "%u\n", hits);
  
@@ -218,11 +204,11 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_hits);
  
  static int mvpp2_dbgfs_flow_c2_rxq_show(struct seq_file *s, void *unused)
  {
-       struct mvpp2_port *port = s->private;
+       struct mvpp2_dbgfs_c2_entry *entry = s->private;
         struct mvpp2_cls_c2_entry c2;
         u8 qh, ql;
  
-       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+       mvpp2_cls_c2_read(entry->priv, entry->id, &c2);
  
         qh = (c2.attr[0] >> MVPP22_CLS_C2_ATTR0_QHIGH_OFFS) &
              MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
@@ -239,11 +225,11 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_rxq);
  
  static int mvpp2_dbgfs_flow_c2_enable_show(struct seq_file *s, void *unused)
  {
-       struct mvpp2_port *port = s->private;
+       struct mvpp2_dbgfs_c2_entry *entry = s->private;
         struct mvpp2_cls_c2_entry c2;
         int enabled;
  
-       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+       mvpp2_cls_c2_read(entry->priv, entry->id, &c2);
  
         enabled = !!(c2.attr[2] & MVPP22_CLS_C2_ATTR2_RSS_EN);
  
@@ -456,25 +442,7 @@ static int mvpp2_dbgfs_prs_valid_show(struct seq_file *s, void *unused)
         return 0;
  }
  
-static int mvpp2_dbgfs_prs_valid_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, mvpp2_dbgfs_prs_valid_show, inode->i_private);
-}
-
-static int mvpp2_dbgfs_prs_valid_release(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq = file->private_data;
-       struct mvpp2_dbgfs_prs_entry *entry = seq->private;
-
-       kfree(entry);
-       return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_prs_valid_fops = {
-       .open = mvpp2_dbgfs_prs_valid_open,
-       .read = seq_read,
-       .release = mvpp2_dbgfs_prs_valid_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_valid);
  
  static int mvpp2_dbgfs_flow_port_init(struct dentry *parent,
                                       struct mvpp2_port *port,
@@ -487,10 +455,7 @@ static int mvpp2_dbgfs_flow_port_init(struct dentry *parent,
         if (IS_ERR(port_dir))
                 return PTR_ERR(port_dir);
  
-       /* This will be freed by 'hash_opts' release op */
-       port_entry = kmalloc(sizeof(*port_entry), GFP_KERNEL);
-       if (!port_entry)
-               return -ENOMEM;
+       port_entry = &port->priv->dbgfs_entries->port_flow_entries[port->id];
  
         port_entry->port = port;
         port_entry->dbg_fe = entry;
@@ -518,17 +483,11 @@ static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent,
         if (!flow_entry_dir)
                 return -ENOMEM;
  
-       /* This will be freed by 'type' release op */
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry)
-               return -ENOMEM;
+       entry = &priv->dbgfs_entries->flow_entries[flow];
  
         entry->flow = flow;
         entry->priv = priv;
  
-       debugfs_create_file("flow_hits", 0444, flow_entry_dir, entry,
-                           &mvpp2_dbgfs_flow_flt_hits_fops);
-
         debugfs_create_file("dec_hits", 0444, flow_entry_dir, entry,
                             &mvpp2_dbgfs_flow_dec_hits_fops);
  
@@ -545,6 +504,7 @@ static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent,
                 if (ret)
                         return ret;
         }
+
         return 0;
  }
  
@@ -557,7 +517,7 @@ static int mvpp2_dbgfs_flow_init(struct dentry *parent, struct mvpp2 *priv)
         if (!flow_dir)
                 return -ENOMEM;
  
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) {
                 ret = mvpp2_dbgfs_flow_entry_init(flow_dir, priv, i);
                 if (ret)
                         return ret;
@@ -582,10 +542,7 @@ static int mvpp2_dbgfs_prs_entry_init(struct dentry *parent,
         if (!prs_entry_dir)
                 return -ENOMEM;
  
-       /* The 'valid' entry's ops will free that */
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry)
-               return -ENOMEM;
+       entry = &priv->dbgfs_entries->prs_entries[tid];
  
         entry->tid = tid;
         entry->priv = priv;
@@ -630,6 +587,98 @@ static int mvpp2_dbgfs_prs_init(struct dentry *parent, struct mvpp2 *priv)
         return 0;
  }
  
+static int mvpp2_dbgfs_c2_entry_init(struct dentry *parent,
+                                    struct mvpp2 *priv, int id)
+{
+       struct mvpp2_dbgfs_c2_entry *entry;
+       struct dentry *c2_entry_dir;
+       char c2_entry_name[10];
+
+       if (id >= MVPP22_CLS_C2_N_ENTRIES)
+               return -EINVAL;
+
+       sprintf(c2_entry_name, "%03d", id);
+
+       c2_entry_dir = debugfs_create_dir(c2_entry_name, parent);
+       if (!c2_entry_dir)
+               return -ENOMEM;
+
+       entry = &priv->dbgfs_entries->c2_entries[id];
+
+       entry->id = id;
+       entry->priv = priv;
+
+       debugfs_create_file("hits", 0444, c2_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_c2_hits_fops);
+
+       debugfs_create_file("default_rxq", 0444, c2_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_c2_rxq_fops);
+
+       debugfs_create_file("rss_enable", 0444, c2_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_c2_enable_fops);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_flow_tbl_entry_init(struct dentry *parent,
+                                          struct mvpp2 *priv, int id)
+{
+       struct mvpp2_dbgfs_flow_tbl_entry *entry;
+       struct dentry *flow_tbl_entry_dir;
+       char flow_tbl_entry_name[10];
+
+       if (id >= MVPP2_CLS_FLOWS_TBL_SIZE)
+               return -EINVAL;
+
+       sprintf(flow_tbl_entry_name, "%03d", id);
+
+       flow_tbl_entry_dir = debugfs_create_dir(flow_tbl_entry_name, parent);
+       if (!flow_tbl_entry_dir)
+               return -ENOMEM;
+
+       entry = &priv->dbgfs_entries->flt_entries[id];
+
+       entry->id = id;
+       entry->priv = priv;
+
+       debugfs_create_file("hits", 0444, flow_tbl_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_flt_hits_fops);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_cls_init(struct dentry *parent, struct mvpp2 *priv)
+{
+       struct dentry *cls_dir, *c2_dir, *flow_tbl_dir;
+       int i, ret;
+
+       cls_dir = debugfs_create_dir("classifier", parent);
+       if (!cls_dir)
+               return -ENOMEM;
+
+       c2_dir = debugfs_create_dir("c2", cls_dir);
+       if (!c2_dir)
+               return -ENOMEM;
+
+       for (i = 0; i < MVPP22_CLS_C2_N_ENTRIES; i++) {
+               ret = mvpp2_dbgfs_c2_entry_init(c2_dir, priv, i);
+               if (ret)
+                       return ret;
+       }
+
+       flow_tbl_dir = debugfs_create_dir("flow_table", cls_dir);
+       if (!flow_tbl_dir)
+               return -ENOMEM;
+
+       for (i = 0; i < MVPP2_CLS_FLOWS_TBL_SIZE; i++) {
+               ret = mvpp2_dbgfs_flow_tbl_entry_init(flow_tbl_dir, priv, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
  static int mvpp2_dbgfs_port_init(struct dentry *parent,
                                  struct mvpp2_port *port)
  {
@@ -648,21 +697,14 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent,
         debugfs_create_file("vid_filter", 0444, port_dir, port,
                             &mvpp2_dbgfs_port_vid_fops);
  
-       debugfs_create_file("c2_hits", 0444, port_dir, port,
-                           &mvpp2_dbgfs_flow_c2_hits_fops);
-
-       debugfs_create_file("default_rxq", 0444, port_dir, port,
-                           &mvpp2_dbgfs_flow_c2_rxq_fops);
-
-       debugfs_create_file("rss_enable", 0444, port_dir, port,
-                           &mvpp2_dbgfs_flow_c2_enable_fops);
-
         return 0;
  }
  
  void mvpp2_dbgfs_cleanup(struct mvpp2 *priv)
  {
         debugfs_remove_recursive(priv->dbgfs_dir);
+
+       kfree(priv->dbgfs_entries);
  }
  
  void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
@@ -682,11 +724,18 @@ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
                 return;
  
         priv->dbgfs_dir = mvpp2_dir;
+       priv->dbgfs_entries = kzalloc(sizeof(*priv->dbgfs_entries), GFP_KERNEL);
+       if (!priv->dbgfs_entries)
+               goto err;
  
         ret = mvpp2_dbgfs_prs_init(mvpp2_dir, priv);
         if (ret)
                 goto err;
  
+       ret = mvpp2_dbgfs_cls_init(mvpp2_dir, priv);
+       if (ret)
+               goto err;
+
         for (i = 0; i < priv->port_count; i++) {
                 ret = mvpp2_dbgfs_port_init(mvpp2_dir, priv->port_list[i]);
                 if (ret)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c

index 25fbed2b8d94674d43133f327740048773827115..f128ea22b33958775d32ad176f5f21cad950889f 100644 (file)
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3741,9 +3741,9 @@ static int mvpp2_set_features(struct net_device *dev,
  
         if (changed & NETIF_F_RXHASH) {
                 if (features & NETIF_F_RXHASH)
-                       mvpp22_rss_enable(port);
+                       mvpp22_port_rss_enable(port);
                 else
-                       mvpp22_rss_disable(port);
+                       mvpp22_port_rss_disable(port);
         }
  
         return 0;
@@ -4301,7 +4301,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
         mvpp2_cls_port_config(port);
  
         if (mvpp22_rss_is_supported())
-               mvpp22_rss_port_init(port);
+               mvpp22_port_rss_init(port);
  
         /* Provide an initial Rx packet size */
         port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu);
@@ -4848,6 +4848,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
         struct mvpp2_port *port;
         struct mvpp2_port_pcpu *port_pcpu;
         struct device_node *port_node = to_of_node(port_fwnode);
+       netdev_features_t features;
         struct net_device *dev;
         struct resource *res;
         struct phylink *phylink;
@@ -4856,7 +4857,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
         unsigned long flags = 0;
         bool has_tx_irqs;
         u32 id;
-       int features;
         int phy_mode;
         int err, i;
  
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index f02367faa58dbe44171454de6af50777ddc0ebf3..f628971988449a94898d8da72ffa3743d7a3d694 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -205,6 +205,7 @@ enum bpf_return_type {
         RET_PTR_TO_MAP_VALUE_OR_NULL,   /* returns a pointer to map elem value or NULL */
         RET_PTR_TO_SOCKET_OR_NULL,      /* returns a pointer to a socket or NULL */
         RET_PTR_TO_TCP_SOCK_OR_NULL,    /* returns a pointer to a tcp_sock or NULL */
+       RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
  };
  
  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 929c8e537a14a517c0a3c7ca5b6b15353d622c30..837024512bafd92c3773282ac5362d826fc93502 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1478,13 +1478,27 @@ union bpf_attr {
   *             Grow or shrink the room for data in the packet associated to
   *             *skb* by *len_diff*, and according to the selected *mode*.
   *
- *             There is a single supported mode at this time:
+ *             There are two supported modes at this time:
+ *
+ *             * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ *               (room space is added or removed below the layer 2 header).
   *
   *             * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
   *               (room space is added or removed below the layer 3 header).
   *
- *             All values for *flags* are reserved for future usage, and must
- *             be left at zero.
+ *             The following flags are supported at this time:
+ *
+ *             * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ *               Adjusting mss in this way is not allowed for datagrams.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
+ *               **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ *               Any new space is reserved to hold a tunnel header.
+ *               Configure skb offsets and other fields accordingly.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
+ *               **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ *               Use with ENCAP_L3 flags to further specify the tunnel type.
   *
   *             A call to this helper is susceptible to change the underlaying
   *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2431,6 +2445,38 @@ union bpf_attr {
   *     Return
   *             A **struct bpf_sock** pointer on success, or **NULL** in
   *             case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *     Description
+ *             Look for TCP socket matching *tuple*, optionally in a child
+ *             network namespace *netns*. The return value must be checked,
+ *             and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ *             This function is identical to bpf_sk_lookup_tcp, except that it
+ *             also returns timewait or request sockets. Use bpf_sk_fullsock
+ *             or bpf_tcp_sock to access the full structure.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             **CONFIG_NET** configuration option.
+ *     Return
+ *             Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ *             For sockets with reuseport option, the **struct bpf_sock**
+ *             result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *     Description
+ *             Check whether iph and th contain a valid SYN cookie ACK for
+ *             the listening socket in sk.
+ *
+ *             iph points to the start of the IPv4 or IPv6 header, while
+ *             iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
+ *
+ *             th points to the start of the TCP header, while th_len contains
+ *             sizeof(struct tcphdr).
+ *
+ *     Return
+ *             0 if iph and th are a valid SYN cookie ACK, or a negative error
+ *             otherwise.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -2531,7 +2577,9 @@ union bpf_attr {
         FN(sk_fullsock),                \
         FN(tcp_sock),                   \
         FN(skb_ecn_set_ce),             \
-       FN(get_listener_sock),
+       FN(get_listener_sock),          \
+       FN(skc_lookup_tcp),             \
+       FN(tcp_check_syncookie),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -2590,9 +2638,18 @@ enum bpf_func_id {
  /* Current network namespace */
  #define BPF_F_CURRENT_NETNS            (-1L)
  
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO       (1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4   (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6   (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE    (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP    (1ULL << 4)
+
  /* Mode for BPF_FUNC_skb_adjust_room helper. */
  enum bpf_adj_room_mode {
         BPF_ADJ_ROOM_NET,
+       BPF_ADJ_ROOM_MAC,
  };
  
  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 86f9cd5d1c4e1a747e2173152c3e6e9adfef9be8..dffeec3706ce60a1c9779c00faedebe300da009f 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id)
  static bool is_acquire_function(enum bpf_func_id func_id)
  {
         return func_id == BPF_FUNC_sk_lookup_tcp ||
-               func_id == BPF_FUNC_sk_lookup_udp;
+               func_id == BPF_FUNC_sk_lookup_udp ||
+               func_id == BPF_FUNC_skc_lookup_tcp;
  }
  
  static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@ -3147,19 +3148,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-               if (is_acquire_function(func_id)) {
-                       int id = acquire_reference_state(env, insn_idx);
-
-                       if (id < 0)
-                               return id;
-                       /* For mark_ptr_or_null_reg() */
-                       regs[BPF_REG_0].id = id;
-                       /* For release_reference() */
-                       regs[BPF_REG_0].ref_obj_id = id;
-               } else {
-                       /* For mark_ptr_or_null_reg() */
-                       regs[BPF_REG_0].id = ++env->id_gen;
-               }
+               regs[BPF_REG_0].id = ++env->id_gen;
+       } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
+               mark_reg_known_zero(env, regs, BPF_REG_0);
+               regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
+               regs[BPF_REG_0].id = ++env->id_gen;
         } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
@@ -3170,9 +3163,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                 return -EINVAL;
         }
  
-       if (is_ptr_cast_function(func_id))
+       if (is_ptr_cast_function(func_id)) {
                 /* For release_reference() */
                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+       } else if (is_acquire_function(func_id)) {
+               int id = acquire_reference_state(env, insn_idx);
+
+               if (id < 0)
+                       return id;
+               /* For mark_ptr_or_null_reg() */
+               regs[BPF_REG_0].id = id;
+               /* For release_reference() */
+               regs[BPF_REG_0].ref_obj_id = id;
+       }
  
         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
  
diff --git a/net/core/filter.c b/net/core/filter.c

index 647c63a7b25b6745e75a812b65a4052f3c72b690..22eb2edf55734f2f4db3e72d636aad90c1de4000 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2963,42 +2963,113 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
         }
  }
  
-static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK   (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+
+#define BPF_F_ADJ_ROOM_MASK            (BPF_F_ADJ_ROOM_FIXED_GSO | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
+                           u64 flags)
  {
-       u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+       bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
+       u16 mac_len = 0, inner_net = 0, inner_trans = 0;
+       unsigned int gso_type = SKB_GSO_DODGY;
         int ret;
  
-       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-               return -ENOTSUPP;
+       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+               /* udp gso_size delineates datagrams, only allow if fixed */
+               if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+                   !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       return -ENOTSUPP;
+       }
  
-       ret = skb_cow(skb, len_diff);
+       ret = skb_cow_head(skb, len_diff);
         if (unlikely(ret < 0))
                 return ret;
  
+       if (encap) {
+               if (skb->protocol != htons(ETH_P_IP) &&
+                   skb->protocol != htons(ETH_P_IPV6))
+                       return -ENOTSUPP;
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
+                   flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+                       return -EINVAL;
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
+                   flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+                       return -EINVAL;
+
+               if (skb->encapsulation)
+                       return -EALREADY;
+
+               mac_len = skb->network_header - skb->mac_header;
+               inner_net = skb->network_header;
+               inner_trans = skb->transport_header;
+       }
+
         ret = bpf_skb_net_hdr_push(skb, off, len_diff);
         if (unlikely(ret < 0))
                 return ret;
  
+       if (encap) {
+               /* inner mac == inner_net on l3 encap */
+               skb->inner_mac_header = inner_net;
+               skb->inner_network_header = inner_net;
+               skb->inner_transport_header = inner_trans;
+               skb_set_inner_protocol(skb, skb->protocol);
+
+               skb->encapsulation = 1;
+               skb_set_network_header(skb, mac_len);
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+                       gso_type |= SKB_GSO_UDP_TUNNEL;
+               else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
+                       gso_type |= SKB_GSO_GRE;
+               else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+                       gso_type |= SKB_GSO_IPXIP6;
+               else
+                       gso_type |= SKB_GSO_IPXIP4;
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
+                   flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
+                       int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
+                                       sizeof(struct ipv6hdr) :
+                                       sizeof(struct iphdr);
+
+                       skb_set_transport_header(skb, mac_len + nh_len);
+               }
+       }
+
         if (skb_is_gso(skb)) {
                 struct skb_shared_info *shinfo = skb_shinfo(skb);
  
                 /* Due to header grow, MSS needs to be downgraded. */
-               skb_decrease_gso_size(shinfo, len_diff);
+               if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       skb_decrease_gso_size(shinfo, len_diff);
+
                 /* Header must be checked, and gso_segs recomputed. */
-               shinfo->gso_type |= SKB_GSO_DODGY;
+               shinfo->gso_type |= gso_type;
                 shinfo->gso_segs = 0;
         }
  
         return 0;
  }
  
-static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
+                             u64 flags)
  {
-       u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
         int ret;
  
-       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-               return -ENOTSUPP;
+       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+               /* udp gso_size delineates datagrams, only allow if fixed */
+               if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+                   !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       return -ENOTSUPP;
+       }
  
         ret = skb_unclone(skb, GFP_ATOMIC);
         if (unlikely(ret < 0))
@@ -3012,7 +3083,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
                 struct skb_shared_info *shinfo = skb_shinfo(skb);
  
                 /* Due to header shrink, MSS can be upgraded. */
-               skb_increase_gso_size(shinfo, len_diff);
+               if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       skb_increase_gso_size(shinfo, len_diff);
+
                 /* Header must be checked, and gso_segs recomputed. */
                 shinfo->gso_type |= SKB_GSO_DODGY;
                 shinfo->gso_segs = 0;
@@ -3027,49 +3100,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
                           SKB_MAX_ALLOC;
  }
  
-static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+          u32, mode, u64, flags)
  {
-       bool trans_same = skb->transport_header == skb->network_header;
         u32 len_cur, len_diff_abs = abs(len_diff);
         u32 len_min = bpf_skb_net_base_len(skb);
         u32 len_max = __bpf_skb_max_len(skb);
         __be16 proto = skb->protocol;
         bool shrink = len_diff < 0;
+       u32 off;
         int ret;
  
+       if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+               return -EINVAL;
         if (unlikely(len_diff_abs > 0xfffU))
                 return -EFAULT;
         if (unlikely(proto != htons(ETH_P_IP) &&
                      proto != htons(ETH_P_IPV6)))
                 return -ENOTSUPP;
  
+       off = skb_mac_header_len(skb);
+       switch (mode) {
+       case BPF_ADJ_ROOM_NET:
+               off += bpf_skb_net_base_len(skb);
+               break;
+       case BPF_ADJ_ROOM_MAC:
+               break;
+       default:
+               return -ENOTSUPP;
+       }
+
         len_cur = skb->len - skb_network_offset(skb);
-       if (skb_transport_header_was_set(skb) && !trans_same)
-               len_cur = skb_network_header_len(skb);
         if ((shrink && (len_diff_abs >= len_cur ||
                         len_cur - len_diff_abs < len_min)) ||
             (!shrink && (skb->len + len_diff_abs > len_max &&
                          !skb_is_gso(skb))))
                 return -ENOTSUPP;
  
-       ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
-                      bpf_skb_net_grow(skb, len_diff_abs);
+       ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
+                      bpf_skb_net_grow(skb, off, len_diff_abs, flags);
  
         bpf_compute_data_pointers(skb);
         return ret;
  }
  
-BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-          u32, mode, u64, flags)
-{
-       if (unlikely(flags))
-               return -EINVAL;
-       if (likely(mode == BPF_ADJ_ROOM_NET))
-               return bpf_skb_adjust_net(skb, len_diff);
-
-       return -ENOTSUPP;
-}
-
  static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
         .func           = bpf_skb_adjust_room,
         .gpl_only       = false,
@@ -5156,15 +5230,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
         return sk;
  }
  
-/* bpf_sk_lookup performs the core lookup for different types of sockets,
+/* __bpf_skc_lookup performs the core lookup for different types of sockets,
   * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
   * Returns the socket as an 'unsigned long' to simplify the casting in the
   * callers to satisfy BPF_CALL declarations.
   */
-static unsigned long
-__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
-               struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
-               u64 flags)
+static struct sock *
+__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+                struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+                u64 flags)
  {
         struct sock *sk = NULL;
         u8 family = AF_UNSPEC;
@@ -5192,15 +5266,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
                 put_net(net);
         }
  
+out:
+       return sk;
+}
+
+static struct sock *
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+               struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+               u64 flags)
+{
+       struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
+                                          ifindex, proto, netns_id, flags);
+
         if (sk)
                 sk = sk_to_full_sk(sk);
-out:
-       return (unsigned long) sk;
+
+       return sk;
  }
  
-static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
-             u8 proto, u64 netns_id, u64 flags)
+static struct sock *
+bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+              u8 proto, u64 netns_id, u64 flags)
  {
         struct net *caller_net;
         int ifindex;
@@ -5213,14 +5299,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
                 ifindex = 0;
         }
  
-       return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
-                             proto, netns_id, flags);
+       return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
+                               netns_id, flags);
+}
+
+static struct sock *
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+             u8 proto, u64 netns_id, u64 flags)
+{
+       struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
+                                        flags);
+
+       if (sk)
+               sk = sk_to_full_sk(sk);
+
+       return sk;
+}
+
+BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
+          struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+       return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+                                            netns_id, flags);
  }
  
+static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
+       .func           = bpf_skc_lookup_tcp,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
  BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
            struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
  {
-       return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+       return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+                                           netns_id, flags);
  }
  
  static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
@@ -5238,7 +5357,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
  BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
            struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
  {
-       return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+       return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+                                           netns_id, flags);
  }
  
  static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
@@ -5273,8 +5393,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
         struct net *caller_net = dev_net(ctx->rxq->dev);
         int ifindex = ctx->rxq->dev->ifindex;
  
-       return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
-                             IPPROTO_UDP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+                                             ifindex, IPPROTO_UDP, netns_id,
+                                             flags);
  }
  
  static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
@@ -5289,14 +5410,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
         .arg5_type      = ARG_ANYTHING,
  };
  
+BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
+          struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+       struct net *caller_net = dev_net(ctx->rxq->dev);
+       int ifindex = ctx->rxq->dev->ifindex;
+
+       return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
+                                              ifindex, IPPROTO_TCP, netns_id,
+                                              flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+       .func           = bpf_xdp_skc_lookup_tcp,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
  BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
            struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
  {
         struct net *caller_net = dev_net(ctx->rxq->dev);
         int ifindex = ctx->rxq->dev->ifindex;
  
-       return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
-                             IPPROTO_TCP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+                                             ifindex, IPPROTO_TCP, netns_id,
+                                             flags);
  }
  
  static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
@@ -5311,11 +5456,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
         .arg5_type      = ARG_ANYTHING,
  };
  
+/* sock_addr variant of the skc TCP lookup helper.
+ *
+ * Uses the network namespace of ctx->sk as the caller namespace and passes
+ * 0 as the ifindex, then forwards to __bpf_skc_lookup() with IPPROTO_TCP
+ * and a NULL first argument. The looked-up pointer is returned as an
+ * unsigned long.
+ */
+BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+          struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+       return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
+                                              sock_net(ctx->sk), 0,
+                                              IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+       .func           = bpf_sock_addr_skc_lookup_tcp,
+       .gpl_only       = false,
+       .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
  BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
            struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
  {
-       return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
-                              IPPROTO_TCP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+                                             sock_net(ctx->sk), 0, IPPROTO_TCP,
+                                             netns_id, flags);
  }
  
  static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
@@ -5332,8 +5497,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
  BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
            struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
  {
-       return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
-                              IPPROTO_UDP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+                                             sock_net(ctx->sk), 0, IPPROTO_UDP,
+                                             netns_id, flags);
  }
  
  static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
@@ -5461,6 +5627,74 @@ static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
         .ret_type       = RET_INTEGER,
         .arg1_type      = ARG_PTR_TO_CTX,
  };
+
+/* Check whether the headers at iph/th form a valid SYN cookie ACK for the
+ * listening socket sk.
+ *
+ * Returns 0 when the cookie check succeeds. Otherwise returns:
+ *   -EINVAL          th_len or iph_len is smaller than the header it must
+ *                    hold, sk is not a TCP socket in TCP_LISTEN state, or
+ *                    the tcp_syncookies sysctl of the socket's netns is 0;
+ *   -ENOENT          the segment is not a plain ACK (ACK clear, or RST/SYN
+ *                    set), tcp_synq_no_recent_overflow() is true, or the
+ *                    cookie check did not return a positive value;
+ *   -EPROTONOSUPPORT sk->sk_family is not handled by the switch below;
+ *   -ENOTSUPP        the kernel was built without CONFIG_SYN_COOKIES.
+ */
+BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+          struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+       u32 cookie;
+       int ret;
+
+       if (unlikely(th_len < sizeof(*th)))
+               return -EINVAL;
+
+       /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
+       if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+               return -EINVAL;
+
+       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+               return -EINVAL;
+
+       if (!th->ack || th->rst || th->syn)
+               return -ENOENT;
+
+       if (tcp_synq_no_recent_overflow(sk))
+               return -ENOENT;
+
+       /* the candidate cookie is the acknowledged sequence number minus one */
+       cookie = ntohl(th->ack_seq) - 1;
+
+       /* the header type behind iph is chosen by the socket family */
+       switch (sk->sk_family) {
+       case AF_INET:
+               if (unlikely(iph_len < sizeof(struct iphdr)))
+                       return -EINVAL;
+
+               ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+               break;
+
+       /* compiled in only when IS_BUILTIN(CONFIG_IPV6) holds; otherwise
+        * AF_INET6 sockets take the default branch
+        */
+#if IS_BUILTIN(CONFIG_IPV6)
+       case AF_INET6:
+               if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+                       return -EINVAL;
+
+               ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+               break;
+#endif /* CONFIG_IPV6 */
+
+       default:
+               return -EPROTONOSUPPORT;
+       }
+
+       if (ret > 0)
+               return 0;
+
+       return -ENOENT;
+#else
+       return -ENOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
+       .func           = bpf_tcp_check_syncookie,
+       .gpl_only       = true,
+       .pkt_access     = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
  #endif /* CONFIG_INET */
  
  bool bpf_helper_changes_pkt_data(void *func)
@@ -5586,6 +5820,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_sock_addr_sk_lookup_udp_proto;
         case BPF_FUNC_sk_release:
                 return &bpf_sk_release_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_sock_addr_skc_lookup_tcp_proto;
  #endif /* CONFIG_INET */
         default:
                 return bpf_base_func_proto(func_id);
@@ -5719,6 +5955,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_tcp_sock_proto;
         case BPF_FUNC_get_listener_sock:
                 return &bpf_get_listener_sock_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_skc_lookup_tcp_proto;
+       case BPF_FUNC_tcp_check_syncookie:
+               return &bpf_tcp_check_syncookie_proto;
+       case BPF_FUNC_skb_ecn_set_ce:
+               return &bpf_skb_ecn_set_ce_proto;
  #endif
         default:
                 return bpf_base_func_proto(func_id);
@@ -5754,6 +5996,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_xdp_sk_lookup_tcp_proto;
         case BPF_FUNC_sk_release:
                 return &bpf_sk_release_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_xdp_skc_lookup_tcp_proto;
+       case BPF_FUNC_tcp_check_syncookie:
+               return &bpf_tcp_check_syncookie_proto;
  #endif
         default:
                 return bpf_base_func_proto(func_id);
@@ -5846,6 +6092,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_sk_lookup_udp_proto;
         case BPF_FUNC_sk_release:
                 return &bpf_sk_release_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_skc_lookup_tcp_proto;
  #endif
         default:
                 return bpf_base_func_proto(func_id);
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore

index dbb817dbacfcaefd42daa9d55ca04addb05fc83a..59e40998e2493ba75cc72d112404ddfbd1df5655 100644 (file)
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -44,5 +44,6 @@ xdp_redirect_cpu
  xdp_redirect_map
  xdp_router_ipv4
  xdp_rxq_info
+xdp_sample_pkts
  xdp_tx_iptunnel
  xdpsock
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 929c8e537a14a517c0a3c7ca5b6b15353d622c30..837024512bafd92c3773282ac5362d826fc93502 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1478,13 +1478,27 @@ union bpf_attr {
   *             Grow or shrink the room for data in the packet associated to
   *             *skb* by *len_diff*, and according to the selected *mode*.
   *
- *             There is a single supported mode at this time:
+ *             There are two supported modes at this time:
+ *
+ *             * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ *               (room space is added or removed below the layer 2 header).
   *
   *             * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
   *               (room space is added or removed below the layer 3 header).
   *
- *             All values for *flags* are reserved for future usage, and must
- *             be left at zero.
+ *             The following flags are supported at this time:
+ *
+ *             * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ *               Adjusting mss in this way is not allowed for datagrams.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
+ *               **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ *               Any new space is reserved to hold a tunnel header.
+ *               Configure skb offsets and other fields accordingly.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
+ *               **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ *               Use with ENCAP_L3 flags to further specify the tunnel type.
   *
   *             A call to this helper is susceptible to change the underlaying
   *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2431,6 +2445,38 @@ union bpf_attr {
   *     Return
   *             A **struct bpf_sock** pointer on success, or **NULL** in
   *             case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *     Description
+ *             Look for TCP socket matching *tuple*, optionally in a child
+ *             network namespace *netns*. The return value must be checked,
+ *             and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ *             This function is identical to bpf_sk_lookup_tcp, except that it
+ *             also returns timewait or request sockets. Use bpf_sk_fullsock
+ *             or bpf_tcp_sock to access the full structure.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             **CONFIG_NET** configuration option.
+ *     Return
+ *             Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ *             For sockets with reuseport option, the **struct bpf_sock**
+ *             result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *     Description
+ *             Check whether iph and th contain a valid SYN cookie ACK for
+ *             the listening socket in sk.
+ *
+ *             iph points to the start of the IPv4 or IPv6 header, while
+ *             iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
+ *
+ *             th points to the start of the TCP header, while th_len contains
+ *             sizeof(struct tcphdr).
+ *
+ *     Return
+ *             0 if iph and th are a valid SYN cookie ACK, or a negative error
+ *             otherwise.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -2531,7 +2577,9 @@ union bpf_attr {
         FN(sk_fullsock),                \
         FN(tcp_sock),                   \
         FN(skb_ecn_set_ce),             \
-       FN(get_listener_sock),
+       FN(get_listener_sock),          \
+       FN(skc_lookup_tcp),             \
+       FN(tcp_check_syncookie),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -2590,9 +2638,18 @@ enum bpf_func_id {
  /* Current network namespace */
  #define BPF_F_CURRENT_NETNS            (-1L)
  
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO       (1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4   (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6   (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE    (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP    (1ULL << 4)
+
  /* Mode for BPF_FUNC_skb_adjust_room helper. */
  enum bpf_adj_room_mode {
         BPF_ADJ_ROOM_NET,
+       BPF_ADJ_ROOM_MAC,
  };
  
  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore

index 3b74d23fffabe46e5fac0f78b4c28be5dce6d13c..41e8a689aa77744e91282bd14a98c9d16a20dc2a 100644 (file)
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -30,4 +30,5 @@ test_netcnt
  test_section_names
  test_tcpnotify_user
  test_libbpf
+test_tcp_check_syncookie_user
  alu32
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile

index 2aed37ea61a4cba8f2e871686d7a94590cac3326..77b73b892136be4f96fa772ad9e24c9d8a77544c 100644 (file)
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -51,7 +51,10 @@ TEST_PROGS := test_kmod.sh \
         test_skb_cgroup_id.sh \
         test_flow_dissector.sh \
         test_xdp_vlan.sh \
-       test_lwt_ip_encap.sh
+       test_lwt_ip_encap.sh \
+       test_tcp_check_syncookie.sh \
+       test_tc_tunnel.sh \
+       test_tc_edt.sh
  
  TEST_PROGS_EXTENDED := with_addr.sh \
         with_tunnels.sh \
@@ -60,7 +63,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
  
  # Compile but not part of 'make run_tests'
  TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
-       flow_dissector_load test_flow_dissector
+       flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user
  
  include ../lib.mk
  
@@ -69,7 +72,7 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
  all: $(TEST_CUSTOM_PROGS)
  
  $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
-       $(CC) -o $@ -static $< -Wl,--build-id
+       $(CC) -o $@ $< -Wl,--build-id
  
  BPFOBJ := $(OUTPUT)/libbpf.a
  
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h

index c81fc350f7ad46ad60d53ac3dd8121059020f9a6..97d140961438fa5171175fa463a04301105771c6 100644 (file)
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
                                              int size, unsigned long long netns_id,
                                              unsigned long long flags) =
         (void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx,
+                                            struct bpf_sock_tuple *tuple,
+                                            int size, unsigned long long netns_id,
+                                            unsigned long long flags) =
+       (void *) BPF_FUNC_skc_lookup_tcp;
  static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
                                              struct bpf_sock_tuple *tuple,
                                              int size, unsigned long long netns_id,
@@ -184,6 +189,9 @@ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
         (void *) BPF_FUNC_get_listener_sock;
  static int (*bpf_skb_ecn_set_ce)(void *ctx) =
         (void *) BPF_FUNC_skb_ecn_set_ce;
+static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
+           void *ip, int ip_len, void *tcp, int tcp_len) =
+       (void *) BPF_FUNC_tcp_check_syncookie;
  
  /* llvm builtin functions that eBPF C program may use to
   * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -274,6 +282,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
  #elif defined(__TARGET_ARCH_s930x)
         #define bpf_target_s930x
         #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+       #define bpf_target_arm
+       #define bpf_target_defined
  #elif defined(__TARGET_ARCH_arm64)
         #define bpf_target_arm64
         #define bpf_target_defined
@@ -296,6 +307,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
         #define bpf_target_x86
  #elif defined(__s390x__)
         #define bpf_target_s930x
+#elif defined(__arm__)
+       #define bpf_target_arm
  #elif defined(__aarch64__)
         #define bpf_target_arm64
  #elif defined(__mips__)
@@ -333,6 +346,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
  #define PT_REGS_SP(x) ((x)->gprs[15])
  #define PT_REGS_IP(x) ((x)->psw.addr)
  
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
  #elif defined(bpf_target_arm64)
  
  #define PT_REGS_PARM1(x) ((x)->regs[0])
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config

index 37f947ec44ed91533489572b2bd053ded3f080cd..a42f4fc4dc11f6f1a4319847599ff9fb2728b32b 100644 (file)
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y
  CONFIG_BPF_STREAM_PARSER=y
  CONFIG_XDP_SOCKETS=y
  CONFIG_FTRACE_SYSCALLS=y
+CONFIG_IPV6_TUNNEL=y
+CONFIG_IPV6_GRE=y
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c

index 8a114bb1c379040b81bd70c87e10ab8fd9401af1..1c1a2f75f3d828da1288ecaa52657851c0600be8 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -1,13 +1,25 @@
  // SPDX-License-Identifier: GPL-2.0
  #include <test_progs.h>
  
+/* Read /proc/sys/kernel/perf_event_max_sample_rate.
+ *
+ * Returns the value parsed from that file, or 5000 when the file cannot
+ * be opened or does not start with an unsigned decimal number.
+ */
+static __u64 read_perf_max_sample_freq(void)
+{
+       __u64 sample_freq = 5000; /* fallback to 5000 on error */
+       unsigned long long val;
+       FILE *f;
+
+       f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+       if (f == NULL)
+               return sample_freq;
+       /* %llu requires an unsigned long long *, and __u64 is not that
+        * type on every architecture, so parse into a local and only
+        * take the value when the conversion actually succeeded.
+        */
+       if (fscanf(f, "%llu", &val) == 1)
+               sample_freq = val;
+       fclose(f);
+       return sample_freq;
+}
+
  void test_stacktrace_build_id_nmi(void)
  {
         int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
         const char *file = "./test_stacktrace_build_id.o";
         int err, pmu_fd, prog_fd;
         struct perf_event_attr attr = {
-               .sample_freq = 5000,
                 .freq = 1,
                 .type = PERF_TYPE_HARDWARE,
                 .config = PERF_COUNT_HW_CPU_CYCLES,
@@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void)
         int build_id_matches = 0;
         int retry = 1;
  
+       attr.sample_freq = read_perf_max_sample_freq();
+
  retry:
         err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
         if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c

new file mode 100644 (file)

index 0000000..3af64c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* the maximum delay we are willing to add (drop packets beyond that) */
+#define TIME_HORIZON_NS (2000 * 1000 * 1000)
+#define NS_PER_SEC 1000000000
+#define ECN_HORIZON_NS 5000000
+#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
+
+/* flow_key => last timestamp assigned to a packet of that flow */
+struct bpf_map_def SEC("maps") flow_map = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(uint32_t),
+       .value_size = sizeof(uint64_t),
+       .max_entries = 1,
+};
+
+/* Pace the flow by assigning departure timestamps to its packets.
+ *
+ * The per-packet spacing is skb->len * NS_PER_SEC / THROTTLE_RATE_BPS.
+ * The last timestamp handed out is kept in flow_map under key 0.
+ *
+ * Returns TC_ACT_OK when the packet may proceed (skb->tstamp is rewritten
+ * only when the packet has to be delayed), and TC_ACT_SHOT when a map
+ * update fails or the required delay reaches TIME_HORIZON_NS.
+ */
+static inline int throttle_flow(struct __sk_buff *skb)
+{
+       int key = 0;
+       uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
+       uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
+                       THROTTLE_RATE_BPS;
+       uint64_t now = bpf_ktime_get_ns();
+       uint64_t tstamp, next_tstamp = 0;
+
+       /* no map entry yet: next_tstamp stays 0, so the packet is not delayed */
+       if (last_tstamp)
+               next_tstamp = *last_tstamp + delay_ns;
+
+       /* never use a timestamp that lies in the past */
+       tstamp = skb->tstamp;
+       if (tstamp < now)
+               tstamp = now;
+
+       /* should we throttle? */
+       if (next_tstamp <= tstamp) {
+               /* no: just record this packet's timestamp for the flow */
+               if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY))
+                       return TC_ACT_SHOT;
+               return TC_ACT_OK;
+       }
+
+       /* do not queue past the time horizon */
+       if (next_tstamp - now >= TIME_HORIZON_NS)
+               return TC_ACT_SHOT;
+
+       /* set ecn bit, if needed */
+       if (next_tstamp - now >= ECN_HORIZON_NS)
+               bpf_skb_ecn_set_ce(skb);
+
+       /* BPF_EXIST: the entry is expected to be present on this path */
+       if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST))
+               return TC_ACT_SHOT;
+       skb->tstamp = next_tstamp;
+
+       return TC_ACT_OK;
+}
+
+static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
+{
+       void *data_end = (void *)(long)skb->data_end;
+
+       /* drop malformed packets */
+       if ((void *)(tcp + 1) > data_end)
+               return TC_ACT_SHOT;
+
+       if (tcp->dest == bpf_htons(9000))
+               return throttle_flow(skb);
+
+       return TC_ACT_OK;
+}
+
+static inline int handle_ipv4(struct __sk_buff *skb)
+{
+       void *data_end = (void *)(long)skb->data_end;
+       void *data = (void *)(long)skb->data;
+       struct iphdr *iph;
+       uint32_t ihl;
+
+       /* drop malformed packets */
+       if (data + sizeof(struct ethhdr) > data_end)
+               return TC_ACT_SHOT;
+       iph = (struct iphdr *)(data + sizeof(struct ethhdr));
+       if ((void *)(iph + 1) > data_end)
+               return TC_ACT_SHOT;
+       ihl = iph->ihl * 4;
+       if (((void *)iph) + ihl > data_end)
+               return TC_ACT_SHOT;
+
+       if (iph->protocol == IPPROTO_TCP)
+               return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl));
+
+       return TC_ACT_OK;
+}
+
+SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+{
+       if (skb->protocol == bpf_htons(ETH_P_IP))
+               return handle_ipv4(skb);
+
+       return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c

new file mode 100644 (file)

index 0000000..f541c2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* In-place tunneling */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <linux/types.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+
+static const int cfg_port = 8000;
+
+struct grev4hdr {
+       struct iphdr ip;
+       __be16 flags;
+       __be16 protocol;
+} __attribute__((packed));
+
+struct grev6hdr {
+       struct ipv6hdr ip;
+       __be16 flags;
+       __be16 protocol;
+} __attribute__((packed));
+
+/* Compute the checksum over the sizeof(struct iphdr) bytes at iph and
+ * store it in iph->check. The check field is zeroed first so it does not
+ * contribute to the sum.
+ */
+static __always_inline void set_ipv4_csum(struct iphdr *iph)
+{
+       __u16 *iph16 = (__u16 *)iph;
+       __u32 csum;
+       int i;
+
+       iph->check = 0;
+
+#pragma clang loop unroll(full)
+       for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
+               csum += *iph16++;
+
+       /* Fold the carries back in twice: the first fold can itself carry
+        * out of 16 bits, and that carry would be lost when the result is
+        * truncated into the 16-bit check field.
+        */
+       csum = (csum & 0xffff) + (csum >> 16);
+       csum = (csum & 0xffff) + (csum >> 16);
+
+       iph->check = ~csum;
+}
+
+/* Encapsulate an IPv4/TCP packet addressed to cfg_port in an outer IPv4
+ * header, optionally followed by a GRE header when with_gre is true.
+ *
+ * Packets that cannot be read or do not match the filter are passed
+ * through untouched (TC_ACT_OK). TC_ACT_SHOT is returned when making room
+ * or writing the outer header fails.
+ */
+static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+{
+       struct grev4hdr h_outer;
+       struct iphdr iph_inner;
+       struct tcphdr tcph;
+       __u64 flags;
+       int olen;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+                              sizeof(iph_inner)) < 0)
+               return TC_ACT_OK;
+
+       /* filter only packets we want */
+       if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
+               return TC_ACT_OK;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+                              &tcph, sizeof(tcph)) < 0)
+               return TC_ACT_OK;
+
+       if (tcph.dest != __bpf_constant_htons(cfg_port))
+               return TC_ACT_OK;
+
+       /* olen covers the outer IP header, plus the GRE header if used */
+       flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
+       if (with_gre) {
+               flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+               olen = sizeof(h_outer);
+       } else {
+               olen = sizeof(h_outer.ip);
+       }
+
+       /* add room between mac and network header */
+       if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+               return TC_ACT_SHOT;
+
+       /* prepare new outer network header */
+       h_outer.ip = iph_inner;
+       /* tot_len is in network byte order: convert to host order before
+        * adding olen, then back to network order for the header
+        */
+       h_outer.ip.tot_len = bpf_htons(olen +
+                                     bpf_ntohs(h_outer.ip.tot_len));
+       if (with_gre) {
+               h_outer.ip.protocol = IPPROTO_GRE;
+               h_outer.protocol = bpf_htons(ETH_P_IP);
+               h_outer.flags = 0;
+       } else {
+               h_outer.ip.protocol = IPPROTO_IPIP;
+       }
+
+       set_ipv4_csum((void *)&h_outer.ip);
+
+       /* store new outer network header */
+       if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+                               BPF_F_INVALIDATE_HASH) < 0)
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+{
+       struct ipv6hdr iph_inner;
+       struct grev6hdr h_outer;
+       struct tcphdr tcph;
+       __u64 flags;
+       int olen;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+                              sizeof(iph_inner)) < 0)
+               return TC_ACT_OK;
+
+       /* filter only packets we want */
+       if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+                              &tcph, sizeof(tcph)) < 0)
+               return TC_ACT_OK;
+
+       if (tcph.dest != __bpf_constant_htons(cfg_port))
+               return TC_ACT_OK;
+
+       flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+       if (with_gre) {
+               flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+               olen = sizeof(h_outer);
+       } else {
+               olen = sizeof(h_outer.ip);
+       }
+
+
+       /* add room between mac and network header */
+       if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+               return TC_ACT_SHOT;
+
+       /* prepare new outer network header */
+       h_outer.ip = iph_inner;
+       h_outer.ip.payload_len = bpf_htons(olen +
+                                          bpf_ntohs(h_outer.ip.payload_len));
+       if (with_gre) {
+               h_outer.ip.nexthdr = IPPROTO_GRE;
+               h_outer.protocol = bpf_htons(ETH_P_IPV6);
+               h_outer.flags = 0;
+       } else {
+               h_outer.ip.nexthdr = IPPROTO_IPV6;
+       }
+
+       /* store new outer network header */
+       if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+                               BPF_F_INVALIDATE_HASH) < 0)
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+SEC("encap_ipip")
+int __encap_ipip(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+               return encap_ipv4(skb, false);
+       else
+               return TC_ACT_OK;
+}
+
+SEC("encap_gre")
+int __encap_gre(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+               return encap_ipv4(skb, true);
+       else
+               return TC_ACT_OK;
+}
+
+SEC("encap_ip6tnl")
+int __encap_ip6tnl(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+               return encap_ipv6(skb, false);
+       else
+               return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre")
+int __encap_ip6gre(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+               return encap_ipv6(skb, true);
+       else
+               return TC_ACT_OK;
+}
+
+/* Strip the outer header of a tunneled packet.
+ *
+ * len is the size of the outer IP header and proto is its next-protocol
+ * value: IPPROTO_IPIP and IPPROTO_IPV6 remove len bytes, IPPROTO_GRE
+ * removes len plus a 4-byte GRE header, and any other protocol leaves the
+ * packet untouched. off is accepted but not used here.
+ *
+ * Returns TC_ACT_SHOT when shrinking the packet fails, TC_ACT_OK otherwise.
+ */
+static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
+{
+       int olen;
+
+       switch (proto) {
+       case IPPROTO_IPIP:
+       case IPPROTO_IPV6:
+               olen = len;
+               break;
+       case IPPROTO_GRE:
+               olen = len + 4 /* gre hdr */;
+               break;
+       default:
+               return TC_ACT_OK;
+       }
+
+       /* a negative length shrinks the room after the mac header */
+       if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
+                               BPF_F_ADJ_ROOM_FIXED_GSO))
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+static int decap_ipv4(struct __sk_buff *skb)
+{
+       struct iphdr iph_outer;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+                              sizeof(iph_outer)) < 0)
+               return TC_ACT_OK;
+
+       if (iph_outer.ihl != 5)
+               return TC_ACT_OK;
+
+       return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
+                             iph_outer.protocol);
+}
+
+static int decap_ipv6(struct __sk_buff *skb)
+{
+       struct ipv6hdr iph_outer;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+                              sizeof(iph_outer)) < 0)
+               return TC_ACT_OK;
+
+       return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
+                             iph_outer.nexthdr);
+}
+
+SEC("decap")
+int decap_f(struct __sk_buff *skb)
+{
+       switch (skb->protocol) {
+       case __bpf_constant_htons(ETH_P_IP):
+               return decap_ipv4(skb);
+       case __bpf_constant_htons(ETH_P_IPV6):
+               return decap_ipv6(skb);
+       default:
+               /* does not match, ignore */
+               return TC_ACT_OK;
+       }
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c

new file mode 100644 (file)

index 0000000..1ab095b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") results = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u64),
+       .max_entries = 1,
+};
+
+/* Parse the Ethernet/IP/TCP headers between data and data_end, look up the
+ * matching socket with bpf_skc_lookup_tcp() and, if it is a listener, run
+ * bpf_tcp_check_syncookie() on the packet. When the helper returns 0, the
+ * value 1 is written to slot 0 of the results map.
+ *
+ * Only option-less IPv4 headers (ihl == 5) and IPv6 headers directly
+ * followed by TCP are handled; everything else returns without effect.
+ * Every socket obtained from the lookup is released before returning.
+ */
+static __always_inline void check_syncookie(void *ctx, void *data,
+                                           void *data_end)
+{
+       struct bpf_sock_tuple tup;
+       struct bpf_sock *sk;
+       struct ethhdr *ethh;
+       struct iphdr *ipv4h;
+       struct ipv6hdr *ipv6h;
+       struct tcphdr *tcph;
+       int ret;
+       __u32 key = 0;
+       __u64 value = 1;
+
+       ethh = data;
+       if (ethh + 1 > data_end)
+               return;
+
+       switch (bpf_ntohs(ethh->h_proto)) {
+       case ETH_P_IP:
+               ipv4h = data + sizeof(struct ethhdr);
+               if (ipv4h + 1 > data_end)
+                       return;
+
+               if (ipv4h->ihl != 5)
+                       return;
+
+               /* same filter as the IPv6 branch: do not read non-TCP
+                * payload as a TCP header
+                */
+               if (ipv4h->protocol != IPPROTO_TCP)
+                       return;
+
+               tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr);
+               if (tcph + 1 > data_end)
+                       return;
+
+               tup.ipv4.saddr = ipv4h->saddr;
+               tup.ipv4.daddr = ipv4h->daddr;
+               tup.ipv4.sport = tcph->source;
+               tup.ipv4.dport = tcph->dest;
+
+               sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4),
+                                       BPF_F_CURRENT_NETNS, 0);
+               if (!sk)
+                       return;
+
+               if (sk->state != BPF_TCP_LISTEN)
+                       goto release;
+
+               ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
+                                             tcph, sizeof(*tcph));
+               break;
+
+       case ETH_P_IPV6:
+               ipv6h = data + sizeof(struct ethhdr);
+               if (ipv6h + 1 > data_end)
+                       return;
+
+               if (ipv6h->nexthdr != IPPROTO_TCP)
+                       return;
+
+               tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+               if (tcph + 1 > data_end)
+                       return;
+
+               memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr));
+               memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr));
+               tup.ipv6.sport = tcph->source;
+               tup.ipv6.dport = tcph->dest;
+
+               sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6),
+                                       BPF_F_CURRENT_NETNS, 0);
+               if (!sk)
+                       return;
+
+               if (sk->state != BPF_TCP_LISTEN)
+                       goto release;
+
+               ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
+                                             tcph, sizeof(*tcph));
+               break;
+
+       default:
+               return;
+       }
+
+       /* report a valid cookie to user space through the results map */
+       if (ret == 0)
+               bpf_map_update_elem(&results, &key, &value, 0);
+
+release:
+       bpf_sk_release(sk);
+}
+
+SEC("clsact/check_syncookie")
+int check_syncookie_clsact(struct __sk_buff *skb)
+{
+       check_syncookie(skb, (void *)(long)skb->data,
+                       (void *)(long)skb->data_end);
+       return TC_ACT_OK;
+}
+
+SEC("xdp/check_syncookie")
+int check_syncookie_xdp(struct xdp_md *ctx)
+{
+       check_syncookie(ctx, (void *)(long)ctx->data,
+                       (void *)(long)ctx->data_end);
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh

new file mode 100755 (executable)

index 0000000..f38567e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_edt.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test installs a TC bpf program that throttles a TCP flow
+# with dst port = 9000 down to 5MBps. Then it measures actual
+# throughput of the flow.
+
+if [[ $EUID -ne 0 ]]; then
+       echo "This script must be run as root"
+       echo "FAIL"
+       exit 1
+fi
+
+# check that nc, dd, and timeout are present
+command -v nc >/dev/null 2>&1 || \
+       { echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+       { echo >&2 "nc is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+       { echo >&2 "timeout is not available"; exit 1; }
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP_SRC="172.16.1.100"
+readonly IP_DST="172.16.2.100"
+
+cleanup()
+{
+       ip netns del ${NS_SRC}
+       ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e  # exit on error
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_DST}"
+ip link add veth_src type veth peer name veth_dst
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_dst netns ${NS_DST}
+
+ip -netns ${NS_SRC} addr add ${IP_SRC}/24  dev veth_src
+ip -netns ${NS_DST} addr add ${IP_DST}/24  dev veth_dst
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_DST} link set dev veth_dst up
+
+ip -netns ${NS_SRC} route add ${IP_DST}/32  dev veth_src
+ip -netns ${NS_DST} route add ${IP_SRC}/32  dev veth_dst
+
+# set up TC on TX
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
+ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
+       bpf da obj test_tc_edt.o sec cls_test
+
+
+# start the listener
+ip netns exec ${NS_DST} bash -c \
+       "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+declare -i NC_PID=$!
+sleep 1
+
+declare -ir TIMEOUT=20
+declare -ir EXPECTED_BPS=5000000
+
+# run the load, capture RX bytes on DST
+declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
+       cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+set +e
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
+       bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
+set -e
+
+declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
+       cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
+
+echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
+       awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
+               $1, ($2-$3)*100.0/$3}'
+
+# Pass the test if the actual bps is within 1% of the expected bps.
+# The difference is usually about 0.1% on a 20-sec test, and approaches
+# zero the longer the test runs.
+declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
+        awk 'function abs(x){return ((x < 0.0) ? -x : x)}
+             {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
+               else { print "0"} }' )
+if [ "${RES}" == "0" ] ; then
+       echo "PASS"
+else
+       echo "FAIL"
+       exit 1
+fi
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh

new file mode 100755 (executable)

index 0000000..c805adb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In-place tunneling
+
+# must match the port that the bpf program filters on
+readonly port=8000
+
+readonly ns_prefix="ns-$$-"
+readonly ns1="${ns_prefix}1"
+readonly ns2="${ns_prefix}2"
+
+readonly ns1_v4=192.168.1.1
+readonly ns2_v4=192.168.1.2
+readonly ns1_v6=fd::1
+readonly ns2_v6=fd::2
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+
+setup() {
+       ip netns add "${ns1}"
+       ip netns add "${ns2}"
+
+       ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
+             peer name veth2 mtu 1500 netns "${ns2}"
+
+       ip netns exec "${ns1}" ethtool -K veth1 tso off
+
+       ip -netns "${ns1}" link set veth1 up
+       ip -netns "${ns2}" link set veth2 up
+
+       ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
+       ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
+       ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
+       ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
+
+       # clamp route to reserve room for tunnel headers
+       ip -netns "${ns1}" -4 route flush table main
+       ip -netns "${ns1}" -6 route flush table main
+       ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
+       ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+
+       sleep 1
+
+       dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+       ip netns del "${ns2}"
+       ip netns del "${ns1}"
+
+       if [[ -f "${outfile}" ]]; then
+               rm "${outfile}"
+       fi
+       if [[ -f "${infile}" ]]; then
+               rm "${infile}"
+       fi
+}
+
+# Start a background nc listener in ns2 on ${port}, writing whatever it
+# receives to ${outfile}. The pid is kept in server_pid, which verify_data
+# waits on; the short sleep presumably gives nc time to start listening.
+server_listen() {
+       ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+       server_pid=$!
+       sleep 0.2
+}
+
+# Send ${infile} from ns1 to ${addr2}:${port} with nc (bounded by "timeout 2"),
+# then print the exit status of that command.
+# NOTE(review): echo is the last command, so whenever the echo is reached the
+# function's return status is echo's, not nc's - confirm that callers such as
+# "! client_connect" assert what is intended.
+client_connect() {
+       ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
+       echo $?
+}
+
+verify_data() {
+       wait "${server_pid}"
+       # sha1sum returns two fields [sha1] [filepath]
+       # convert to bash array and access first elem
+       insum=($(sha1sum ${infile}))
+       outsum=($(sha1sum ${outfile}))
+       if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
+               echo "data mismatch"
+               exit 1
+       fi
+}
+
+set -e
+
+# no arguments: automated test, run all
+if [[ "$#" -eq "0" ]]; then
+       echo "ipip"
+       $0 ipv4 ipip 100
+
+       echo "ip6ip6"
+       $0 ipv6 ip6tnl 100
+
+       echo "ip gre"
+       $0 ipv4 gre 100
+
+       echo "ip6 gre"
+       $0 ipv6 ip6gre 100
+
+       echo "ip gre gso"
+       $0 ipv4 gre 2000
+
+       echo "ip6 gre gso"
+       $0 ipv6 ip6gre 2000
+
+       echo "OK. All tests passed"
+       exit 0
+fi
+
+if [[ "$#" -ne "3" ]]; then
+       echo "Usage: $0"
+       echo "   or: $0 <ipv4|ipv6> <tuntype> <data_len>"
+       exit 1
+fi
+
+case "$1" in
+"ipv4")
+       readonly addr1="${ns1_v4}"
+       readonly addr2="${ns2_v4}"
+       readonly netcat_opt=-4
+       ;;
+"ipv6")
+       readonly addr1="${ns1_v6}"
+       readonly addr2="${ns2_v6}"
+       readonly netcat_opt=-6
+       ;;
+*)
+       echo "unknown arg: $1"
+       exit 1
+       ;;
+esac
+
+readonly tuntype=$2
+readonly datalen=$3
+
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
+
+trap cleanup EXIT
+
+setup
+
+# basic communication works
+echo "test basic connectivity"
+server_listen
+client_connect
+verify_data
+
+# clientside, insert bpf program to encap all TCP to port ${port}
+# client can no longer connect
+ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
+ip netns exec "${ns1}" tc filter add dev veth1 egress \
+       bpf direct-action object-file ./test_tc_tunnel.o \
+       section "encap_${tuntype}"
+echo "test bpf encap without decap (expect failure)"
+server_listen
+! client_connect
+
+# serverside, insert decap module
+# server is still running
+# client can connect again
+ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
+       remote "${addr1}" local "${addr2}"
+# Because packets are decapped by the tunnel they arrive on testtun0 from
+# the IP stack perspective.  Ensure reverse path filtering is disabled
+# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
+# expected veth2 (veth2 is where 192.168.1.2 is configured).
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+# rp needs to be disabled for both all and testtun0 as the rp value is
+# selected as the max of the "all" and device-specific values.
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
+ip netns exec "${ns2}" ip link set dev testtun0 up
+echo "test bpf encap with tunnel device decap"
+client_connect
+verify_data
+
+# serverside, use BPF for decap
+ip netns exec "${ns2}" ip link del dev testtun0
+ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
+ip netns exec "${ns2}" tc filter add dev veth2 ingress \
+       bpf direct-action object-file ./test_tc_tunnel.o section decap
+server_listen
+echo "test bpf encap with bpf decap"
+client_connect
+verify_data
+
+echo OK
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh

new file mode 100755 (executable)

index 0000000..d48e517
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+# Copyright (c) 2019 Cloudflare
+
+set -eu
+
+# Ping the address given as $1 from inside ns1 until it answers, trying at
+# most MAX_PING_TRIES times with a 1s sleep between attempts. Prints a
+# progress dot per attempt; on timeout, reports an error on stderr and exits
+# the script with status 1.
+wait_for_ip()
+{
+       local _i
+       printf "Wait for IP %s to become available " "$1"
+       for _i in $(seq ${MAX_PING_TRIES}); do
+               printf "."
+               if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then
+                       echo " OK"
+                       return
+               fi
+               sleep 1
+       done
+       echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+       exit 1
+}
+
+get_prog_id()
+{
+       awk '/ id / {sub(/.* id /, "", $0); print($1)}'
+}
+
+ns1_exec()
+{
+       ip netns exec ns1 "$@"
+}
+
+setup()
+{
+       ip netns add ns1
+       ns1_exec ip link set lo up
+
+       ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
+
+       wait_for_ip 127.0.0.1
+       wait_for_ip ::1
+}
+
+cleanup()
+{
+       ip netns del ns1 2>/dev/null || :
+}
+
+main()
+{
+       trap cleanup EXIT 2 3 6 15
+       setup
+
+       printf "Testing clsact..."
+       ns1_exec tc qdisc add dev "${TEST_IF}" clsact
+       ns1_exec tc filter add dev "${TEST_IF}" ingress \
+               bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da
+
+       BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \
+                     get_prog_id)
+       ns1_exec "${PROG}" "${BPF_PROG_ID}"
+       ns1_exec tc qdisc del dev "${TEST_IF}" clsact
+
+       printf "Testing XDP..."
+       ns1_exec ip link set "${TEST_IF}" xdp \
+               object "${BPF_PROG_OBJ}" section "${XDP_SECTION}"
+       BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id)
+       ns1_exec "${PROG}" "${BPF_PROG_ID}"
+}
+
+# Locate the BPF object and the userspace helper relative to this script.
+# "$0" is quoted so a script path containing whitespace is handled.
+DIR=$(dirname "$0")
+TEST_IF=lo
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
+CLSACT_SECTION="clsact/check_syncookie"
+XDP_SECTION="xdp/check_syncookie"
+BPF_PROG_ID=0
+PROG="${DIR}/test_tcp_check_syncookie_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c

new file mode 100644 (file)

index 0000000..87829c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+/* Create a TCP socket for @addr's family, bind it to @addr and put it into
+ * the listening state (backlog 128).
+ *
+ * Returns the listening fd, or -1 after logging the step that failed.
+ */
+static int start_server(const struct sockaddr *addr, socklen_t len)
+{
+       int sock = socket(addr->sa_family, SOCK_STREAM, 0);
+
+       if (sock == -1) {
+               log_err("Failed to create server socket");
+               return -1;
+       }
+
+       if (bind(sock, addr, len) == -1) {
+               log_err("Failed to bind server socket");
+               close(sock);
+               return -1;
+       }
+
+       if (listen(sock, 128) == -1) {
+               log_err("Failed to listen on server socket");
+               close(sock);
+               return -1;
+       }
+
+       return sock;
+}
+
+/* Open a TCP connection to whatever address @server_fd is bound to.
+ *
+ * The address is obtained with getsockname(), so the caller does not need to
+ * know which port the server ended up on. Returns the connected fd, or -1
+ * after logging the step that failed.
+ */
+static int connect_to_server(int server_fd)
+{
+       struct sockaddr_storage srv_addr;
+       socklen_t srv_len = sizeof(srv_addr);
+       int sock;
+
+       if (getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len)) {
+               log_err("Failed to get server addr");
+               return -1;
+       }
+
+       sock = socket(srv_addr.ss_family, SOCK_STREAM, 0);
+       if (sock == -1) {
+               log_err("Failed to create client socket");
+               return -1;
+       }
+
+       if (connect(sock, (const struct sockaddr *)&srv_addr, srv_len) == -1) {
+               log_err("Fail to connect to server");
+               close(sock);
+               return -1;
+       }
+
+       return sock;
+}
+
+/* Return an fd for the first map id reported for the BPF program with id
+ * @prog_id, or a negative value on failure (each failure is logged).
+ * The temporary program fd is closed before returning.
+ */
+static int get_map_fd_by_prog_id(int prog_id)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       __u32 map_ids[1];
+       int prog_fd = -1;
+       int map_fd = -1;
+
+       prog_fd = bpf_prog_get_fd_by_id(prog_id);
+       if (prog_fd < 0) {
+               log_err("Failed to get fd by prog id %d", prog_id);
+               goto err;
+       }
+
+       /* Request at most one map id, to be written into map_ids[] */
+       info.nr_map_ids = 1;
+       info.map_ids = (__u64)(unsigned long)map_ids;
+
+       if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+               log_err("Failed to get info by prog fd %d", prog_fd);
+               goto err;
+       }
+
+       if (!info.nr_map_ids) {
+               log_err("No maps found for prog fd %d", prog_fd);
+               goto err;
+       }
+
+       map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+       if (map_fd < 0)
+               /* map ids are __u32, so print them as unsigned */
+               log_err("Failed to get fd by map id %u", map_ids[0]);
+err:
+       if (prog_fd >= 0)
+               close(prog_fd);
+       return map_fd;
+}
+
+/* Run one connection through @server_fd and check the result map.
+ *
+ * Slot 0 of the map behind @results_fd is reset to 0, a client connection is
+ * opened to the server and accepted, and the slot is then expected to read
+ * back as 1.
+ *
+ * Returns 0 on success, 1 on any failure (each failure is logged).
+ */
+static int run_test(int server_fd, int results_fd)
+{
+       int client = -1, srv_client = -1;
+       int ret = 0;
+       __u32 key = 0;
+       __u64 value = 0;
+
+       if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
+               log_err("Can't clear results");
+               goto err;
+       }
+
+       client = connect_to_server(server_fd);
+       if (client == -1)
+               goto err;
+
+       /* The peer address is not needed, so both out-pointers are NULL */
+       srv_client = accept(server_fd, NULL, NULL);
+       if (srv_client == -1) {
+               log_err("Can't accept connection");
+               goto err;
+       }
+
+       if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) {
+               log_err("Can't lookup result");
+               goto err;
+       }
+
+       if (value != 1) {
+               /* __u64 is not unsigned long long everywhere; cast for %llu */
+               log_err("Didn't match syncookie: %llu",
+                       (unsigned long long)value);
+               goto err;
+       }
+
+       goto out;
+
+err:
+       ret = 1;
+out:
+       /* Only close descriptors that were actually opened */
+       if (client >= 0)
+               close(client);
+       if (srv_client >= 0)
+               close(srv_client);
+       return ret;
+}
+
+/* Usage: <prog> prog_id
+ *
+ * Looks up the result map of the BPF program with the given id, starts one
+ * IPv4 and one IPv6 listener on the loopback address (port 0, i.e. chosen by
+ * the kernel), and runs run_test() against each. Prints "ok" and returns 0
+ * when both pass; returns 1 otherwise.
+ */
+int main(int argc, char **argv)
+{
+       struct sockaddr_in addr4;
+       struct sockaddr_in6 addr6;
+       int server = -1;
+       int server_v6 = -1;
+       int results = -1;
+       int err = 0;
+
+       if (argc < 2) {
+               fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
+               exit(1);
+       }
+
+       results = get_map_fd_by_prog_id(atoi(argv[1]));
+       if (results < 0) {
+               log_err("Can't get map");
+               goto err;
+       }
+
+       memset(&addr4, 0, sizeof(addr4));
+       addr4.sin_family = AF_INET;
+       addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+       addr4.sin_port = 0;
+
+       memset(&addr6, 0, sizeof(addr6));
+       addr6.sin6_family = AF_INET6;
+       addr6.sin6_addr = in6addr_loopback;
+       addr6.sin6_port = 0;
+
+       server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
+       if (server == -1)
+               goto err;
+
+       server_v6 = start_server((const struct sockaddr *)&addr6,
+                                sizeof(addr6));
+       if (server_v6 == -1)
+               goto err;
+
+       if (run_test(server, results))
+               goto err;
+
+       if (run_test(server_v6, results))
+               goto err;
+
+       printf("ok\n");
+       goto out;
+err:
+       err = 1;
+out:
+       /* Error paths can arrive here with some descriptors still unopened */
+       if (server >= 0)
+               close(server);
+       if (server_v6 >= 0)
+               close(server_v6);
+       if (results >= 0)
+               close(results);
+       return err;
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c

index 477a9dcf9ffff4b5b47a73759ba00d92d13b0106..19b5d03acc2a83d8425f5356dc53b2165ee96a1a 100644 (file)
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -198,7 +198,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
  }
  
  /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
-#define BPF_SK_LOOKUP                                                  \
+#define BPF_SK_LOOKUP(func)                                            \
         /* struct bpf_sock_tuple tuple = {} */                          \
         BPF_MOV64_IMM(BPF_REG_2, 0),                                    \
         BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),                  \
@@ -207,13 +207,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
         BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32),                \
         BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40),                \
         BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48),                \
-       /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */       \
+       /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */                \
         BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),                           \
         BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),                         \
         BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),        \
         BPF_MOV64_IMM(BPF_REG_4, 0),                                    \
         BPF_MOV64_IMM(BPF_REG_5, 0),                                    \
-       BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+       BPF_EMIT_CALL(BPF_FUNC_ ## func)
  
  /* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return
   * value into 0 and does necessary preparation for direct packet access
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c

index 9de8b7cb4e6df6b5929c915566dff74163efbcf2..db781052758d3774b42cb891b86b09320e3d95d5 100644 (file)
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -7,11 +7,19 @@
  
  #define BUF_SIZE 256
  
+/* Issue @count read() calls of BUF_SIZE bytes each on @fd. The data and the
+ * read() results are discarded. Does nothing when @count is not positive.
+ */
+static __attribute__((noinline))
+void urandom_read(int fd, int count)
+{
+       char buf[BUF_SIZE];
+
+       for (; count > 0; --count)
+               read(fd, buf, BUF_SIZE);
+}
+
  int main(int argc, char *argv[])
  {
         int fd = open("/dev/urandom", O_RDONLY);
-       int i;
-       char buf[BUF_SIZE];
         int count = 4;
  
         if (fd < 0)
@@ -20,8 +28,7 @@ int main(int argc, char *argv[])
         if (argc == 2)
                 count = atoi(argv[1]);
  
-       for (i = 0; i < count; ++i)
-               read(fd, buf, BUF_SIZE);
+       urandom_read(fd, count);
  
         close(fd);
         return 0;
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c

index 923f2110072d6f1f4a124824228c082d43f094d8..ebcbf154c4600d7c9f8fc6d1acc63061cbaaa1df 100644 (file)
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -1,7 +1,18 @@
  {
         "reference tracking: leak potential reference",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .errstr = "Unreleased reference",
+       .result = REJECT,
+},
+{
+       "reference tracking: leak potential reference to sock_common",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
         BPF_EXIT_INSN(),
         },
@@ -12,7 +23,7 @@
  {
         "reference tracking: leak potential reference on stack",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
         BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
         BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
@@ -26,7 +37,7 @@
  {
         "reference tracking: leak potential reference on stack 2",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
         BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
         BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
@@ -41,7 +52,18 @@
  {
         "reference tracking: zero potential reference",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .errstr = "Unreleased reference",
+       .result = REJECT,
+},
+{
+       "reference tracking: zero potential reference to sock_common",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
         BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
         BPF_EXIT_INSN(),
         },
@@ -52,7 +74,7 @@
  {
         "reference tracking: copy and zero potential references",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
         BPF_MOV64_IMM(BPF_REG_0, 0),
         BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
@@ -65,7 +87,7 @@
  {
         "reference tracking: release reference without check",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         /* reference in r0 may be NULL */
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_MOV64_IMM(BPF_REG_2, 0),
@@ -76,10 +98,36 @@
         .errstr = "type=sock_or_null expected=sock",
         .result = REJECT,
  },
+{
+       "reference tracking: release reference to sock_common without check",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
+       /* reference in r0 may be NULL */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .errstr = "type=sock_common_or_null expected=sock",
+       .result = REJECT,
+},
  {
         "reference tracking: release reference",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = ACCEPT,
+},
+{
+       "reference tracking: release reference to sock_common",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
         BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -91,7 +139,7 @@
  {
         "reference tracking: release reference 2",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
@@ -104,7 +152,7 @@
  {
         "reference tracking: release reference twice",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
@@ -120,7 +168,7 @@
  {
         "reference tracking: release reference twice inside branch",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
@@ -147,7 +195,7 @@
         BPF_EXIT_INSN(),
         BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
                     offsetof(struct __sk_buff, mark)),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
         /* Leak reference in R0 */
         BPF_EXIT_INSN(),
@@ -175,7 +223,7 @@
         BPF_EXIT_INSN(),
         BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
                     offsetof(struct __sk_buff, mark)),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
@@ -193,7 +241,7 @@
  {
         "reference tracking in call: free reference in subprog",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
         BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -211,7 +259,7 @@
  {
         "reference tracking in call: free reference in subprog and outside",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
@@ -241,7 +289,7 @@
  
         /* subprog 1 */
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         /* spill unchecked sk_ptr into stack of caller */
         BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
@@ -262,7 +310,7 @@
         BPF_EXIT_INSN(),
  
         /* subprog 1 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_EXIT_INSN(), /* return sk */
         },
         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -291,7 +339,7 @@
         BPF_EXIT_INSN(),
  
         /* subprog 2 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_EXIT_INSN(),
         },
         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -324,7 +372,7 @@
         BPF_EXIT_INSN(),
  
         /* subprog 2 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_EXIT_INSN(),
         },
         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -334,7 +382,7 @@
         "reference tracking: allow LD_ABS",
         .insns = {
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
         BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -350,7 +398,7 @@
         "reference tracking: forbid LD_ABS while holding reference",
         .insns = {
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_LD_ABS(BPF_B, 0),
         BPF_LD_ABS(BPF_H, 0),
         BPF_LD_ABS(BPF_W, 0),
@@ -367,7 +415,7 @@
         "reference tracking: allow LD_IND",
         .insns = {
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
         BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -384,7 +432,7 @@
         "reference tracking: forbid LD_IND while holding reference",
         .insns = {
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
         BPF_MOV64_IMM(BPF_REG_7, 1),
         BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
@@ -402,7 +450,7 @@
         "reference tracking: check reference or tail call",
         .insns = {
         BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         /* if (sk) bpf_sk_release() */
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
@@ -424,7 +472,7 @@
         "reference tracking: release reference then tail call",
         .insns = {
         BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         /* if (sk) bpf_sk_release() */
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
@@ -446,7 +494,7 @@
         .insns = {
         BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
         /* Look up socket and store in REG_6 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         /* bpf_tail_call() */
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_MOV64_IMM(BPF_REG_3, 2),
@@ -470,7 +518,7 @@
         .insns = {
         BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
         /* Look up socket and store in REG_6 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         /* if (!sk) goto end */
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
@@ -492,7 +540,7 @@
  {
         "reference tracking: mangle and release sock_or_null",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
@@ -506,7 +554,7 @@
  {
         "reference tracking: mangle and release sock",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
         BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
@@ -520,7 +568,7 @@
  {
         "reference tracking: access member",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
         BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
@@ -534,7 +582,7 @@
  {
         "reference tracking: write to member",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
@@ -553,7 +601,7 @@
  {
         "reference tracking: invalid 64-bit access of member",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
         BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
@@ -568,7 +616,7 @@
  {
         "reference tracking: access after release",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
         BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
         BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -608,7 +656,7 @@
  {
         "reference tracking: use ptr from bpf_tcp_sock() after release",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -631,7 +679,7 @@
  {
         "reference tracking: use ptr from bpf_sk_fullsock() after release",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -654,7 +702,7 @@
  {
         "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -681,7 +729,7 @@
  {
         "reference tracking: use sk after bpf_sk_release(tp)",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -703,7 +751,7 @@
  {
         "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -725,7 +773,7 @@
  {
         "reference tracking: bpf_sk_release(listen_sk)",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -750,7 +798,7 @@
         /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
         "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
         .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
         BPF_EXIT_INSN(),
         BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c

index dbaf5be947b2be8ea3007cae023ef94be600a553..91bb77c24a2ef3bb392ce2763e90d7a05f2af34d 100644 (file)
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -242,7 +242,7 @@
         .insns = {
         BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
         /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
         /* u64 foo; */
         /* void *target = &foo; */
@@ -276,7 +276,7 @@
         .insns = {
         BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
         /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
         /* u64 foo; */
         /* void *target = &foo; */
@@ -307,7 +307,7 @@
         .insns = {
         BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
         /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
         /* u64 foo; */
         /* void *target = &foo; */
@@ -339,7 +339,7 @@
         .insns = {
         BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
         /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
         BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
         /* u64 foo; */
         /* void *target = &foo; */
author	David S. Miller <davem@davemloft.net>
	Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
committer	David S. Miller <davem@davemloft.net>
	Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
drivers/net/ethernet/marvell/mvpp2/mvpp2.h		patch \| blob \| history
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c		patch \| blob \| history
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h		patch \| blob \| history
drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c		patch \| blob \| history
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c		patch \| blob \| history
include/linux/bpf.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
net/core/filter.c		patch \| blob \| history
samples/bpf/.gitignore		patch \| blob \| history
tools/include/uapi/linux/bpf.h		patch \| blob \| history
tools/testing/selftests/bpf/.gitignore		patch \| blob \| history
tools/testing/selftests/bpf/Makefile		patch \| blob \| history
tools/testing/selftests/bpf/bpf_helpers.h		patch \| blob \| history
tools/testing/selftests/bpf/config		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c		patch \| blob \| history
tools/testing/selftests/bpf/progs/test_tc_edt.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/progs/test_tc_tunnel.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/test_tc_edt.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/bpf/test_tc_tunnel.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/bpf/test_tcp_check_syncookie.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/test_verifier.c		patch \| blob \| history
tools/testing/selftests/bpf/urandom_read.c		patch \| blob \| history
tools/testing/selftests/bpf/verifier/ref_tracking.c		patch \| blob \| history
tools/testing/selftests/bpf/verifier/unpriv.c		patch \| blob \| history