asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next...
author David S. Miller <davem@davemloft.net>
Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 27 Mar 2019 18:19:13 +0000 (11:19 -0700)
Jeff Kirsher says:

====================
100GbE Intel Wired LAN Driver Updates 2019-03-26

This series contains more updates to the ice driver only.

Jeremiah provides his first patch to the Linux kernel, cleaning up
unnecessary newlines in driver log messages.

Mitch updates the ice driver to use the existing status codes from the
iavf driver so that errors are not reported with nonsensical values.
He also adds support for VF admin queue interrupts by programming the
VPINT_MBX_CTL register array.

Brett adds a check for a bit that is set while preparing for a reset,
to ensure we are ready to perform a proper reset.  He also implements
PCI error handling operations, audits the hot path with pahole and
reworks two structures that were taking up more space than necessary
due to cache alignment issues, and fixes an issue where the flow
control state was displayed as "Unknown" when flow control was
disabled.

Anirudh fixes adaptive interrupt moderation by adding code that was
missed from the initial patch introducing that support.  He also
removes a function prototype that was never implemented, and cleans up
unneeded braces and redundant code comments.

Akeem fixes an issue that occurs when a VF attempts to remove the
default LAN/MAC address programmed by the administrator, updating the
error message to explicitly say that the VF cannot change a MAC
address programmed by the PF.

Preethi fixes the driver to not fall into the error path when an added
filter already exists, but instead to continue processing the rest of
the function, and adds appropriate checks after adding MAC filters.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
27 files changed:
drivers/net/ethernet/marvell/mvpp2/mvpp2.h
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
include/linux/bpf.h
include/uapi/linux/bpf.h
kernel/bpf/verifier.c
net/core/filter.c
samples/bpf/.gitignore
tools/include/uapi/linux/bpf.h
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_helpers.h
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
tools/testing/selftests/bpf/progs/test_tc_edt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tc_tunnel.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_tc_edt.sh [new file with mode: 0755]
tools/testing/selftests/bpf/test_tc_tunnel.sh [new file with mode: 0755]
tools/testing/selftests/bpf/test_tcp_check_syncookie.sh [new file with mode: 0755]
tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/urandom_read.c
tools/testing/selftests/bpf/verifier/ref_tracking.c
tools/testing/selftests/bpf/verifier/unpriv.c

index ff0f4c503f534a792e22114af9395a81d896d4ef..67cce2736806d1054d75ab87ac211f93d210e273 100644 (file)
 #define MVPP2_CLS_FLOW_TBL1_REG                        0x1828
 #define     MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK  0x7
 #define     MVPP2_CLS_FLOW_TBL1_N_FIELDS(x)    (x)
+#define     MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu)    (((lu) & 0x3f) << 3)
 #define     MVPP2_CLS_FLOW_TBL1_PRIO_MASK      0x3f
 #define     MVPP2_CLS_FLOW_TBL1_PRIO(x)                ((x) << 9)
 #define     MVPP2_CLS_FLOW_TBL1_SEQ_MASK       0x7
 #define MVPP22_CLS_C2_TCAM_DATA2               0x1b18
 #define MVPP22_CLS_C2_TCAM_DATA3               0x1b1c
 #define MVPP22_CLS_C2_TCAM_DATA4               0x1b20
+#define     MVPP22_CLS_C2_LU_TYPE(lu)          ((lu) & 0x3f)
 #define     MVPP22_CLS_C2_PORT_ID(port)                ((port) << 8)
+#define MVPP22_CLS_C2_TCAM_INV                 0x1b24
+#define     MVPP22_CLS_C2_TCAM_INV_BIT         BIT(31)
 #define MVPP22_CLS_C2_HIT_CTR                  0x1b50
 #define MVPP22_CLS_C2_ACT                      0x1b60
 #define     MVPP22_CLS_C2_ACT_RSS_EN(act)      (((act) & 0x3) << 19)
 #define MVPP2_BIT_TO_WORD(bit)         ((bit) / 32)
 #define MVPP2_BIT_IN_WORD(bit)         ((bit) % 32)
 
+#define MVPP2_N_PRS_FLOWS              52
+
 /* RSS constants */
 #define MVPP22_RSS_TABLE_ENTRIES       32
 
@@ -710,6 +716,7 @@ enum mvpp2_prs_l3_cast {
 #define MVPP2_DESC_DMA_MASK    DMA_BIT_MASK(40)
 
 /* Definitions */
+struct mvpp2_dbgfs_entries;
 
 /* Shared Packet Processor resources */
 struct mvpp2 {
@@ -771,6 +778,9 @@ struct mvpp2 {
 
        /* Debugfs root entry */
        struct dentry *dbgfs_dir;
+
+       /* Debugfs entries private data */
+       struct mvpp2_dbgfs_entries *dbgfs_entries;
 };
 
 struct mvpp2_pcpu_stats {
index efdb7a65683576a84806639630fce4d0928defcd..1087974d3b98aefc295b14fe8c4e8546beb5421d 100644 (file)
@@ -22,7 +22,7 @@
        }                                                       \
 }
 
-static struct mvpp2_cls_flow cls_flows[MVPP2_N_FLOWS] = {
+static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
        /* TCP over IPv4 flows, Not fragmented, no vlan tag */
        MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
                       MVPP22_CLS_HEK_IP4_5T,
@@ -429,12 +429,6 @@ static void mvpp2_cls_flow_port_id_sel(struct mvpp2_cls_flow_entry *fe,
                fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
 }
 
-static void mvpp2_cls_flow_seq_set(struct mvpp2_cls_flow_entry *fe, u32 seq)
-{
-       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_SEQ(MVPP2_CLS_FLOW_TBL1_SEQ_MASK);
-       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_SEQ(seq);
-}
-
 static void mvpp2_cls_flow_last_set(struct mvpp2_cls_flow_entry *fe,
                                    bool is_last)
 {
@@ -454,9 +448,16 @@ static void mvpp2_cls_flow_port_add(struct mvpp2_cls_flow_entry *fe,
        fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID(port);
 }
 
+static void mvpp2_cls_flow_lu_type_set(struct mvpp2_cls_flow_entry *fe,
+                                      u8 lu_type)
+{
+       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK);
+       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu_type);
+}
+
 /* Initialize the parser entry for the given flow */
 static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
-                                   struct mvpp2_cls_flow *flow)
+                                   const struct mvpp2_cls_flow *flow)
 {
        mvpp2_prs_add_flow(priv, flow->flow_id, flow->prs_ri.ri,
                           flow->prs_ri.ri_mask);
@@ -464,7 +465,7 @@ static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
 
 /* Initialize the Lookup Id table entry for the given flow */
 static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
-                                   struct mvpp2_cls_flow *flow)
+                                   const struct mvpp2_cls_flow *flow)
 {
        struct mvpp2_cls_lookup_entry le;
 
@@ -477,7 +478,7 @@ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
        /* We point to the first lookup in the sequence for the flow, that is
         * the C2 lookup.
         */
-       le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_FLOW_C2_ENTRY(flow->flow_id));
+       le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_CLS_FLT_FIRST(flow->flow_id));
 
        /* CLS is always enabled, RSS is enabled/disabled in C2 lookup */
        le.data |= MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK;
@@ -485,21 +486,86 @@ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
        mvpp2_cls_lookup_write(priv, &le);
 }
 
+static void mvpp2_cls_c2_write(struct mvpp2 *priv,
+                              struct mvpp2_cls_c2_entry *c2)
+{
+       u32 val;
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
+
+       val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV);
+       if (c2->valid)
+               val &= ~MVPP22_CLS_C2_TCAM_INV_BIT;
+       else
+               val |= MVPP22_CLS_C2_TCAM_INV_BIT;
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_INV, val);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
+       /* Writing TCAM_DATA4 flushes writes to TCAM_DATA0-4 and INV to HW */
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
+}
+
+void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
+                      struct mvpp2_cls_c2_entry *c2)
+{
+       u32 val;
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
+
+       c2->index = index;
+
+       c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
+       c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
+       c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
+       c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
+       c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
+
+       c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
+
+       c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
+       c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
+       c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
+       c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
+
+       val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV);
+       c2->valid = !(val & MVPP22_CLS_C2_TCAM_INV_BIT);
+}
+
 /* Initialize the flow table entries for the given flow */
-static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
+static void mvpp2_cls_flow_init(struct mvpp2 *priv,
+                               const struct mvpp2_cls_flow *flow)
 {
        struct mvpp2_cls_flow_entry fe;
-       int i;
+       int i, pri = 0;
+
+       /* Assign default values to all entries in the flow */
+       for (i = MVPP2_CLS_FLT_FIRST(flow->flow_id);
+            i <= MVPP2_CLS_FLT_LAST(flow->flow_id); i++) {
+               memset(&fe, 0, sizeof(fe));
+               fe.index = i;
+               mvpp2_cls_flow_pri_set(&fe, pri++);
 
-       /* C2 lookup */
-       memset(&fe, 0, sizeof(fe));
-       fe.index = MVPP2_FLOW_C2_ENTRY(flow->flow_id);
+               if (i == MVPP2_CLS_FLT_LAST(flow->flow_id))
+                       mvpp2_cls_flow_last_set(&fe, 1);
+
+               mvpp2_cls_flow_write(priv, &fe);
+       }
+
+       /* RSS config C2 lookup */
+       mvpp2_cls_flow_read(priv, MVPP2_CLS_FLT_C2_RSS_ENTRY(flow->flow_id),
+                           &fe);
 
        mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
        mvpp2_cls_flow_port_id_sel(&fe, true);
-       mvpp2_cls_flow_last_set(&fe, 0);
-       mvpp2_cls_flow_pri_set(&fe, 0);
-       mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_FIRST1);
+       mvpp2_cls_flow_lu_type_set(&fe, MVPP2_CLS_LU_ALL);
 
        /* Add all ports */
        for (i = 0; i < MVPP2_MAX_PORTS; i++)
@@ -509,22 +575,19 @@ static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
 
        /* C3Hx lookups */
        for (i = 0; i < MVPP2_MAX_PORTS; i++) {
-               memset(&fe, 0, sizeof(fe));
-               fe.index = MVPP2_PORT_FLOW_HASH_ENTRY(i, flow->flow_id);
+               mvpp2_cls_flow_read(priv,
+                                   MVPP2_CLS_FLT_HASH_ENTRY(i, flow->flow_id),
+                                   &fe);
 
+               /* Set a default engine. Will be overwritten when setting the
+                * real HEK parameters
+                */
+               mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C3HA);
                mvpp2_cls_flow_port_id_sel(&fe, true);
-               mvpp2_cls_flow_pri_set(&fe, i + 1);
-               mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_MIDDLE);
                mvpp2_cls_flow_port_add(&fe, BIT(i));
 
                mvpp2_cls_flow_write(priv, &fe);
        }
-
-       /* Update the last entry */
-       mvpp2_cls_flow_last_set(&fe, 1);
-       mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_LAST);
-
-       mvpp2_cls_flow_write(priv, &fe);
 }
 
 /* Adds a field to the Header Extracted Key generation parameters */
@@ -555,6 +618,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
 
        for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
                switch (BIT(i)) {
+               case MVPP22_CLS_HEK_OPT_MAC_DA:
+                       field_id = MVPP22_CLS_FIELD_MAC_DA;
+                       break;
                case MVPP22_CLS_HEK_OPT_VLAN:
                        field_id = MVPP22_CLS_FIELD_VLAN;
                        break;
@@ -586,9 +652,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
        return 0;
 }
 
-struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
+const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
 {
-       if (flow >= MVPP2_N_FLOWS)
+       if (flow >= MVPP2_N_PRS_FLOWS)
                return NULL;
 
        return &cls_flows[flow];
@@ -608,21 +674,17 @@ struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
 static int mvpp2_port_rss_hash_opts_set(struct mvpp2_port *port, int flow_type,
                                        u16 requested_opts)
 {
+       const struct mvpp2_cls_flow *flow;
        struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *flow;
        int i, engine, flow_index;
        u16 hash_opts;
 
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for_each_cls_flow_id_with_type(i, flow_type) {
                flow = mvpp2_cls_flow_get(i);
                if (!flow)
                        return -EINVAL;
 
-               if (flow->flow_type != flow_type)
-                       continue;
-
-               flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
-                                                       flow->flow_id);
+               flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id);
 
                mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -697,21 +759,17 @@ u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe)
  */
 static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
 {
+       const struct mvpp2_cls_flow *flow;
        struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *flow;
        int i, flow_index;
        u16 hash_opts = 0;
 
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for_each_cls_flow_id_with_type(i, flow_type) {
                flow = mvpp2_cls_flow_get(i);
                if (!flow)
                        return 0;
 
-               if (flow->flow_type != flow_type)
-                       continue;
-
-               flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
-                                                       flow->flow_id);
+               flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id);
 
                mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -723,10 +781,10 @@ static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
 
 static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
 {
-       struct mvpp2_cls_flow *flow;
+       const struct mvpp2_cls_flow *flow;
        int i;
 
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) {
                flow = mvpp2_cls_flow_get(i);
                if (!flow)
                        break;
@@ -737,47 +795,6 @@ static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
        }
 }
 
-static void mvpp2_cls_c2_write(struct mvpp2 *priv,
-                              struct mvpp2_cls_c2_entry *c2)
-{
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
-
-       /* Write TCAM */
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
-
-       mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
-
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
-       mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
-}
-
-void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
-                      struct mvpp2_cls_c2_entry *c2)
-{
-       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
-
-       c2->index = index;
-
-       c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
-       c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
-       c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
-       c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
-       c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
-
-       c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
-
-       c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
-       c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
-       c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
-       c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
-}
-
 static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
 {
        struct mvpp2_cls_c2_entry c2;
@@ -791,6 +808,10 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
        c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap);
        c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap));
 
+       /* Match on Lookup Type */
+       c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK));
+       c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_ALL);
+
        /* Update RSS status after matching this entry */
        c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
 
@@ -809,6 +830,8 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
        c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
                      MVPP22_CLS_C2_ATTR0_QLOW(ql);
 
+       c2.valid = true;
+
        mvpp2_cls_c2_write(port->priv, &c2);
 }
 
@@ -817,6 +840,7 @@ void mvpp2_cls_init(struct mvpp2 *priv)
 {
        struct mvpp2_cls_lookup_entry le;
        struct mvpp2_cls_flow_entry fe;
+       struct mvpp2_cls_c2_entry c2;
        int index;
 
        /* Enable classifier */
@@ -840,6 +864,14 @@ void mvpp2_cls_init(struct mvpp2 *priv)
                mvpp2_cls_lookup_write(priv, &le);
        }
 
+       /* Clear C2 TCAM engine table */
+       memset(&c2, 0, sizeof(c2));
+       c2.valid = false;
+       for (index = 0; index < MVPP22_CLS_C2_N_ENTRIES; index++) {
+               c2.index = index;
+               mvpp2_cls_c2_write(priv, &c2);
+       }
+
        mvpp2_cls_port_init_flows(priv);
 }
 
@@ -902,12 +934,12 @@ static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
        mvpp2_cls_c2_write(port->priv, &c2);
 }
 
-void mvpp22_rss_enable(struct mvpp2_port *port)
+void mvpp22_port_rss_enable(struct mvpp2_port *port)
 {
        mvpp2_rss_port_c2_enable(port);
 }
 
-void mvpp22_rss_disable(struct mvpp2_port *port)
+void mvpp22_port_rss_disable(struct mvpp2_port *port)
 {
        mvpp2_rss_port_c2_disable(port);
 }
@@ -1037,7 +1069,7 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
        return 0;
 }
 
-void mvpp22_rss_port_init(struct mvpp2_port *port)
+void mvpp22_port_rss_init(struct mvpp2_port *port)
 {
        struct mvpp2 *priv = port->priv;
        int i;
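
The C2 TCAM matching set up in mvpp2_port_c2_cls_init() above packs, in each
32-bit TCAM word, 16 data bits in the lower half and a 16-bit enable mask in
the upper half (MVPP22_CLS_C2_TCAM_EN() in the mvpp2_cls.h hunk below shifts
the mask into place). Below is a minimal standalone sketch of that match
semantics as I read it from this diff; the helper is illustrative, not driver
code:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the enable mask lives in the upper 16 bits of a word. */
#define TCAM_EN(data)	((uint32_t)(data) << 16)

/* A word matches when every enabled data bit equals the packet's bit. */
static int c2_word_matches(uint32_t tcam_word, uint16_t pkt_bits)
{
	uint16_t en   = tcam_word >> 16;
	uint16_t data = tcam_word & 0xffff;

	return ((pkt_bits ^ data) & en) == 0;
}

int main(void)
{
	/* Key on lookup type bits 5:0 == MVPP2_CLS_LU_ALL (0) and ignore the
	 * rest, mirroring the tcam[4] setup in mvpp2_port_c2_cls_init(). */
	uint32_t word = TCAM_EN(0x3f) | 0x0;

	printf("lu_type 0 matches: %d\n", c2_word_matches(word, 0));
	printf("lu_type 5 matches: %d\n", c2_word_matches(word, 5));
	return 0;
}

Only bits whose enable bit is set participate in the match, which is how one
entry can key on the port map and lookup type while ignoring all other fields.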
index 089f05f298917ae394b9245cdb255259d269b3c7..96304ffc5d49efd8f6c25d245700252931486262 100644 (file)
@@ -71,14 +71,6 @@ enum mvpp2_cls_field_id {
        MVPP22_CLS_FIELD_L4DIP = 0x1e,
 };
 
-enum mvpp2_cls_flow_seq {
-       MVPP2_CLS_FLOW_SEQ_NORMAL = 0,
-       MVPP2_CLS_FLOW_SEQ_FIRST1,
-       MVPP2_CLS_FLOW_SEQ_FIRST2,
-       MVPP2_CLS_FLOW_SEQ_LAST,
-       MVPP2_CLS_FLOW_SEQ_MIDDLE
-};
-
 /* Classifier C2 engine constants */
 #define MVPP22_CLS_C2_TCAM_EN(data)            ((data) << 16)
 
@@ -105,34 +97,25 @@ enum mvpp22_cls_c2_fwd_action {
 
 struct mvpp2_cls_c2_entry {
        u32 index;
+       /* TCAM lookup key */
        u32 tcam[MVPP2_CLS_C2_TCAM_WORDS];
+       /* Actions to perform upon TCAM match */
        u32 act;
+       /* Attributes relative to the actions to perform */
        u32 attr[MVPP2_CLS_C2_ATTR_WORDS];
+       /* Entry validity */
+       u8 valid;
 };
 
 /* Classifier C2 engine entries */
-#define MVPP22_CLS_C2_RSS_ENTRY(port)  (port)
-#define MVPP22_CLS_C2_N_ENTRIES                MVPP2_MAX_PORTS
+#define MVPP22_CLS_C2_N_ENTRIES                256
 
-/* RSS flow entries in the flow table. We have 2 entries per port for RSS.
- *
- * The first performs a lookup using the C2 TCAM engine, to tag the
- * packet for software forwarding (needed for RSS), enable or disable RSS, and
- * assign the default rx queue.
- *
- * The second configures the hash generation, by specifying which fields of the
- * packet header are used to generate the hash, and specifies the relevant hash
- * engine to use.
- */
-#define MVPP22_RSS_FLOW_C2_OFFS                0
-#define MVPP22_RSS_FLOW_HASH_OFFS      1
-#define MVPP22_RSS_FLOW_SIZE           (MVPP22_RSS_FLOW_HASH_OFFS + 1)
+/* Number of per-port dedicated entries in the C2 TCAM */
+#define MVPP22_CLS_C2_PORT_RANGE       8
 
-#define MVPP22_RSS_FLOW_C2(port)       ((port) * MVPP22_RSS_FLOW_SIZE + \
-                                        MVPP22_RSS_FLOW_C2_OFFS)
-#define MVPP22_RSS_FLOW_HASH(port)     ((port) * MVPP22_RSS_FLOW_SIZE + \
-                                        MVPP22_RSS_FLOW_HASH_OFFS)
-#define MVPP22_RSS_FLOW_FIRST(port)    MVPP22_RSS_FLOW_C2(port)
+#define MVPP22_CLS_C2_PORT_FIRST(p)    (MVPP22_CLS_C2_N_ENTRIES - \
+                                       ((p) * MVPP22_CLS_C2_PORT_RANGE))
+#define MVPP22_CLS_C2_RSS_ENTRY(p)     (MVPP22_CLS_C2_PORT_FIRST(p) - 1)
 
 /* Packet flow ID */
 enum mvpp2_prs_flow {
@@ -162,6 +145,15 @@ enum mvpp2_prs_flow {
        MVPP2_FL_LAST,
 };
 
+enum mvpp2_cls_lu_type {
+       MVPP2_CLS_LU_ALL = 0,
+};
+
+/* LU Type defined for all engines, and specified in the flow table */
+#define MVPP2_CLS_LU_TYPE_MASK                 0x3f
+
+#define MVPP2_N_FLOWS          (MVPP2_FL_LAST - MVPP2_FL_START)
+
 struct mvpp2_cls_flow {
        /* The L2-L4 traffic flow type */
        int flow_type;
@@ -176,12 +168,37 @@ struct mvpp2_cls_flow {
        struct mvpp2_prs_result_info prs_ri;
 };
 
-#define MVPP2_N_FLOWS  52
+#define MVPP2_CLS_FLT_ENTRIES_PER_FLOW         (MVPP2_MAX_PORTS + 1)
+#define MVPP2_CLS_FLT_FIRST(id)                        (((id) - MVPP2_FL_START) * \
+                                                MVPP2_CLS_FLT_ENTRIES_PER_FLOW)
+#define MVPP2_CLS_FLT_C2_RSS_ENTRY(id)         (MVPP2_CLS_FLT_FIRST(id))
+#define MVPP2_CLS_FLT_HASH_ENTRY(port, id)     (MVPP2_CLS_FLT_C2_RSS_ENTRY(id) + (port) + 1)
+#define MVPP2_CLS_FLT_LAST(id)                 (MVPP2_CLS_FLT_FIRST(id) + \
+                                                MVPP2_CLS_FLT_ENTRIES_PER_FLOW - 1)
+
+/* Iterate on each classifier flow id. Sets 'i' to be the index of the first
+ * entry in the cls_flows table for each different flow_id.
+ * This relies on entries having the same flow_id in the cls_flows table being
+ * contiguous.
+ */
+#define for_each_cls_flow_id(i)                                                      \
+       for ((i) = 0; (i) < MVPP2_N_PRS_FLOWS; (i)++)                         \
+               if ((i) > 0 &&                                                \
+                   cls_flows[(i)].flow_id == cls_flows[(i) - 1].flow_id)       \
+                       continue;                                             \
+               else
+
+/* Iterate on each classifier flow that has a given flow_type. Sets 'i' to be
+ * the index of the first entry in the cls_flow table for each different flow_id
+ * that has the given flow_type. This allows operating on all flows that
+ * match a given ethtool flow type.
+ */
+#define for_each_cls_flow_id_with_type(i, type)                                      \
+       for_each_cls_flow_id((i))                                             \
+               if (cls_flows[(i)].flow_type != (type))                       \
+                       continue;                                             \
+               else
 
-#define MVPP2_ENTRIES_PER_FLOW                 (MVPP2_MAX_PORTS + 1)
-#define MVPP2_FLOW_C2_ENTRY(id)                        ((id) * MVPP2_ENTRIES_PER_FLOW)
-#define MVPP2_PORT_FLOW_HASH_ENTRY(port, id)   ((id) * MVPP2_ENTRIES_PER_FLOW + \
-                                               (port) + 1)
 struct mvpp2_cls_flow_entry {
        u32 index;
        u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS];
@@ -194,11 +211,10 @@ struct mvpp2_cls_lookup_entry {
 };
 
 void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
+void mvpp22_port_rss_init(struct mvpp2_port *port);
 
-void mvpp22_rss_port_init(struct mvpp2_port *port);
-
-void mvpp22_rss_enable(struct mvpp2_port *port);
-void mvpp22_rss_disable(struct mvpp2_port *port);
+void mvpp22_port_rss_enable(struct mvpp2_port *port);
+void mvpp22_port_rss_disable(struct mvpp2_port *port);
 
 int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
@@ -213,7 +229,7 @@ int mvpp2_cls_flow_eng_get(struct mvpp2_cls_flow_entry *fe);
 
 u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe);
 
-struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
+const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
 
 u32 mvpp2_cls_flow_hits(struct mvpp2 *priv, int index);
 
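To make the new flow-table layout above concrete: each flow gets
MVPP2_CLS_FLT_ENTRIES_PER_FLOW (MVPP2_MAX_PORTS + 1) consecutive entries, the
C2/RSS entry first and then one C3Hx hash entry per port. A small standalone
sketch follows, assuming MVPP2_MAX_PORTS == 4 and MVPP2_FL_START == 8 (both
are defined elsewhere in the driver, so treat the numbers as illustrative):

#include <stdio.h>

/* Assumed values for illustration; the real definitions live in mvpp2.h
 * and mvpp2_cls.h. */
#define MVPP2_MAX_PORTS                        4
#define MVPP2_FL_START                         8

#define MVPP2_CLS_FLT_ENTRIES_PER_FLOW         (MVPP2_MAX_PORTS + 1)
#define MVPP2_CLS_FLT_FIRST(id)                (((id) - MVPP2_FL_START) * \
                                                MVPP2_CLS_FLT_ENTRIES_PER_FLOW)
#define MVPP2_CLS_FLT_C2_RSS_ENTRY(id)         (MVPP2_CLS_FLT_FIRST(id))
#define MVPP2_CLS_FLT_HASH_ENTRY(port, id)     (MVPP2_CLS_FLT_C2_RSS_ENTRY(id) + (port) + 1)
#define MVPP2_CLS_FLT_LAST(id)                 (MVPP2_CLS_FLT_FIRST(id) + \
                                                MVPP2_CLS_FLT_ENTRIES_PER_FLOW - 1)

int main(void)
{
	int id = MVPP2_FL_START + 1;	/* an arbitrary flow id */
	int port;

	printf("flow %d: C2/RSS entry %d, last entry %d\n",
	       id, MVPP2_CLS_FLT_C2_RSS_ENTRY(id), MVPP2_CLS_FLT_LAST(id));
	for (port = 0; port < MVPP2_MAX_PORTS; port++)
		printf("  port %d hash entry: %d\n",
		       port, MVPP2_CLS_FLT_HASH_ENTRY(port, id));
	return 0;
}

For flow id 9 this yields entries 5..9: the C2/RSS entry at index 5 and the
per-port hash entries at 6..9, matching the lookup order the comments in
mvpp2_cls_flow_lkp_init() describe.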
index f9744a61e5dd6fc6282eddca38b2718dac899e12..0ee39ea47b6b63be44de58f5897eb44d054b1477 100644 (file)
@@ -18,22 +18,48 @@ struct mvpp2_dbgfs_prs_entry {
        struct mvpp2 *priv;
 };
 
+struct mvpp2_dbgfs_c2_entry {
+       int id;
+       struct mvpp2 *priv;
+};
+
 struct mvpp2_dbgfs_flow_entry {
        int flow;
        struct mvpp2 *priv;
 };
 
+struct mvpp2_dbgfs_flow_tbl_entry {
+       int id;
+       struct mvpp2 *priv;
+};
+
 struct mvpp2_dbgfs_port_flow_entry {
        struct mvpp2_port *port;
        struct mvpp2_dbgfs_flow_entry *dbg_fe;
 };
 
+struct mvpp2_dbgfs_entries {
+       /* Entries for Header Parser debug info */
+       struct mvpp2_dbgfs_prs_entry prs_entries[MVPP2_PRS_TCAM_SRAM_SIZE];
+
+       /* Entries for Classifier C2 engine debug info */
+       struct mvpp2_dbgfs_c2_entry c2_entries[MVPP22_CLS_C2_N_ENTRIES];
+
+       /* Entries for Classifier Flow Table debug info */
+       struct mvpp2_dbgfs_flow_tbl_entry flt_entries[MVPP2_CLS_FLOWS_TBL_SIZE];
+
+       /* Entries for Classifier flows debug info */
+       struct mvpp2_dbgfs_flow_entry flow_entries[MVPP2_N_PRS_FLOWS];
+
+       /* Entries for per-port flows debug info */
+       struct mvpp2_dbgfs_port_flow_entry port_flow_entries[MVPP2_MAX_PORTS];
+};
+
 static int mvpp2_dbgfs_flow_flt_hits_show(struct seq_file *s, void *unused)
 {
-       struct mvpp2_dbgfs_flow_entry *entry = s->private;
-       int id = MVPP2_FLOW_C2_ENTRY(entry->flow);
+       struct mvpp2_dbgfs_flow_tbl_entry *entry = s->private;
 
-       u32 hits = mvpp2_cls_flow_hits(entry->priv, id);
+       u32 hits = mvpp2_cls_flow_hits(entry->priv, entry->id);
 
        seq_printf(s, "%u\n", hits);
 
@@ -58,7 +84,7 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_dec_hits);
 static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused)
 {
        struct mvpp2_dbgfs_flow_entry *entry = s->private;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_cls_flow *f;
        const char *flow_name;
 
        f = mvpp2_cls_flow_get(entry->flow);
@@ -93,30 +119,12 @@ static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused)
        return 0;
 }
 
-static int mvpp2_dbgfs_flow_type_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, mvpp2_dbgfs_flow_type_show, inode->i_private);
-}
-
-static int mvpp2_dbgfs_flow_type_release(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq = file->private_data;
-       struct mvpp2_dbgfs_flow_entry *flow_entry = seq->private;
-
-       kfree(flow_entry);
-       return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_flow_type_fops = {
-       .open = mvpp2_dbgfs_flow_type_open,
-       .read = seq_read,
-       .release = mvpp2_dbgfs_flow_type_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_type);
 
 static int mvpp2_dbgfs_flow_id_show(struct seq_file *s, void *unused)
 {
-       struct mvpp2_dbgfs_flow_entry *entry = s->private;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_dbgfs_flow_entry *entry = s->private;
+       const struct mvpp2_cls_flow *f;
 
        f = mvpp2_cls_flow_get(entry->flow);
        if (!f)
@@ -134,7 +142,7 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
        struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
        struct mvpp2_port *port = entry->port;
        struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_cls_flow *f;
        int flow_index;
        u16 hash_opts;
 
@@ -142,7 +150,7 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
        if (!f)
                return -EINVAL;
 
-       flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+       flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id);
 
        mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -153,42 +161,21 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
        return 0;
 }
 
-static int mvpp2_dbgfs_port_flow_hash_opt_open(struct inode *inode,
-                                              struct file *file)
-{
-       return single_open(file, mvpp2_dbgfs_port_flow_hash_opt_show,
-                          inode->i_private);
-}
-
-static int mvpp2_dbgfs_port_flow_hash_opt_release(struct inode *inode,
-                                                 struct file *file)
-{
-       struct seq_file *seq = file->private_data;
-       struct mvpp2_dbgfs_port_flow_entry *flow_entry = seq->private;
-
-       kfree(flow_entry);
-       return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_port_flow_hash_opt_fops = {
-       .open = mvpp2_dbgfs_port_flow_hash_opt_open,
-       .read = seq_read,
-       .release = mvpp2_dbgfs_port_flow_hash_opt_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_hash_opt);
 
 static int mvpp2_dbgfs_port_flow_engine_show(struct seq_file *s, void *unused)
 {
        struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
        struct mvpp2_port *port = entry->port;
        struct mvpp2_cls_flow_entry fe;
-       struct mvpp2_cls_flow *f;
+       const struct mvpp2_cls_flow *f;
        int flow_index, engine;
 
        f = mvpp2_cls_flow_get(entry->dbg_fe->flow);
        if (!f)
                return -EINVAL;
 
-       flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+       flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id);
 
        mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -203,11 +190,10 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_engine);
 
 static int mvpp2_dbgfs_flow_c2_hits_show(struct seq_file *s, void *unused)
 {
-       struct mvpp2_port *port = s->private;
+       struct mvpp2_dbgfs_c2_entry *entry = s->private;
        u32 hits;
 
-       hits = mvpp2_cls_c2_hit_count(port->priv,
-                                     MVPP22_CLS_C2_RSS_ENTRY(port->id));
+       hits = mvpp2_cls_c2_hit_count(entry->priv, entry->id);
 
        seq_printf(s, "%u\n", hits);
 
@@ -218,11 +204,11 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_hits);
 
 static int mvpp2_dbgfs_flow_c2_rxq_show(struct seq_file *s, void *unused)
 {
-       struct mvpp2_port *port = s->private;
+       struct mvpp2_dbgfs_c2_entry *entry = s->private;
        struct mvpp2_cls_c2_entry c2;
        u8 qh, ql;
 
-       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+       mvpp2_cls_c2_read(entry->priv, entry->id, &c2);
 
        qh = (c2.attr[0] >> MVPP22_CLS_C2_ATTR0_QHIGH_OFFS) &
             MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
@@ -239,11 +225,11 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_rxq);
 
 static int mvpp2_dbgfs_flow_c2_enable_show(struct seq_file *s, void *unused)
 {
-       struct mvpp2_port *port = s->private;
+       struct mvpp2_dbgfs_c2_entry *entry = s->private;
        struct mvpp2_cls_c2_entry c2;
        int enabled;
 
-       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+       mvpp2_cls_c2_read(entry->priv, entry->id, &c2);
 
        enabled = !!(c2.attr[2] & MVPP22_CLS_C2_ATTR2_RSS_EN);
 
@@ -456,25 +442,7 @@ static int mvpp2_dbgfs_prs_valid_show(struct seq_file *s, void *unused)
        return 0;
 }
 
-static int mvpp2_dbgfs_prs_valid_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, mvpp2_dbgfs_prs_valid_show, inode->i_private);
-}
-
-static int mvpp2_dbgfs_prs_valid_release(struct inode *inode, struct file *file)
-{
-       struct seq_file *seq = file->private_data;
-       struct mvpp2_dbgfs_prs_entry *entry = seq->private;
-
-       kfree(entry);
-       return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_prs_valid_fops = {
-       .open = mvpp2_dbgfs_prs_valid_open,
-       .read = seq_read,
-       .release = mvpp2_dbgfs_prs_valid_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_valid);
 
 static int mvpp2_dbgfs_flow_port_init(struct dentry *parent,
                                      struct mvpp2_port *port,
@@ -487,10 +455,7 @@ static int mvpp2_dbgfs_flow_port_init(struct dentry *parent,
        if (IS_ERR(port_dir))
                return PTR_ERR(port_dir);
 
-       /* This will be freed by 'hash_opts' release op */
-       port_entry = kmalloc(sizeof(*port_entry), GFP_KERNEL);
-       if (!port_entry)
-               return -ENOMEM;
+       port_entry = &port->priv->dbgfs_entries->port_flow_entries[port->id];
 
        port_entry->port = port;
        port_entry->dbg_fe = entry;
@@ -518,17 +483,11 @@ static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent,
        if (!flow_entry_dir)
                return -ENOMEM;
 
-       /* This will be freed by 'type' release op */
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry)
-               return -ENOMEM;
+       entry = &priv->dbgfs_entries->flow_entries[flow];
 
        entry->flow = flow;
        entry->priv = priv;
 
-       debugfs_create_file("flow_hits", 0444, flow_entry_dir, entry,
-                           &mvpp2_dbgfs_flow_flt_hits_fops);
-
        debugfs_create_file("dec_hits", 0444, flow_entry_dir, entry,
                            &mvpp2_dbgfs_flow_dec_hits_fops);
 
@@ -545,6 +504,7 @@ static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent,
                if (ret)
                        return ret;
        }
+
        return 0;
 }
 
@@ -557,7 +517,7 @@ static int mvpp2_dbgfs_flow_init(struct dentry *parent, struct mvpp2 *priv)
        if (!flow_dir)
                return -ENOMEM;
 
-       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+       for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) {
                ret = mvpp2_dbgfs_flow_entry_init(flow_dir, priv, i);
                if (ret)
                        return ret;
@@ -582,10 +542,7 @@ static int mvpp2_dbgfs_prs_entry_init(struct dentry *parent,
        if (!prs_entry_dir)
                return -ENOMEM;
 
-       /* The 'valid' entry's ops will free that */
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry)
-               return -ENOMEM;
+       entry = &priv->dbgfs_entries->prs_entries[tid];
 
        entry->tid = tid;
        entry->priv = priv;
@@ -630,6 +587,98 @@ static int mvpp2_dbgfs_prs_init(struct dentry *parent, struct mvpp2 *priv)
        return 0;
 }
 
+static int mvpp2_dbgfs_c2_entry_init(struct dentry *parent,
+                                    struct mvpp2 *priv, int id)
+{
+       struct mvpp2_dbgfs_c2_entry *entry;
+       struct dentry *c2_entry_dir;
+       char c2_entry_name[10];
+
+       if (id >= MVPP22_CLS_C2_N_ENTRIES)
+               return -EINVAL;
+
+       sprintf(c2_entry_name, "%03d", id);
+
+       c2_entry_dir = debugfs_create_dir(c2_entry_name, parent);
+       if (!c2_entry_dir)
+               return -ENOMEM;
+
+       entry = &priv->dbgfs_entries->c2_entries[id];
+
+       entry->id = id;
+       entry->priv = priv;
+
+       debugfs_create_file("hits", 0444, c2_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_c2_hits_fops);
+
+       debugfs_create_file("default_rxq", 0444, c2_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_c2_rxq_fops);
+
+       debugfs_create_file("rss_enable", 0444, c2_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_c2_enable_fops);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_flow_tbl_entry_init(struct dentry *parent,
+                                          struct mvpp2 *priv, int id)
+{
+       struct mvpp2_dbgfs_flow_tbl_entry *entry;
+       struct dentry *flow_tbl_entry_dir;
+       char flow_tbl_entry_name[10];
+
+       if (id >= MVPP2_CLS_FLOWS_TBL_SIZE)
+               return -EINVAL;
+
+       sprintf(flow_tbl_entry_name, "%03d", id);
+
+       flow_tbl_entry_dir = debugfs_create_dir(flow_tbl_entry_name, parent);
+       if (!flow_tbl_entry_dir)
+               return -ENOMEM;
+
+       entry = &priv->dbgfs_entries->flt_entries[id];
+
+       entry->id = id;
+       entry->priv = priv;
+
+       debugfs_create_file("hits", 0444, flow_tbl_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_flt_hits_fops);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_cls_init(struct dentry *parent, struct mvpp2 *priv)
+{
+       struct dentry *cls_dir, *c2_dir, *flow_tbl_dir;
+       int i, ret;
+
+       cls_dir = debugfs_create_dir("classifier", parent);
+       if (!cls_dir)
+               return -ENOMEM;
+
+       c2_dir = debugfs_create_dir("c2", cls_dir);
+       if (!c2_dir)
+               return -ENOMEM;
+
+       for (i = 0; i < MVPP22_CLS_C2_N_ENTRIES; i++) {
+               ret = mvpp2_dbgfs_c2_entry_init(c2_dir, priv, i);
+               if (ret)
+                       return ret;
+       }
+
+       flow_tbl_dir = debugfs_create_dir("flow_table", cls_dir);
+       if (!flow_tbl_dir)
+               return -ENOMEM;
+
+       for (i = 0; i < MVPP2_CLS_FLOWS_TBL_SIZE; i++) {
+               ret = mvpp2_dbgfs_flow_tbl_entry_init(flow_tbl_dir, priv, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int mvpp2_dbgfs_port_init(struct dentry *parent,
                                 struct mvpp2_port *port)
 {
@@ -648,21 +697,14 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent,
        debugfs_create_file("vid_filter", 0444, port_dir, port,
                            &mvpp2_dbgfs_port_vid_fops);
 
-       debugfs_create_file("c2_hits", 0444, port_dir, port,
-                           &mvpp2_dbgfs_flow_c2_hits_fops);
-
-       debugfs_create_file("default_rxq", 0444, port_dir, port,
-                           &mvpp2_dbgfs_flow_c2_rxq_fops);
-
-       debugfs_create_file("rss_enable", 0444, port_dir, port,
-                           &mvpp2_dbgfs_flow_c2_enable_fops);
-
        return 0;
 }
 
 void mvpp2_dbgfs_cleanup(struct mvpp2 *priv)
 {
        debugfs_remove_recursive(priv->dbgfs_dir);
+
+       kfree(priv->dbgfs_entries);
 }
 
 void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
@@ -682,11 +724,18 @@ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
                return;
 
        priv->dbgfs_dir = mvpp2_dir;
+       priv->dbgfs_entries = kzalloc(sizeof(*priv->dbgfs_entries), GFP_KERNEL);
+       if (!priv->dbgfs_entries)
+               goto err;
 
        ret = mvpp2_dbgfs_prs_init(mvpp2_dir, priv);
        if (ret)
                goto err;
 
+       ret = mvpp2_dbgfs_cls_init(mvpp2_dir, priv);
+       if (ret)
+               goto err;
+
        for (i = 0; i < priv->port_count; i++) {
                ret = mvpp2_dbgfs_port_init(mvpp2_dir, priv->port_list[i]);
                if (ret)
index 25fbed2b8d94674d43133f327740048773827115..f128ea22b33958775d32ad176f5f21cad950889f 100644 (file)
@@ -3741,9 +3741,9 @@ static int mvpp2_set_features(struct net_device *dev,
 
        if (changed & NETIF_F_RXHASH) {
                if (features & NETIF_F_RXHASH)
-                       mvpp22_rss_enable(port);
+                       mvpp22_port_rss_enable(port);
                else
-                       mvpp22_rss_disable(port);
+                       mvpp22_port_rss_disable(port);
        }
 
        return 0;
@@ -4301,7 +4301,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
        mvpp2_cls_port_config(port);
 
        if (mvpp22_rss_is_supported())
-               mvpp22_rss_port_init(port);
+               mvpp22_port_rss_init(port);
 
        /* Provide an initial Rx packet size */
        port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu);
@@ -4848,6 +4848,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        struct mvpp2_port *port;
        struct mvpp2_port_pcpu *port_pcpu;
        struct device_node *port_node = to_of_node(port_fwnode);
+       netdev_features_t features;
        struct net_device *dev;
        struct resource *res;
        struct phylink *phylink;
@@ -4856,7 +4857,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        unsigned long flags = 0;
        bool has_tx_irqs;
        u32 id;
-       int features;
        int phy_mode;
        int err, i;
 
index f02367faa58dbe44171454de6af50777ddc0ebf3..f628971988449a94898d8da72ffa3743d7a3d694 100644 (file)
@@ -205,6 +205,7 @@ enum bpf_return_type {
        RET_PTR_TO_MAP_VALUE_OR_NULL,   /* returns a pointer to map elem value or NULL */
        RET_PTR_TO_SOCKET_OR_NULL,      /* returns a pointer to a socket or NULL */
        RET_PTR_TO_TCP_SOCK_OR_NULL,    /* returns a pointer to a tcp_sock or NULL */
+       RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
index 929c8e537a14a517c0a3c7ca5b6b15353d622c30..837024512bafd92c3773282ac5362d826fc93502 100644 (file)
@@ -1478,13 +1478,27 @@ union bpf_attr {
  *             Grow or shrink the room for data in the packet associated to
  *             *skb* by *len_diff*, and according to the selected *mode*.
  *
- *             There is a single supported mode at this time:
+ *             There are two supported modes at this time:
+ *
+ *             * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ *               (room space is added or removed below the layer 2 header).
  *
  *             * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  *               (room space is added or removed below the layer 3 header).
  *
- *             All values for *flags* are reserved for future usage, and must
- *             be left at zero.
+ *             The following flags are supported at this time:
+ *
+ *             * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ *               Adjusting mss in this way is not allowed for datagrams.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**:
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ *               Any new space is reserved to hold a tunnel header.
+ *               Configure skb offsets and other fields accordingly.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**:
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ *               Use with ENCAP_L3 flags to further specify the tunnel type.
  *
 *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2431,6 +2445,38 @@ union bpf_attr {
  *     Return
  *             A **struct bpf_sock** pointer on success, or **NULL** in
  *             case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *     Description
+ *             Look for TCP socket matching *tuple*, optionally in a child
+ *             network namespace *netns*. The return value must be checked,
+ *             and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ *             This function is identical to bpf_sk_lookup_tcp, except that it
+ *             also returns timewait or request sockets. Use bpf_sk_fullsock
+ *             or bpf_tcp_sock to access the full structure.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             **CONFIG_NET** configuration option.
+ *     Return
+ *             Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ *             For sockets with reuseport option, the **struct bpf_sock**
+ *             result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *     Description
+ *             Check whether iph and th contain a valid SYN cookie ACK for
+ *             the listening socket in sk.
+ *
+ *             iph points to the start of the IPv4 or IPv6 header, while
+ *             iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
+ *
+ *             th points to the start of the TCP header, while th_len contains
+ *             sizeof(struct tcphdr).
+ *
+ *     Return
+ *             0 if iph and th are a valid SYN cookie ACK, or a negative error
+ *             otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2531,7 +2577,9 @@ union bpf_attr {
        FN(sk_fullsock),                \
        FN(tcp_sock),                   \
        FN(skb_ecn_set_ce),             \
-       FN(get_listener_sock),
+       FN(get_listener_sock),          \
+       FN(skc_lookup_tcp),             \
+       FN(tcp_check_syncookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2590,9 +2638,18 @@ enum bpf_func_id {
 /* Current network namespace */
 #define BPF_F_CURRENT_NETNS            (-1L)
 
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO       (1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4   (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6   (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE    (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP    (1ULL << 4)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
        BPF_ADJ_ROOM_NET,
+       BPF_ADJ_ROOM_MAC,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
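
To make the new mode and flags concrete, here is a hedged sketch of a tc BPF
program that opens room below the MAC header for an outer IPv4 + GRE header,
in the spirit of the test_tc_tunnel.c selftest added by this merge; the
section name and the elided header-writing step are placeholders:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"	/* SEC() and helper declarations (selftests) */

SEC("encap_gre")
int encap_gre(struct __sk_buff *skb)
{
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO |
		      BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
		      BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
	/* Basic GRE header is 4 bytes; room opens below the L2 header. */
	int len = sizeof(struct iphdr) + 4;

	if (bpf_skb_adjust_room(skb, len, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* ...write the outer IPv4 + GRE headers here, e.g. with
	 * bpf_skb_store_bytes()... */
	return TC_ACT_OK;
}

char __license[] SEC("license") = "GPL";

With BPF_F_ADJ_ROOM_FIXED_GSO the helper leaves gso_size untouched, which is
what makes the UDP GSO case legal: there gso_size delineates datagrams and
must not be rescaled.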
index 86f9cd5d1c4e1a747e2173152c3e6e9adfef9be8..dffeec3706ce60a1c9779c00faedebe300da009f 100644 (file)
@@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id)
 static bool is_acquire_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_sk_lookup_tcp ||
-               func_id == BPF_FUNC_sk_lookup_udp;
+               func_id == BPF_FUNC_sk_lookup_udp ||
+               func_id == BPF_FUNC_skc_lookup_tcp;
 }
 
 static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@ -3147,19 +3148,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
        } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
                mark_reg_known_zero(env, regs, BPF_REG_0);
                regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-               if (is_acquire_function(func_id)) {
-                       int id = acquire_reference_state(env, insn_idx);
-
-                       if (id < 0)
-                               return id;
-                       /* For mark_ptr_or_null_reg() */
-                       regs[BPF_REG_0].id = id;
-                       /* For release_reference() */
-                       regs[BPF_REG_0].ref_obj_id = id;
-               } else {
-                       /* For mark_ptr_or_null_reg() */
-                       regs[BPF_REG_0].id = ++env->id_gen;
-               }
+               regs[BPF_REG_0].id = ++env->id_gen;
+       } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
+               mark_reg_known_zero(env, regs, BPF_REG_0);
+               regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
+               regs[BPF_REG_0].id = ++env->id_gen;
        } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
                mark_reg_known_zero(env, regs, BPF_REG_0);
                regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
@@ -3170,9 +3163,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                return -EINVAL;
        }
 
-       if (is_ptr_cast_function(func_id))
+       if (is_ptr_cast_function(func_id)) {
                /* For release_reference() */
                regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+       } else if (is_acquire_function(func_id)) {
+               int id = acquire_reference_state(env, insn_idx);
+
+               if (id < 0)
+                       return id;
+               /* For mark_ptr_or_null_reg() */
+               regs[BPF_REG_0].id = id;
+               /* For release_reference() */
+               regs[BPF_REG_0].ref_obj_id = id;
+       }
 
        do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
 
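The verifier hunk above makes bpf_skc_lookup_tcp() an acquire function, so a
non-NULL result must be released before the program exits. A hedged sketch of
the expected usage, loosely following the new test_tcp_check_syncookie_kern.c
selftest (includes as in the previous sketch, plus linux/tcp.h; the
bounds-checked header parsing that produces iph/th is elided):

/* Sketch only: called from a tc program after parsing the IPv4/TCP headers. */
static int check_cookie_v4(struct __sk_buff *skb, struct iphdr *iph,
			   struct tcphdr *th)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;
	int ret;

	tuple.ipv4.saddr = iph->saddr;
	tuple.ipv4.daddr = iph->daddr;
	tuple.ipv4.sport = th->source;
	tuple.ipv4.dport = th->dest;

	sk = bpf_skc_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
				BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;

	/* sk may be a listener, timewait or request socket */
	ret = bpf_tcp_check_syncookie(sk, iph, sizeof(*iph), th, sizeof(*th));

	/* Non-NULL results from skc_lookup_tcp hold a reference; the
	 * verifier now rejects the program if this release is missing. */
	bpf_sk_release(sk);

	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
}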
index 647c63a7b25b6745e75a812b65a4052f3c72b690..22eb2edf55734f2f4db3e72d636aad90c1de4000 100644 (file)
@@ -2963,42 +2963,113 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
        }
 }
 
-static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK   (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+
+#define BPF_F_ADJ_ROOM_MASK            (BPF_F_ADJ_ROOM_FIXED_GSO | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
+                                        BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
+                           u64 flags)
 {
-       u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+       bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
+       u16 mac_len = 0, inner_net = 0, inner_trans = 0;
+       unsigned int gso_type = SKB_GSO_DODGY;
        int ret;
 
-       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-               return -ENOTSUPP;
+       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+               /* udp gso_size delineates datagrams, only allow if fixed */
+               if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+                   !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       return -ENOTSUPP;
+       }
 
-       ret = skb_cow(skb, len_diff);
+       ret = skb_cow_head(skb, len_diff);
        if (unlikely(ret < 0))
                return ret;
 
+       if (encap) {
+               if (skb->protocol != htons(ETH_P_IP) &&
+                   skb->protocol != htons(ETH_P_IPV6))
+                       return -ENOTSUPP;
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
+                   flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+                       return -EINVAL;
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
+                   flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+                       return -EINVAL;
+
+               if (skb->encapsulation)
+                       return -EALREADY;
+
+               mac_len = skb->network_header - skb->mac_header;
+               inner_net = skb->network_header;
+               inner_trans = skb->transport_header;
+       }
+
        ret = bpf_skb_net_hdr_push(skb, off, len_diff);
        if (unlikely(ret < 0))
                return ret;
 
+       if (encap) {
+               /* inner mac == inner_net on l3 encap */
+               skb->inner_mac_header = inner_net;
+               skb->inner_network_header = inner_net;
+               skb->inner_transport_header = inner_trans;
+               skb_set_inner_protocol(skb, skb->protocol);
+
+               skb->encapsulation = 1;
+               skb_set_network_header(skb, mac_len);
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+                       gso_type |= SKB_GSO_UDP_TUNNEL;
+               else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
+                       gso_type |= SKB_GSO_GRE;
+               else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+                       gso_type |= SKB_GSO_IPXIP6;
+               else
+                       gso_type |= SKB_GSO_IPXIP4;
+
+               if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
+                   flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
+                       int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
+                                       sizeof(struct ipv6hdr) :
+                                       sizeof(struct iphdr);
+
+                       skb_set_transport_header(skb, mac_len + nh_len);
+               }
+       }
+
        if (skb_is_gso(skb)) {
                struct skb_shared_info *shinfo = skb_shinfo(skb);
 
                /* Due to header grow, MSS needs to be downgraded. */
-               skb_decrease_gso_size(shinfo, len_diff);
+               if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       skb_decrease_gso_size(shinfo, len_diff);
+
                /* Header must be checked, and gso_segs recomputed. */
-               shinfo->gso_type |= SKB_GSO_DODGY;
+               shinfo->gso_type |= gso_type;
                shinfo->gso_segs = 0;
        }
 
        return 0;
 }
 
-static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
+                             u64 flags)
 {
-       u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
        int ret;
 
-       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-               return -ENOTSUPP;
+       if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+               /* udp gso_size delineates datagrams, only allow if fixed */
+               if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+                   !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       return -ENOTSUPP;
+       }
 
        ret = skb_unclone(skb, GFP_ATOMIC);
        if (unlikely(ret < 0))
@@ -3012,7 +3083,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
                struct skb_shared_info *shinfo = skb_shinfo(skb);
 
                /* Due to header shrink, MSS can be upgraded. */
-               skb_increase_gso_size(shinfo, len_diff);
+               if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+                       skb_increase_gso_size(shinfo, len_diff);
+
                /* Header must be checked, and gso_segs recomputed. */
                shinfo->gso_type |= SKB_GSO_DODGY;
                shinfo->gso_segs = 0;
@@ -3027,49 +3100,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
                          SKB_MAX_ALLOC;
 }
 
-static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+          u32, mode, u64, flags)
 {
-       bool trans_same = skb->transport_header == skb->network_header;
        u32 len_cur, len_diff_abs = abs(len_diff);
        u32 len_min = bpf_skb_net_base_len(skb);
        u32 len_max = __bpf_skb_max_len(skb);
        __be16 proto = skb->protocol;
        bool shrink = len_diff < 0;
+       u32 off;
        int ret;
 
+       if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+               return -EINVAL;
        if (unlikely(len_diff_abs > 0xfffU))
                return -EFAULT;
        if (unlikely(proto != htons(ETH_P_IP) &&
                     proto != htons(ETH_P_IPV6)))
                return -ENOTSUPP;
 
+       off = skb_mac_header_len(skb);
+       switch (mode) {
+       case BPF_ADJ_ROOM_NET:
+               off += bpf_skb_net_base_len(skb);
+               break;
+       case BPF_ADJ_ROOM_MAC:
+               break;
+       default:
+               return -ENOTSUPP;
+       }
+
        len_cur = skb->len - skb_network_offset(skb);
-       if (skb_transport_header_was_set(skb) && !trans_same)
-               len_cur = skb_network_header_len(skb);
        if ((shrink && (len_diff_abs >= len_cur ||
                        len_cur - len_diff_abs < len_min)) ||
            (!shrink && (skb->len + len_diff_abs > len_max &&
                         !skb_is_gso(skb))))
                return -ENOTSUPP;
 
-       ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
-                      bpf_skb_net_grow(skb, len_diff_abs);
+       ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
+                      bpf_skb_net_grow(skb, off, len_diff_abs, flags);
 
        bpf_compute_data_pointers(skb);
        return ret;
 }
 
-BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-          u32, mode, u64, flags)
-{
-       if (unlikely(flags))
-               return -EINVAL;
-       if (likely(mode == BPF_ADJ_ROOM_NET))
-               return bpf_skb_adjust_net(skb, len_diff);
-
-       return -ENOTSUPP;
-}
-
 static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
        .func           = bpf_skb_adjust_room,
        .gpl_only       = false,
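For illustration, a minimal sketch of how a tc program might use the new mode and flags together (hypothetical fragment, not part of this patch; the outer header contents still have to be written separately, e.g. with bpf_skb_store_bytes(), as the test_tc_tunnel.c selftest below does):

	/* grow headroom below the MAC header for an outer IPv4 + GRE
	 * header (20 + 4 bytes), leaving gso_size untouched
	 */
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO |
		      BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
		      BPF_F_ADJ_ROOM_ENCAP_L4_GRE;

	if (bpf_skb_adjust_room(skb, sizeof(struct iphdr) + 4,
				BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;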
@@ -5156,15 +5230,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
        return sk;
 }
 
-/* bpf_sk_lookup performs the core lookup for different types of sockets,
+/* bpf_skc_lookup performs the core lookup for different types of sockets,
  * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
  * Returns the socket as an 'unsigned long' to simplify the casting in the
  * callers to satisfy BPF_CALL declarations.
  */
-static unsigned long
-__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
-               struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
-               u64 flags)
+static struct sock *
+__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+                struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+                u64 flags)
 {
        struct sock *sk = NULL;
        u8 family = AF_UNSPEC;
@@ -5192,15 +5266,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
                put_net(net);
        }
 
+out:
+       return sk;
+}
+
+static struct sock *
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+               struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+               u64 flags)
+{
+       struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
+                                          ifindex, proto, netns_id, flags);
+
        if (sk)
                sk = sk_to_full_sk(sk);
-out:
-       return (unsigned long) sk;
+
+       return sk;
 }
 
-static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
-             u8 proto, u64 netns_id, u64 flags)
+static struct sock *
+bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+              u8 proto, u64 netns_id, u64 flags)
 {
        struct net *caller_net;
        int ifindex;
@@ -5213,14 +5299,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
                ifindex = 0;
        }
 
-       return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
-                             proto, netns_id, flags);
+       return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
+                               netns_id, flags);
+}
+
+static struct sock *
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+             u8 proto, u64 netns_id, u64 flags)
+{
+       struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
+                                        flags);
+
+       if (sk)
+               sk = sk_to_full_sk(sk);
+
+       return sk;
+}
+
+BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
+          struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+       return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+                                            netns_id, flags);
 }
 
+static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
+       .func           = bpf_skc_lookup_tcp,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
 {
-       return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+       return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+                                           netns_id, flags);
 }
 
 static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
@@ -5238,7 +5357,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
 BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
 {
-       return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+       return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+                                           netns_id, flags);
 }
 
 static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
@@ -5273,8 +5393,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
        struct net *caller_net = dev_net(ctx->rxq->dev);
        int ifindex = ctx->rxq->dev->ifindex;
 
-       return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
-                             IPPROTO_UDP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+                                             ifindex, IPPROTO_UDP, netns_id,
+                                             flags);
 }
 
 static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
@@ -5289,14 +5410,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
+          struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+       struct net *caller_net = dev_net(ctx->rxq->dev);
+       int ifindex = ctx->rxq->dev->ifindex;
+
+       return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
+                                              ifindex, IPPROTO_TCP, netns_id,
+                                              flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+       .func           = bpf_xdp_skc_lookup_tcp,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
           struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
 {
        struct net *caller_net = dev_net(ctx->rxq->dev);
        int ifindex = ctx->rxq->dev->ifindex;
 
-       return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
-                             IPPROTO_TCP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+                                             ifindex, IPPROTO_TCP, netns_id,
+                                             flags);
 }
 
 static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
@@ -5311,11 +5456,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+          struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+       return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
+                                              sock_net(ctx->sk), 0,
+                                              IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+       .func           = bpf_sock_addr_skc_lookup_tcp,
+       .gpl_only       = false,
+       .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
 {
-       return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
-                              IPPROTO_TCP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+                                             sock_net(ctx->sk), 0, IPPROTO_TCP,
+                                             netns_id, flags);
 }
 
 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
@@ -5332,8 +5497,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
 BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
 {
-       return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
-                              IPPROTO_UDP, netns_id, flags);
+       return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+                                             sock_net(ctx->sk), 0, IPPROTO_UDP,
+                                             netns_id, flags);
 }
 
 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
@@ -5461,6 +5627,74 @@ static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_CTX,
 };
+
+BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+          struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+       u32 cookie;
+       int ret;
+
+       if (unlikely(th_len < sizeof(*th)))
+               return -EINVAL;
+
+       /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
+       if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+               return -EINVAL;
+
+       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+               return -EINVAL;
+
+       if (!th->ack || th->rst || th->syn)
+               return -ENOENT;
+
+       if (tcp_synq_no_recent_overflow(sk))
+               return -ENOENT;
+
+       cookie = ntohl(th->ack_seq) - 1;
+
+       switch (sk->sk_family) {
+       case AF_INET:
+               if (unlikely(iph_len < sizeof(struct iphdr)))
+                       return -EINVAL;
+
+               ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+               break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+       case AF_INET6:
+               if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+                       return -EINVAL;
+
+               ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+               break;
+#endif /* CONFIG_IPV6 */
+
+       default:
+               return -EPROTONOSUPPORT;
+       }
+
+       if (ret > 0)
+               return 0;
+
+       return -ENOENT;
+#else
+       return -ENOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
+       .func           = bpf_tcp_check_syncookie,
+       .gpl_only       = true,
+       .pkt_access     = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
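Taken together with bpf_skc_lookup_tcp(), the expected call sequence is: look the tuple up as a sock_common, check that it is a listener, validate the cookie, then drop the reference. A minimal sketch under those assumptions (hypothetical fragment; tuple and header pointers assumed already populated and bounds-checked):

	struct bpf_sock *sk;

	sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4),
				BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return XDP_PASS;

	if (sk->state == BPF_TCP_LISTEN &&
	    !bpf_tcp_check_syncookie(sk, iph, sizeof(*iph),
				     tcph, sizeof(*tcph))) {
		/* valid SYN cookie ACK for this listener */
	}

	bpf_sk_release(sk);	/* the lookup took a reference */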
@@ -5586,6 +5820,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sock_addr_sk_lookup_udp_proto;
        case BPF_FUNC_sk_release:
                return &bpf_sk_release_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_sock_addr_skc_lookup_tcp_proto;
 #endif /* CONFIG_INET */
        default:
                return bpf_base_func_proto(func_id);
@@ -5719,6 +5955,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_tcp_sock_proto;
        case BPF_FUNC_get_listener_sock:
                return &bpf_get_listener_sock_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_skc_lookup_tcp_proto;
+       case BPF_FUNC_tcp_check_syncookie:
+               return &bpf_tcp_check_syncookie_proto;
+       case BPF_FUNC_skb_ecn_set_ce:
+               return &bpf_skb_ecn_set_ce_proto;
 #endif
        default:
                return bpf_base_func_proto(func_id);
@@ -5754,6 +5996,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_sk_lookup_tcp_proto;
        case BPF_FUNC_sk_release:
                return &bpf_sk_release_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_xdp_skc_lookup_tcp_proto;
+       case BPF_FUNC_tcp_check_syncookie:
+               return &bpf_tcp_check_syncookie_proto;
 #endif
        default:
                return bpf_base_func_proto(func_id);
@@ -5846,6 +6092,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sk_lookup_udp_proto;
        case BPF_FUNC_sk_release:
                return &bpf_sk_release_proto;
+       case BPF_FUNC_skc_lookup_tcp:
+               return &bpf_skc_lookup_tcp_proto;
 #endif
        default:
                return bpf_base_func_proto(func_id);
index dbb817dbacfcaefd42daa9d55ca04addb05fc83a..59e40998e2493ba75cc72d112404ddfbd1df5655 100644 (file)
@@ -44,5 +44,6 @@ xdp_redirect_cpu
 xdp_redirect_map
 xdp_router_ipv4
 xdp_rxq_info
+xdp_sample_pkts
 xdp_tx_iptunnel
 xdpsock
index 929c8e537a14a517c0a3c7ca5b6b15353d622c30..837024512bafd92c3773282ac5362d826fc93502 100644 (file)
@@ -1478,13 +1478,27 @@ union bpf_attr {
  *             Grow or shrink the room for data in the packet associated to
  *             *skb* by *len_diff*, and according to the selected *mode*.
  *
- *             There is a single supported mode at this time:
+ *             There are two supported modes at this time:
+ *
+ *             * **BPF_ADJ_ROOM_MAC**: Adjust room at the MAC layer
+ *               (room space is added or removed below the layer 2 header).
 *
 *             * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
 *               (room space is added or removed below the layer 3 header).
 *
- *             All values for *flags* are reserved for future usage, and must
- *             be left at zero.
+ *             The following flags are supported at this time:
+ *
+ *             * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ *               Adjusting MSS in this way is not allowed for datagrams.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**:
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ *               Any new space is reserved to hold a tunnel header.
+ *               Configure skb offsets and other fields accordingly.
+ *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**:
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ *               Use with ENCAP_L3 flags to further specify the tunnel type.
  *
 *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2431,6 +2445,38 @@ union bpf_attr {
  *     Return
  *             A **struct bpf_sock** pointer on success, or **NULL** in
  *             case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *     Description
+ *             Look for TCP socket matching *tuple*, optionally in a child
+ *             network namespace *netns*. The return value must be checked,
+ *             and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ *             This function is identical to **bpf_sk_lookup_tcp**\ (), except
+ *             that it also returns timewait or request sockets. Use
+ *             **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
+ *             full structure.
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             **CONFIG_NET** configuration option.
+ *     Return
+ *             Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ *             For sockets with reuseport option, the **struct bpf_sock**
+ *             result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *     Description
+ *             Check whether iph and th contain a valid SYN cookie ACK for
+ *             the listening socket in sk.
+ *
+ *             iph points to the start of the IPv4 or IPv6 header, while
+ *             iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
+ *
+ *             th points to the start of the TCP header, while th_len contains
+ *             sizeof(struct tcphdr).
+ *
+ *     Return
+ *             0 if iph and th are a valid SYN cookie ACK, or a negative error
+ *             otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2531,7 +2577,9 @@ union bpf_attr {
        FN(sk_fullsock),                \
        FN(tcp_sock),                   \
        FN(skb_ecn_set_ce),             \
-       FN(get_listener_sock),
+       FN(get_listener_sock),          \
+       FN(skc_lookup_tcp),             \
+       FN(tcp_check_syncookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2590,9 +2638,18 @@ enum bpf_func_id {
 /* Current network namespace */
 #define BPF_F_CURRENT_NETNS            (-1L)
 
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO       (1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4   (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6   (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE    (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP    (1ULL << 4)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
        BPF_ADJ_ROOM_NET,
+       BPF_ADJ_ROOM_MAC,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
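The shrink direction composes from the same pieces. A hypothetical decap sketch (mirroring what the test_tc_tunnel.c selftest below does), assuming an outer IPv4 + GRE header of known size:

	/* remove the 24-byte outer header below the MAC header; keep
	 * gso_size fixed so the segmentation geometry is unchanged
	 */
	if (bpf_skb_adjust_room(skb, -(int)(sizeof(struct iphdr) + 4),
				BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO))
		return TC_ACT_SHOT;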
index 3b74d23fffabe46e5fac0f78b4c28be5dce6d13c..41e8a689aa77744e91282bd14a98c9d16a20dc2a 100644 (file)
@@ -30,4 +30,5 @@ test_netcnt
 test_section_names
 test_tcpnotify_user
 test_libbpf
+test_tcp_check_syncookie_user
 alu32
index 2aed37ea61a4cba8f2e871686d7a94590cac3326..77b73b892136be4f96fa772ad9e24c9d8a77544c 100644 (file)
@@ -51,7 +51,10 @@ TEST_PROGS := test_kmod.sh \
        test_skb_cgroup_id.sh \
        test_flow_dissector.sh \
        test_xdp_vlan.sh \
-       test_lwt_ip_encap.sh
+       test_lwt_ip_encap.sh \
+       test_tcp_check_syncookie.sh \
+       test_tc_tunnel.sh \
+       test_tc_edt.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
        with_tunnels.sh \
@@ -60,7 +63,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
-       flow_dissector_load test_flow_dissector
+       flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user
 
 include ../lib.mk
 
@@ -69,7 +72,7 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 all: $(TEST_CUSTOM_PROGS)
 
 $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
-       $(CC) -o $@ -static $< -Wl,--build-id
+       $(CC) -o $@ $< -Wl,--build-id
 
 BPFOBJ := $(OUTPUT)/libbpf.a
 
index c81fc350f7ad46ad60d53ac3dd8121059020f9a6..97d140961438fa5171175fa463a04301105771c6 100644 (file)
@@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
                                             int size, unsigned long long netns_id,
                                             unsigned long long flags) =
        (void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx,
+                                            struct bpf_sock_tuple *tuple,
+                                            int size, unsigned long long netns_id,
+                                            unsigned long long flags) =
+       (void *) BPF_FUNC_skc_lookup_tcp;
 static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
                                             struct bpf_sock_tuple *tuple,
                                             int size, unsigned long long netns_id,
@@ -184,6 +189,9 @@ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
        (void *) BPF_FUNC_get_listener_sock;
 static int (*bpf_skb_ecn_set_ce)(void *ctx) =
        (void *) BPF_FUNC_skb_ecn_set_ce;
+static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
+           void *ip, int ip_len, void *tcp, int tcp_len) =
+       (void *) BPF_FUNC_tcp_check_syncookie;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -274,6 +282,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
 #elif defined(__TARGET_ARCH_s930x)
        #define bpf_target_s930x
        #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+       #define bpf_target_arm
+       #define bpf_target_defined
 #elif defined(__TARGET_ARCH_arm64)
        #define bpf_target_arm64
        #define bpf_target_defined
@@ -296,6 +307,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
        #define bpf_target_x86
 #elif defined(__s390x__)
        #define bpf_target_s930x
+#elif defined(__arm__)
+       #define bpf_target_arm
 #elif defined(__aarch64__)
        #define bpf_target_arm64
 #elif defined(__mips__)
@@ -333,6 +346,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
 #define PT_REGS_SP(x) ((x)->gprs[15])
 #define PT_REGS_IP(x) ((x)->psw.addr)
 
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
 #elif defined(bpf_target_arm64)
 
 #define PT_REGS_PARM1(x) ((x)->regs[0])
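With these mappings a kprobe program can stay architecture-neutral. A hypothetical sketch (the traced function and its argument are illustrative only; build with -D__TARGET_ARCH_arm for the newly added target):

	SEC("kprobe/tcp_v4_connect")
	int trace_connect(struct pt_regs *ctx)
	{
		/* uregs[0] on arm, regs[0] on arm64, di on x86, ... */
		if (!PT_REGS_PARM1(ctx))
			return 0;
		/* a real program would bpf_probe_read() the argument here */
		return 0;
	}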
index 37f947ec44ed91533489572b2bd053ded3f080cd..a42f4fc4dc11f6f1a4319847599ff9fb2728b32b 100644 (file)
@@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y
 CONFIG_BPF_STREAM_PARSER=y
 CONFIG_XDP_SOCKETS=y
 CONFIG_FTRACE_SYSCALLS=y
+CONFIG_IPV6_TUNNEL=y
+CONFIG_IPV6_GRE=y
index 8a114bb1c379040b81bd70c87e10ab8fd9401af1..1c1a2f75f3d828da1288ecaa52657851c0600be8 100644 (file)
@@ -1,13 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
+static __u64 read_perf_max_sample_freq(void)
+{
+       __u64 sample_freq = 5000; /* fallback to 5000 on error */
+       FILE *f;
+
+       f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+       if (f == NULL)
+               return sample_freq;
+       fscanf(f, "%llu", &sample_freq);
+       fclose(f);
+       return sample_freq;
+}
+
 void test_stacktrace_build_id_nmi(void)
 {
        int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
        const char *file = "./test_stacktrace_build_id.o";
        int err, pmu_fd, prog_fd;
        struct perf_event_attr attr = {
-               .sample_freq = 5000,
                .freq = 1,
                .type = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
@@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void)
        int build_id_matches = 0;
        int retry = 1;
 
+       attr.sample_freq = read_perf_max_sample_freq();
+
 retry:
        err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
        if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
new file mode 100644 (file)
index 0000000..3af64c4
--- /dev/null
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* the maximum delay we are willing to add (drop packets beyond that) */
+#define TIME_HORIZON_NS (2000 * 1000 * 1000)
+#define NS_PER_SEC 1000000000
+#define ECN_HORIZON_NS 5000000
+#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
+
+/* flow_key => last_tstamp timestamp used */
+struct bpf_map_def SEC("maps") flow_map = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(uint32_t),
+       .value_size = sizeof(uint64_t),
+       .max_entries = 1,
+};
+
+static inline int throttle_flow(struct __sk_buff *skb)
+{
+       int key = 0;
+       uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
+       uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
+                       THROTTLE_RATE_BPS;
+       uint64_t now = bpf_ktime_get_ns();
+       uint64_t tstamp, next_tstamp = 0;
+
+       if (last_tstamp)
+               next_tstamp = *last_tstamp + delay_ns;
+
+       tstamp = skb->tstamp;
+       if (tstamp < now)
+               tstamp = now;
+
+       /* should we throttle? */
+       if (next_tstamp <= tstamp) {
+               if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY))
+                       return TC_ACT_SHOT;
+               return TC_ACT_OK;
+       }
+
+       /* do not queue past the time horizon */
+       if (next_tstamp - now >= TIME_HORIZON_NS)
+               return TC_ACT_SHOT;
+
+       /* set ecn bit, if needed */
+       if (next_tstamp - now >= ECN_HORIZON_NS)
+               bpf_skb_ecn_set_ce(skb);
+
+       if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST))
+               return TC_ACT_SHOT;
+       skb->tstamp = next_tstamp;
+
+       return TC_ACT_OK;
+}
+
+static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
+{
+       void *data_end = (void *)(long)skb->data_end;
+
+       /* drop malformed packets */
+       if ((void *)(tcp + 1) > data_end)
+               return TC_ACT_SHOT;
+
+       if (tcp->dest == bpf_htons(9000))
+               return throttle_flow(skb);
+
+       return TC_ACT_OK;
+}
+
+static inline int handle_ipv4(struct __sk_buff *skb)
+{
+       void *data_end = (void *)(long)skb->data_end;
+       void *data = (void *)(long)skb->data;
+       struct iphdr *iph;
+       uint32_t ihl;
+
+       /* drop malformed packets */
+       if (data + sizeof(struct ethhdr) > data_end)
+               return TC_ACT_SHOT;
+       iph = (struct iphdr *)(data + sizeof(struct ethhdr));
+       if ((void *)(iph + 1) > data_end)
+               return TC_ACT_SHOT;
+       ihl = iph->ihl * 4;
+       if (((void *)iph) + ihl > data_end)
+               return TC_ACT_SHOT;
+
+       if (iph->protocol == IPPROTO_TCP)
+               return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl));
+
+       return TC_ACT_OK;
+}
+
+SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+{
+       if (skb->protocol == bpf_htons(ETH_P_IP))
+               return handle_ipv4(skb);
+
+       return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
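The pacing arithmetic in throttle_flow() is easy to spot-check. A worked example, assuming full 1500-byte frames at the configured rate (note THROTTLE_RATE_BPS counts bytes, not bits, per second):

	/* delay_ns = skb->len * NS_PER_SEC / THROTTLE_RATE_BPS
	 *          = 1500 * 1000000000 / 5000000
	 *          = 300000 ns
	 * one 1500-byte frame every 300 us is 5 MB/s, and the 2 s
	 * TIME_HORIZON_NS queues at most ~6666 such frames
	 */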
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
new file mode 100644 (file)
index 0000000..f541c2d
--- /dev/null
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* In-place tunneling */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <linux/types.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+
+static const int cfg_port = 8000;
+
+struct grev4hdr {
+       struct iphdr ip;
+       __be16 flags;
+       __be16 protocol;
+} __attribute__((packed));
+
+struct grev6hdr {
+       struct ipv6hdr ip;
+       __be16 flags;
+       __be16 protocol;
+} __attribute__((packed));
+
+static __always_inline void set_ipv4_csum(struct iphdr *iph)
+{
+       __u16 *iph16 = (__u16 *)iph;
+       __u32 csum;
+       int i;
+
+       iph->check = 0;
+
+#pragma clang loop unroll(full)
+       for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
+               csum += *iph16++;
+
+       iph->check = ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+{
+       struct grev4hdr h_outer;
+       struct iphdr iph_inner;
+       struct tcphdr tcph;
+       __u64 flags;
+       int olen;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+                              sizeof(iph_inner)) < 0)
+               return TC_ACT_OK;
+
+       /* filter only packets we want */
+       if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
+               return TC_ACT_OK;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+                              &tcph, sizeof(tcph)) < 0)
+               return TC_ACT_OK;
+
+       if (tcph.dest != __bpf_constant_htons(cfg_port))
+               return TC_ACT_OK;
+
+       flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
+       if (with_gre) {
+               flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+               olen = sizeof(h_outer);
+       } else {
+               olen = sizeof(h_outer.ip);
+       }
+
+       /* add room between mac and network header */
+       if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+               return TC_ACT_SHOT;
+
+       /* prepare new outer network header */
+       h_outer.ip = iph_inner;
+       h_outer.ip.tot_len = bpf_htons(olen +
+                                      bpf_ntohs(h_outer.ip.tot_len));
+       if (with_gre) {
+               h_outer.ip.protocol = IPPROTO_GRE;
+               h_outer.protocol = bpf_htons(ETH_P_IP);
+               h_outer.flags = 0;
+       } else {
+               h_outer.ip.protocol = IPPROTO_IPIP;
+       }
+
+       set_ipv4_csum((void *)&h_outer.ip);
+
+       /* store new outer network header */
+       if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+                               BPF_F_INVALIDATE_HASH) < 0)
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+{
+       struct ipv6hdr iph_inner;
+       struct grev6hdr h_outer;
+       struct tcphdr tcph;
+       __u64 flags;
+       int olen;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+                              sizeof(iph_inner)) < 0)
+               return TC_ACT_OK;
+
+       /* filter only packets we want */
+       if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+                              &tcph, sizeof(tcph)) < 0)
+               return TC_ACT_OK;
+
+       if (tcph.dest != __bpf_constant_htons(cfg_port))
+               return TC_ACT_OK;
+
+       flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+       if (with_gre) {
+               flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+               olen = sizeof(h_outer);
+       } else {
+               olen = sizeof(h_outer.ip);
+       }
+
+       /* add room between mac and network header */
+       if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+               return TC_ACT_SHOT;
+
+       /* prepare new outer network header */
+       h_outer.ip = iph_inner;
+       h_outer.ip.payload_len = bpf_htons(olen +
+                                          bpf_ntohs(h_outer.ip.payload_len));
+       if (with_gre) {
+               h_outer.ip.nexthdr = IPPROTO_GRE;
+               h_outer.protocol = bpf_htons(ETH_P_IPV6);
+               h_outer.flags = 0;
+       } else {
+               h_outer.ip.nexthdr = IPPROTO_IPV6;
+       }
+
+       /* store new outer network header */
+       if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+                               BPF_F_INVALIDATE_HASH) < 0)
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+SEC("encap_ipip")
+int __encap_ipip(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+               return encap_ipv4(skb, false);
+       else
+               return TC_ACT_OK;
+}
+
+SEC("encap_gre")
+int __encap_gre(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+               return encap_ipv4(skb, true);
+       else
+               return TC_ACT_OK;
+}
+
+SEC("encap_ip6tnl")
+int __encap_ip6tnl(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+               return encap_ipv6(skb, false);
+       else
+               return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre")
+int __encap_ip6gre(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+               return encap_ipv6(skb, true);
+       else
+               return TC_ACT_OK;
+}
+
+static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
+{
+       char buf[sizeof(struct grev6hdr)];
+       int olen;
+
+       switch (proto) {
+       case IPPROTO_IPIP:
+       case IPPROTO_IPV6:
+               olen = len;
+               break;
+       case IPPROTO_GRE:
+               olen = len + 4 /* gre hdr */;
+               break;
+       default:
+               return TC_ACT_OK;
+       }
+
+       if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
+                               BPF_F_ADJ_ROOM_FIXED_GSO))
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
+static int decap_ipv4(struct __sk_buff *skb)
+{
+       struct iphdr iph_outer;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+                              sizeof(iph_outer)) < 0)
+               return TC_ACT_OK;
+
+       if (iph_outer.ihl != 5)
+               return TC_ACT_OK;
+
+       return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
+                             iph_outer.protocol);
+}
+
+static int decap_ipv6(struct __sk_buff *skb)
+{
+       struct ipv6hdr iph_outer;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+                              sizeof(iph_outer)) < 0)
+               return TC_ACT_OK;
+
+       return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
+                             iph_outer.nexthdr);
+}
+
+SEC("decap")
+int decap_f(struct __sk_buff *skb)
+{
+       switch (skb->protocol) {
+       case __bpf_constant_htons(ETH_P_IP):
+               return decap_ipv4(skb);
+       case __bpf_constant_htons(ETH_P_IPV6):
+               return decap_ipv6(skb);
+       default:
+               /* does not match, ignore */
+               return TC_ACT_OK;
+       }
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
new file mode 100644 (file)
index 0000000..1ab095b
--- /dev/null
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") results = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u64),
+       .max_entries = 1,
+};
+
+static __always_inline void check_syncookie(void *ctx, void *data,
+                                           void *data_end)
+{
+       struct bpf_sock_tuple tup;
+       struct bpf_sock *sk;
+       struct ethhdr *ethh;
+       struct iphdr *ipv4h;
+       struct ipv6hdr *ipv6h;
+       struct tcphdr *tcph;
+       int ret;
+       __u32 key = 0;
+       __u64 value = 1;
+
+       ethh = data;
+       if (ethh + 1 > data_end)
+               return;
+
+       switch (bpf_ntohs(ethh->h_proto)) {
+       case ETH_P_IP:
+               ipv4h = data + sizeof(struct ethhdr);
+               if (ipv4h + 1 > data_end)
+                       return;
+
+               if (ipv4h->ihl != 5)
+                       return;
+
+               tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr);
+               if (tcph + 1 > data_end)
+                       return;
+
+               tup.ipv4.saddr = ipv4h->saddr;
+               tup.ipv4.daddr = ipv4h->daddr;
+               tup.ipv4.sport = tcph->source;
+               tup.ipv4.dport = tcph->dest;
+
+               sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4),
+                                       BPF_F_CURRENT_NETNS, 0);
+               if (!sk)
+                       return;
+
+               if (sk->state != BPF_TCP_LISTEN)
+                       goto release;
+
+               ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
+                                             tcph, sizeof(*tcph));
+               break;
+
+       case ETH_P_IPV6:
+               ipv6h = data + sizeof(struct ethhdr);
+               if (ipv6h + 1 > data_end)
+                       return;
+
+               if (ipv6h->nexthdr != IPPROTO_TCP)
+                       return;
+
+               tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+               if (tcph + 1 > data_end)
+                       return;
+
+               memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr));
+               memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr));
+               tup.ipv6.sport = tcph->source;
+               tup.ipv6.dport = tcph->dest;
+
+               sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6),
+                                       BPF_F_CURRENT_NETNS, 0);
+               if (!sk)
+                       return;
+
+               if (sk->state != BPF_TCP_LISTEN)
+                       goto release;
+
+               ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
+                                             tcph, sizeof(*tcph));
+               break;
+
+       default:
+               return;
+       }
+
+       if (ret == 0)
+               bpf_map_update_elem(&results, &key, &value, 0);
+
+release:
+       bpf_sk_release(sk);
+}
+
+SEC("clsact/check_syncookie")
+int check_syncookie_clsact(struct __sk_buff *skb)
+{
+       check_syncookie(skb, (void *)(long)skb->data,
+                       (void *)(long)skb->data_end);
+       return TC_ACT_OK;
+}
+
+SEC("xdp/check_syncookie")
+int check_syncookie_xdp(struct xdp_md *ctx)
+{
+       check_syncookie(ctx, (void *)(long)ctx->data,
+                       (void *)(long)ctx->data_end);
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
new file mode 100755 (executable)
index 0000000..f38567e
--- /dev/null
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test installs a TC bpf program that throttles a TCP flow
+# with dst port = 9000 down to 5MBps. Then it measures actual
+# throughput of the flow.
+
+if [[ $EUID -ne 0 ]]; then
+       echo "This script must be run as root"
+       echo "FAIL"
+       exit 1
+fi
+
+# check that nc, dd, and timeout are present
+command -v nc >/dev/null 2>&1 || \
+       { echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+       { echo >&2 "nc is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+       { echo >&2 "timeout is not available"; exit 1; }
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP_SRC="172.16.1.100"
+readonly IP_DST="172.16.2.100"
+
+cleanup()
+{
+       ip netns del ${NS_SRC}
+       ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e  # exit on error
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_DST}"
+ip link add veth_src type veth peer name veth_dst
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_dst netns ${NS_DST}
+
+ip -netns ${NS_SRC} addr add ${IP_SRC}/24  dev veth_src
+ip -netns ${NS_DST} addr add ${IP_DST}/24  dev veth_dst
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_DST} link set dev veth_dst up
+
+ip -netns ${NS_SRC} route add ${IP_DST}/32  dev veth_src
+ip -netns ${NS_DST} route add ${IP_SRC}/32  dev veth_dst
+
+# set up TC on TX
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
+ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
+       bpf da obj test_tc_edt.o sec cls_test
+
+
+# start the listener
+ip netns exec ${NS_DST} bash -c \
+       "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+declare -i NC_PID=$!
+sleep 1
+
+declare -ir TIMEOUT=20
+declare -ir EXPECTED_BPS=5000000
+
+# run the load, capture RX bytes on DST
+declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
+       cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+set +e
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
+       bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
+set -e
+
+declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
+       cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
+
+echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
+       awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
+               $1, ($2-$3)*100.0/$3}'
+
+# Pass the test if the actual bps is within 1% of the expected bps.
+# The difference is usually about 0.1% on a 20-sec test, and approaches
+# zero the longer the test runs.
+declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
+        awk 'function abs(x){return ((x < 0.0) ? -x : x)}
+             {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
+               else { print "0"} }' )
+if [ "${RES}" == "0" ] ; then
+       echo "PASS"
+else
+       echo "FAIL"
+       exit 1
+fi
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
new file mode 100755 (executable)
index 0000000..c805adb
--- /dev/null
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In-place tunneling
+
+# must match the port that the bpf program filters on
+readonly port=8000
+
+readonly ns_prefix="ns-$$-"
+readonly ns1="${ns_prefix}1"
+readonly ns2="${ns_prefix}2"
+
+readonly ns1_v4=192.168.1.1
+readonly ns2_v4=192.168.1.2
+readonly ns1_v6=fd::1
+readonly ns2_v6=fd::2
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+
+setup() {
+       ip netns add "${ns1}"
+       ip netns add "${ns2}"
+
+       ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
+             peer name veth2 mtu 1500 netns "${ns2}"
+
+       ip netns exec "${ns1}" ethtool -K veth1 tso off
+
+       ip -netns "${ns1}" link set veth1 up
+       ip -netns "${ns2}" link set veth2 up
+
+       ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
+       ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
+       ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
+       ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
+
+       # clamp route MTU to reserve room for tunnel headers:
+       # 1500 - 20 (outer IPv4) - 4 (GRE) = 1476, 1500 - 40 (outer IPv6) - 4 = 1456
+       ip -netns "${ns1}" -4 route flush table main
+       ip -netns "${ns1}" -6 route flush table main
+       ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
+       ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+
+       sleep 1
+
+       dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+       ip netns del "${ns2}"
+       ip netns del "${ns1}"
+
+       if [[ -f "${outfile}" ]]; then
+               rm "${outfile}"
+       fi
+       if [[ -f "${infile}" ]]; then
+               rm "${infile}"
+       fi
+}
+
+server_listen() {
+       ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+       server_pid=$!
+       sleep 0.2
+}
+
+client_connect() {
+       ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
+       echo $?
+}
+
+verify_data() {
+       wait "${server_pid}"
+       # sha1sum returns two fields [sha1] [filepath]
+       # convert to bash array and access first elem
+       insum=($(sha1sum ${infile}))
+       outsum=($(sha1sum ${outfile}))
+       if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
+               echo "data mismatch"
+               exit 1
+       fi
+}
+
+set -e
+
+# no arguments: automated test, run all
+if [[ "$#" -eq "0" ]]; then
+       echo "ipip"
+       $0 ipv4 ipip 100
+
+       echo "ip6ip6"
+       $0 ipv6 ip6tnl 100
+
+       echo "ip gre"
+       $0 ipv4 gre 100
+
+       echo "ip6 gre"
+       $0 ipv6 ip6gre 100
+
+       echo "ip gre gso"
+       $0 ipv4 gre 2000
+
+       echo "ip6 gre gso"
+       $0 ipv6 ip6gre 2000
+
+       echo "OK. All tests passed"
+       exit 0
+fi
+
+if [[ "$#" -ne "3" ]]; then
+       echo "Usage: $0"
+       echo "   or: $0 <ipv4|ipv6> <tuntype> <data_len>"
+       exit 1
+fi
+
+case "$1" in
+"ipv4")
+       readonly addr1="${ns1_v4}"
+       readonly addr2="${ns2_v4}"
+       readonly netcat_opt=-4
+       ;;
+"ipv6")
+       readonly addr1="${ns1_v6}"
+       readonly addr2="${ns2_v6}"
+       readonly netcat_opt=-6
+       ;;
+*)
+       echo "unknown arg: $1"
+       exit 1
+       ;;
+esac
+
+readonly tuntype=$2
+readonly datalen=$3
+
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
+
+trap cleanup EXIT
+
+setup
+
+# basic communication works
+echo "test basic connectivity"
+server_listen
+client_connect
+verify_data
+
+# clientside, insert bpf program to encap all TCP to port ${port}
+# client can no longer connect
+ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
+ip netns exec "${ns1}" tc filter add dev veth1 egress \
+       bpf direct-action object-file ./test_tc_tunnel.o \
+       section "encap_${tuntype}"
+echo "test bpf encap without decap (expect failure)"
+server_listen
+! client_connect
+
+# serverside, insert decap module
+# server is still running
+# client can connect again
+ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
+       remote "${addr1}" local "${addr2}"
+# Because packets are decapped by the tunnel they arrive on testtun0 from
+# the IP stack perspective.  Ensure reverse path filtering is disabled
+# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
+# expected veth2 (veth2 is where 192.168.1.2 is configured).
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+# rp needs to be disabled for both all and testtun0 as the rp value is
+# selected as the max of the "all" and device-specific values.
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
+ip netns exec "${ns2}" ip link set dev testtun0 up
+echo "test bpf encap with tunnel device decap"
+client_connect
+verify_data
+
+# serverside, use BPF for decap
+ip netns exec "${ns2}" ip link del dev testtun0
+ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
+ip netns exec "${ns2}" tc filter add dev veth2 ingress \
+       bpf direct-action object-file ./test_tc_tunnel.o section decap
+server_listen
+echo "test bpf encap with bpf decap"
+client_connect
+verify_data
+
+echo OK
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
new file mode 100755 (executable)
index 0000000..d48e517
--- /dev/null
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+# Copyright (c) 2019 Cloudflare
+
+set -eu
+
+wait_for_ip()
+{
+       local _i
+       printf "Wait for IP %s to become available " "$1"
+       for _i in $(seq ${MAX_PING_TRIES}); do
+               printf "."
+               if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then
+                       echo " OK"
+                       return
+               fi
+               sleep 1
+       done
+       echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+       exit 1
+}
+
+get_prog_id()
+{
+       awk '/ id / {sub(/.* id /, "", $0); print($1)}'
+}
+
+ns1_exec()
+{
+       ip netns exec ns1 "$@"
+}
+
+setup()
+{
+       ip netns add ns1
+       ns1_exec ip link set lo up
+
+       ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
+
+       wait_for_ip 127.0.0.1
+       wait_for_ip ::1
+}
+
+cleanup()
+{
+       ip netns del ns1 2>/dev/null || :
+}
+
+main()
+{
+       trap cleanup EXIT 2 3 6 15
+       setup
+
+       printf "Testing clsact..."
+       ns1_exec tc qdisc add dev "${TEST_IF}" clsact
+       ns1_exec tc filter add dev "${TEST_IF}" ingress \
+               bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da
+
+       BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \
+                     get_prog_id)
+       ns1_exec "${PROG}" "${BPF_PROG_ID}"
+       ns1_exec tc qdisc del dev "${TEST_IF}" clsact
+
+       printf "Testing XDP..."
+       ns1_exec ip link set "${TEST_IF}" xdp \
+               object "${BPF_PROG_OBJ}" section "${XDP_SECTION}"
+       BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id)
+       ns1_exec "${PROG}" "${BPF_PROG_ID}"
+}
+
+DIR=$(dirname $0)
+TEST_IF=lo
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
+CLSACT_SECTION="clsact/check_syncookie"
+XDP_SECTION="xdp/check_syncookie"
+BPF_PROG_ID=0
+PROG="${DIR}/test_tcp_check_syncookie_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
new file mode 100644 (file)
index 0000000..87829c8
--- /dev/null
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+static int start_server(const struct sockaddr *addr, socklen_t len)
+{
+       int fd;
+
+       fd = socket(addr->sa_family, SOCK_STREAM, 0);
+       if (fd == -1) {
+               log_err("Failed to create server socket");
+               goto out;
+       }
+
+       if (bind(fd, addr, len) == -1) {
+               log_err("Failed to bind server socket");
+               goto close_out;
+       }
+
+       if (listen(fd, 128) == -1) {
+               log_err("Failed to listen on server socket");
+               goto close_out;
+       }
+
+       goto out;
+
+close_out:
+       close(fd);
+       fd = -1;
+out:
+       return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+       struct sockaddr_storage addr;
+       socklen_t len = sizeof(addr);
+       int fd = -1;
+
+       if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+               log_err("Failed to get server addr");
+               goto out;
+       }
+
+       fd = socket(addr.ss_family, SOCK_STREAM, 0);
+       if (fd == -1) {
+               log_err("Failed to create client socket");
+               goto out;
+       }
+
+       if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+               log_err("Fail to connect to server");
+               goto close_out;
+       }
+
+       goto out;
+
+close_out:
+       close(fd);
+       fd = -1;
+out:
+       return fd;
+}
+
+static int get_map_fd_by_prog_id(int prog_id)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       __u32 map_ids[1];
+       int prog_fd = -1;
+       int map_fd = -1;
+
+       prog_fd = bpf_prog_get_fd_by_id(prog_id);
+       if (prog_fd < 0) {
+               log_err("Failed to get fd by prog id %d", prog_id);
+               goto err;
+       }
+
+       info.nr_map_ids = 1;
+       info.map_ids = (__u64)(unsigned long)map_ids;
+
+       if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+               log_err("Failed to get info by prog fd %d", prog_fd);
+               goto err;
+       }
+
+       if (!info.nr_map_ids) {
+               log_err("No maps found for prog fd %d", prog_fd);
+               goto err;
+       }
+
+       map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+       if (map_fd < 0)
+               log_err("Failed to get fd by map id %d", map_ids[0]);
+err:
+       if (prog_fd >= 0)
+               close(prog_fd);
+       return map_fd;
+}
+
+static int run_test(int server_fd, int results_fd)
+{
+       int client = -1, srv_client = -1;
+       int ret = 0;
+       __u32 key = 0;
+       __u64 value = 0;
+
+       if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
+               log_err("Can't clear results");
+               goto err;
+       }
+
+       client = connect_to_server(server_fd);
+       if (client == -1)
+               goto err;
+
+       srv_client = accept(server_fd, NULL, 0);
+       if (srv_client == -1) {
+               log_err("Can't accept connection");
+               goto err;
+       }
+
+       if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) {
+               log_err("Can't lookup result");
+               goto err;
+       }
+
+       if (value != 1) {
+               log_err("Didn't match syncookie: %llu", value);
+               goto err;
+       }
+
+       goto out;
+
+err:
+       ret = 1;
+out:
+       close(client);
+       close(srv_client);
+       return ret;
+}
+
+int main(int argc, char **argv)
+{
+       struct sockaddr_in addr4;
+       struct sockaddr_in6 addr6;
+       int server = -1;
+       int server_v6 = -1;
+       int results = -1;
+       int err = 0;
+
+       if (argc < 2) {
+               fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
+               exit(1);
+       }
+
+       results = get_map_fd_by_prog_id(atoi(argv[1]));
+       if (results < 0) {
+               log_err("Can't get map");
+               goto err;
+       }
+
+       memset(&addr4, 0, sizeof(addr4));
+       addr4.sin_family = AF_INET;
+       addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+       addr4.sin_port = 0;
+
+       memset(&addr6, 0, sizeof(addr6));
+       addr6.sin6_family = AF_INET6;
+       addr6.sin6_addr = in6addr_loopback;
+       addr6.sin6_port = 0;
+
+       server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
+       if (server == -1)
+               goto err;
+
+       server_v6 = start_server((const struct sockaddr *)&addr6,
+                                sizeof(addr6));
+       if (server_v6 == -1)
+               goto err;
+
+       if (run_test(server, results))
+               goto err;
+
+       if (run_test(server_v6, results))
+               goto err;
+
+       printf("ok\n");
+       goto out;
+err:
+       err = 1;
+out:
+       close(server);
+       close(server_v6);
+       close(results);
+       return err;
+}
index 477a9dcf9ffff4b5b47a73759ba00d92d13b0106..19b5d03acc2a83d8425f5356dc53b2165ee96a1a 100644 (file)
@@ -198,7 +198,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 }
 
 /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
-#define BPF_SK_LOOKUP                                                  \
+#define BPF_SK_LOOKUP(func)                                            \
        /* struct bpf_sock_tuple tuple = {} */                          \
        BPF_MOV64_IMM(BPF_REG_2, 0),                                    \
        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),                  \
@@ -207,13 +207,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
        BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32),                \
        BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40),                \
        BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48),                \
-       /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */       \
+       /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */                \
        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),                           \
        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),                         \
        BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),        \
        BPF_MOV64_IMM(BPF_REG_4, 0),                                    \
        BPF_MOV64_IMM(BPF_REG_5, 0),                                    \
-       BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+       BPF_EMIT_CALL(BPF_FUNC_ ## func)
 
 /* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return
  * value into 0 and does necessary preparation for direct packet access
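
The macro change is plain token pasting: the caller now names the lookup helper and BPF_SK_LOOKUP(func) splices it onto the BPF_FUNC_ prefix, so BPF_SK_LOOKUP(skc_lookup_tcp) emits a call to BPF_FUNC_skc_lookup_tcp while every existing user keeps the same 13-instruction layout. A standalone illustration of the ## mechanism (the enum values below are placeholders, not the kernel's helper ids):

    #include <stdio.h>

    /* placeholder ids; the kernel defines the real ones in the
     * BPF_FUNC_* enum of include/uapi/linux/bpf.h
     */
    enum { BPF_FUNC_sk_lookup_tcp = 1, BPF_FUNC_skc_lookup_tcp = 2 };

    /* same trick as BPF_SK_LOOKUP(func): paste the caller-supplied
     * name onto the BPF_FUNC_ prefix
     */
    #define HELPER_ID(func) BPF_FUNC_ ## func

    int main(void)
    {
            printf("%d\n", HELPER_ID(sk_lookup_tcp));   /* prints 1 */
            printf("%d\n", HELPER_ID(skc_lookup_tcp));  /* prints 2 */
            return 0;
    }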
index 9de8b7cb4e6df6b5929c915566dff74163efbcf2..db781052758d3774b42cb891b86b09320e3d95d5 100644 (file)
@@ -7,11 +7,19 @@
 
 #define BUF_SIZE 256
 
+static __attribute__((noinline))
+void urandom_read(int fd, int count)
+{
+       char buf[BUF_SIZE];
+       int i;
+
+       for (i = 0; i < count; ++i)
+               read(fd, buf, BUF_SIZE);
+}
+
 int main(int argc, char *argv[])
 {
        int fd = open("/dev/urandom", O_RDONLY);
-       int i;
-       char buf[BUF_SIZE];
        int count = 4;
 
        if (fd < 0)
@@ -20,8 +28,7 @@ int main(int argc, char *argv[])
        if (argc == 2)
                count = atoi(argv[1]);
 
-       for (i = 0; i < count; ++i)
-               read(fd, buf, BUF_SIZE);
+       urandom_read(fd, count);
 
        close(fd);
        return 0;
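
Hoisting the loop into a function marked __attribute__((noinline)) gives urandom_read() its own symbol and stack frame, which is presumably what lets stack-trace-based selftests find it reliably; left inline in main(), the loop would have no frame of its own. A standalone illustration of the attribute (function names here are invented for the example):

    #include <stdio.h>

    /* noinline forces a real call: work() keeps its own frame and
     * symbol, so a sampled stack can show main -> work
     */
    static __attribute__((noinline)) int work(int n)
    {
            int i, acc = 0;

            for (i = 0; i < n; i++)
                    acc += i;
            return acc;
    }

    int main(void)
    {
            printf("%d\n", work(1000));
            return 0;
    }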
index 923f2110072d6f1f4a124824228c082d43f094d8..ebcbf154c4600d7c9f8fc6d1acc63061cbaaa1df 100644 (file)
@@ -1,7 +1,18 @@
 {
        "reference tracking: leak potential reference",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .errstr = "Unreleased reference",
+       .result = REJECT,
+},
+{
+       "reference tracking: leak potential reference to sock_common",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
        BPF_EXIT_INSN(),
        },
@@ -12,7 +23,7 @@
 {
        "reference tracking: leak potential reference on stack",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
        BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
        BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
@@ -26,7 +37,7 @@
 {
        "reference tracking: leak potential reference on stack 2",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
        BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
        BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
 {
        "reference tracking: zero potential reference",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .errstr = "Unreleased reference",
+       .result = REJECT,
+},
+{
+       "reference tracking: zero potential reference to sock_common",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
        BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
        BPF_EXIT_INSN(),
        },
@@ -52,7 +74,7 @@
 {
        "reference tracking: copy and zero potential references",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
@@ -65,7 +87,7 @@
 {
        "reference tracking: release reference without check",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        /* reference in r0 may be NULL */
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_MOV64_IMM(BPF_REG_2, 0),
        .errstr = "type=sock_or_null expected=sock",
        .result = REJECT,
 },
+{
+       "reference tracking: release reference to sock_common without check",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
+       /* reference in r0 may be NULL */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .errstr = "type=sock_common_or_null expected=sock",
+       .result = REJECT,
+},
 {
        "reference tracking: release reference",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = ACCEPT,
+},
+{
+       "reference tracking: release reference to sock_common",
+       .insns = {
+       BPF_SK_LOOKUP(skc_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
        BPF_EMIT_CALL(BPF_FUNC_sk_release),
 {
        "reference tracking: release reference 2",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
 {
        "reference tracking: release reference twice",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 {
        "reference tracking: release reference twice inside branch",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
        BPF_EXIT_INSN(),
        BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
                    offsetof(struct __sk_buff, mark)),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
        /* Leak reference in R0 */
        BPF_EXIT_INSN(),
        BPF_EXIT_INSN(),
        BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
                    offsetof(struct __sk_buff, mark)),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 {
        "reference tracking in call: free reference in subprog",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
        BPF_MOV64_IMM(BPF_REG_0, 0),
 {
        "reference tracking in call: free reference in subprog and outside",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
 
        /* subprog 1 */
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        /* spill unchecked sk_ptr into stack of caller */
        BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_EXIT_INSN(),
 
        /* subprog 1 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_EXIT_INSN(), /* return sk */
        },
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        BPF_EXIT_INSN(),
 
        /* subprog 2 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        BPF_EXIT_INSN(),
 
        /* subprog 2 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        "reference tracking: allow LD_ABS",
        .insns = {
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
        BPF_EMIT_CALL(BPF_FUNC_sk_release),
        "reference tracking: forbid LD_ABS while holding reference",
        .insns = {
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_LD_ABS(BPF_B, 0),
        BPF_LD_ABS(BPF_H, 0),
        BPF_LD_ABS(BPF_W, 0),
        "reference tracking: allow LD_IND",
        .insns = {
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
        BPF_EMIT_CALL(BPF_FUNC_sk_release),
        "reference tracking: forbid LD_IND while holding reference",
        .insns = {
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
        BPF_MOV64_IMM(BPF_REG_7, 1),
        BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
        "reference tracking: check reference or tail call",
        .insns = {
        BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        /* if (sk) bpf_sk_release() */
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
        "reference tracking: release reference then tail call",
        .insns = {
        BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        /* if (sk) bpf_sk_release() */
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
        .insns = {
        BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
        /* Look up socket and store in REG_6 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        /* bpf_tail_call() */
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_MOV64_IMM(BPF_REG_3, 2),
        .insns = {
        BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
        /* Look up socket and store in REG_6 */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        /* if (!sk) goto end */
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
 {
        "reference tracking: mangle and release sock_or_null",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 {
        "reference tracking: mangle and release sock",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
        BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
 {
        "reference tracking: access member",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
        BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
 {
        "reference tracking: write to member",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 {
        "reference tracking: invalid 64-bit access of member",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
        BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
 {
        "reference tracking: access after release",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
        BPF_EMIT_CALL(BPF_FUNC_sk_release),
 {
        "reference tracking: use ptr from bpf_tcp_sock() after release",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 {
        "reference tracking: use ptr from bpf_sk_fullsock() after release",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 {
        "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 {
        "reference tracking: use sk after bpf_sk_release(tp)",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 {
        "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 {
        "reference tracking: bpf_sk_release(listen_sk)",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
        /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
        "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
        .insns = {
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
        BPF_EXIT_INSN(),
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
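
Every test above is the instruction-level encoding of one source pattern: a reference returned by bpf_sk_lookup_tcp() (or, new in this series, bpf_skc_lookup_tcp()) must be NULL-checked and then released exactly once on every path. A minimal sketch of the shape the verifier accepts, in C rather than raw insns (section name and attach type are assumptions):

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include "bpf_helpers.h"

    SEC("classifier")
    int track_ref(struct __sk_buff *skb)
    {
            struct bpf_sock_tuple tuple = {};
            struct bpf_sock *sk;

            sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
            if (!sk)
                    return TC_ACT_OK;   /* nothing to release */

            /* exiting here, or overwriting sk, is what the verifier
             * rejects as "Unreleased reference"
             */
            bpf_sk_release(sk);
            return TC_ACT_OK;
    }

    char _license[] SEC("license") = "GPL";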
index dbaf5be947b2be8ea3007cae023ef94be600a553..91bb77c24a2ef3bb392ce2763e90d7a05f2af34d 100644 (file)
        .insns = {
        BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
        /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
        /* u64 foo; */
        /* void *target = &foo; */
        .insns = {
        BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
        /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
        /* u64 foo; */
        /* void *target = &foo; */
        .insns = {
        BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
        /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
        /* u64 foo; */
        /* void *target = &foo; */
        .insns = {
        BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
        /* struct bpf_sock *sock = bpf_sock_lookup(...); */
-       BPF_SK_LOOKUP,
+       BPF_SK_LOOKUP(sk_lookup_tcp),
        BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
        /* u64 foo; */
        /* void *target = &foo; */