asedeno.scripts.mit.edu Git - linux.git/commitdiff
udp: Support UDP fraglist GRO/GSO.
author Steffen Klassert <steffen.klassert@secunet.com>
Sat, 25 Jan 2020 10:26:45 +0000 (11:26 +0100)
committer David S. Miller <davem@davemloft.net>
Mon, 27 Jan 2020 10:00:21 +0000 (11:00 +0100)
This patch extends UDP GRO to support fraglist GRO/GSO
by using the previously introduced infrastructure.
If the feature is enabled, all UDP packets are going to
fraglist GRO (local input and forward).

After validating the csum, we mark ip_summed as
CHECKSUM_UNNECESSARY for fraglist GRO packets to
make sure that the csum is not touched.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/udp.h
net/ipv4/udp_offload.c
net/ipv6/udp_offload.c

index bad74f7808311e7f4970f949efe67fd30514a2eb..44e0e52b585ce8700c8966cd8e52e7d28f23cffb 100644 (file)
@@ -167,7 +167,7 @@ typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
                                     __be16 dport);
 
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
-                               struct udphdr *uh, udp_lookup_t lookup);
+                               struct udphdr *uh, struct sock *sk);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
index b25e42100ceb051264f42cf17bb74b820c7962aa..1a98583a79f4d96a7de2c3b8585299293e0846b5 100644 (file)
@@ -184,6 +184,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+                                             netdev_features_t features)
+{ /* Segments skb via skb_segment_list() and rewrites the UDP length of the skb it returns. */
+       unsigned int mss = skb_shinfo(skb)->gso_size; /* captured first: skb is reassigned below */
+
+       skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+       if (IS_ERR(skb))
+               return skb; /* pass the error pointer back to the caller unchanged */
+
+       udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); /* UDP header size plus gso_size; only the returned skb is written here */
+
+       return skb;
+}
+
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
                                  netdev_features_t features)
 {
@@ -196,6 +210,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
        __sum16 check;
        __be16 newlen;
 
+       if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+               return __udp_gso_segment_list(gso_skb, features);
+
        mss = skb_shinfo(gso_skb)->gso_size;
        if (gso_skb->len <= sizeof(*uh) + mss)
                return ERR_PTR(-EINVAL);
@@ -354,6 +371,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
        struct udphdr *uh2;
        struct sk_buff *p;
        unsigned int ulen;
+       int ret = 0;
 
        /* requires non zero csum, for symmetry with GSO */
        if (!uh->check) {
@@ -369,7 +387,6 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
        }
        /* pull encapsulating udp header */
        skb_gro_pull(skb, sizeof(struct udphdr));
-       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
 
        list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
@@ -383,14 +400,40 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
                        continue;
                }
 
+               if (NAPI_GRO_CB(skb)->is_flist != NAPI_GRO_CB(p)->is_flist) {
+                       NAPI_GRO_CB(skb)->flush = 1;
+                       return p;
+               }
+
                /* Terminate the flow on len mismatch or if it grow "too much".
                 * Under small packet flood GRO count could elsewhere grow a lot
                 * leading to excessive truesize values.
                 * On len mismatch merge the first packet shorter than gso_size,
                 * otherwise complete the GRO packet.
                 */
-               if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
-                   ulen != ntohs(uh2->len) ||
+               if (ulen > ntohs(uh2->len)) {
+                       pp = p;
+               } else {
+                       if (NAPI_GRO_CB(skb)->is_flist) {
+                               if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
+                                       NAPI_GRO_CB(skb)->flush = 1;
+                                       return NULL;
+                               }
+                               if ((skb->ip_summed != p->ip_summed) ||
+                                   (skb->csum_level != p->csum_level)) {
+                                       NAPI_GRO_CB(skb)->flush = 1;
+                                       return NULL;
+                               }
+                               ret = skb_gro_receive_list(p, skb);
+                       } else {
+                               skb_gro_postpull_rcsum(skb, uh,
+                                                      sizeof(struct udphdr));
+
+                               ret = skb_gro_receive(p, skb);
+                       }
+               }
+
+               if (ret || ulen != ntohs(uh2->len) ||
                    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
                        pp = p;
 
@@ -401,36 +444,29 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
        return NULL;
 }
 
-INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
-                                                  __be16 sport, __be16 dport));
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
-                               struct udphdr *uh, udp_lookup_t lookup)
+                               struct udphdr *uh, struct sock *sk)
 {
        struct sk_buff *pp = NULL;
        struct sk_buff *p;
        struct udphdr *uh2;
        unsigned int off = skb_gro_offset(skb);
        int flush = 1;
-       struct sock *sk;
 
-       rcu_read_lock();
-       sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
-                               udp4_lib_lookup_skb, skb, uh->source, uh->dest);
-       if (!sk)
-               goto out_unlock;
+       if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+               NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
 
-       if (udp_sk(sk)->gro_enabled) {
+       if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
                pp = call_gro_receive(udp_gro_receive_segment, head, skb);
-               rcu_read_unlock();
                return pp;
        }
 
-       if (NAPI_GRO_CB(skb)->encap_mark ||
+       if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
            (skb->ip_summed != CHECKSUM_PARTIAL &&
             NAPI_GRO_CB(skb)->csum_cnt == 0 &&
             !NAPI_GRO_CB(skb)->csum_valid) ||
            !udp_sk(sk)->gro_receive)
-               goto out_unlock;
+               goto out;
 
        /* mark that this skb passed once through the tunnel gro layer */
        NAPI_GRO_CB(skb)->encap_mark = 1;
@@ -457,8 +493,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
        skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
        pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
 
-out_unlock:
-       rcu_read_unlock();
+out:
        skb_gro_flush_final(skb, pp, flush);
        return pp;
 }
@@ -468,8 +503,10 @@ INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
        struct udphdr *uh = udp_gro_udphdr(skb);
+       struct sk_buff *pp;
+       struct sock *sk;
 
-       if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+       if (unlikely(!uh))
                goto flush;
 
        /* Don't bother verifying checksum if we're going to flush anyway. */
@@ -484,7 +521,11 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
                                             inet_gro_compute_pseudo);
 skip:
        NAPI_GRO_CB(skb)->is_ipv6 = 0;
-       return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
+       rcu_read_lock();
+       sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+       pp = udp_gro_receive(head, skb, uh, sk);
+       rcu_read_unlock();
+       return pp;
 
 flush:
        NAPI_GRO_CB(skb)->flush = 1;
@@ -517,9 +558,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
        rcu_read_lock();
        sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
                                udp4_lib_lookup_skb, skb, uh->source, uh->dest);
-       if (sk && udp_sk(sk)->gro_enabled) {
-               err = udp_gro_complete_segment(skb);
-       } else if (sk && udp_sk(sk)->gro_complete) {
+       if (sk && udp_sk(sk)->gro_complete) {
                skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
                                        : SKB_GSO_UDP_TUNNEL;
 
@@ -529,6 +568,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
                skb->encapsulation = 1;
                err = udp_sk(sk)->gro_complete(sk, skb,
                                nhoff + sizeof(struct udphdr));
+       } else {
+               err = udp_gro_complete_segment(skb);
        }
        rcu_read_unlock();
 
@@ -544,6 +585,23 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
        const struct iphdr *iph = ip_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
+       if (NAPI_GRO_CB(skb)->is_flist) {
+               uh->len = htons(skb->len - nhoff);
+
+               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+               if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+                       if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+                               skb->csum_level++;
+               } else {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       skb->csum_level = 0;
+               }
+
+               return 0;
+       }
+
        if (uh->check)
                uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
                                          iph->daddr, 0);
index f0d5fc27d0b562ac91138fa5ad7857313f29c008..584157a0775969b2af74f4f4f820b78f64133f9b 100644 (file)
@@ -115,8 +115,10 @@ INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
        struct udphdr *uh = udp_gro_udphdr(skb);
+       struct sk_buff *pp;
+       struct sock *sk;
 
-       if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
+       if (unlikely(!uh))
                goto flush;
 
        /* Don't bother verifying checksum if we're going to flush anyway. */
@@ -132,7 +134,11 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 skip:
        NAPI_GRO_CB(skb)->is_ipv6 = 1;
-       return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
+       rcu_read_lock();
+       sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+       pp = udp_gro_receive(head, skb, uh, sk);
+       rcu_read_unlock();
+       return pp;
 
 flush:
        NAPI_GRO_CB(skb)->flush = 1;
@@ -144,6 +150,23 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
+       if (NAPI_GRO_CB(skb)->is_flist) {
+               uh->len = htons(skb->len - nhoff);
+
+               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+               if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+                       if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+                               skb->csum_level++;
+               } else {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       skb->csum_level = 0;
+               }
+
+               return 0;
+       }
+
        if (uh->check)
                uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
                                          &ipv6h->daddr, 0);