diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c8cd99c3603f7874a9f8b5841d4117ba4ec4e5f2..6f1e31f674a3b12902cf1e2cbcf98132177554ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -59,18 +59,21 @@
 #include <linux/errqueue.h>
 #include <linux/prefetch.h>
 #include <linux/if_vlan.h>
+#include <linux/mpls.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
+#include <net/mpls.h>
 
 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
 #include <linux/highmem.h>
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include "datagram.h"
 
@@ -365,18 +368,20 @@ struct napi_alloc_cache {
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-       struct page_frag_cache *nc;
-       unsigned long flags;
-       void *data;
+       struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-       local_irq_save(flags);
-       nc = this_cpu_ptr(&netdev_alloc_cache);
-       data = page_frag_alloc(nc, fragsz, gfp_mask);
-       local_irq_restore(flags);
-       return data;
+       return page_frag_alloc(&nc->page, fragsz, gfp_mask);
+}
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+       fragsz = SKB_DATA_ALIGN(fragsz);
+
+       return __napi_alloc_frag(fragsz, GFP_ATOMIC);
 }
+EXPORT_SYMBOL(napi_alloc_frag);
 
 /**
  * netdev_alloc_frag - allocate a page fragment
@@ -387,26 +392,21 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  */
 void *netdev_alloc_frag(unsigned int fragsz)
 {
-       fragsz = SKB_DATA_ALIGN(fragsz);
-
-       return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(netdev_alloc_frag);
-
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
-{
-       struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
-
-       return page_frag_alloc(&nc->page, fragsz, gfp_mask);
-}
+       struct page_frag_cache *nc;
+       void *data;
 
-void *napi_alloc_frag(unsigned int fragsz)
-{
        fragsz = SKB_DATA_ALIGN(fragsz);
-
-       return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+       if (in_irq() || irqs_disabled()) {
+               nc = this_cpu_ptr(&netdev_alloc_cache);
+               data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
+       } else {
+               local_bh_disable();
+               data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
+               local_bh_enable();
+       }
+       return data;
 }
-EXPORT_SYMBOL(napi_alloc_frag);
+EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
  *     __netdev_alloc_skb - allocate an skbuff for rx on a specific device
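
The rework above removes the local_irq_save()/local_irq_restore() pair from the
fast path: napi_alloc_frag() may only run in softirq (NAPI) context, where the
per-CPU napi_alloc_cache is safe to touch as-is, while netdev_alloc_frag() now
inspects the calling context and only uses the IRQ-safe netdev_alloc_cache when
it really runs in hard IRQ or with interrupts disabled; otherwise it takes the
cheaper local_bh_disable() path. A minimal caller sketch, with example_poll(),
example_refill() and EXAMPLE_BUF_SZ as hypothetical stand-ins:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	#define EXAMPLE_BUF_SZ	2048	/* hypothetical rx buffer size */

	/* NAPI poll callback: softirq context, so napi_alloc_frag() can
	 * use the per-CPU cache without masking interrupts. */
	static int example_poll(struct napi_struct *napi, int budget)
	{
		void *buf = napi_alloc_frag(EXAMPLE_BUF_SZ);

		if (!buf)
			return 0;	/* out of memory: stop early */
		/* ... post buf to the hardware rx ring here ... */
		return budget;
	}

	/* Refill helper that may run in hard IRQ or process context:
	 * netdev_alloc_frag() dispatches to the right cache itself. */
	static void *example_refill(void)
	{
		return netdev_alloc_frag(EXAMPLE_BUF_SZ);
	}
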
@@ -425,7 +425,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
                                   gfp_t gfp_mask)
 {
        struct page_frag_cache *nc;
-       unsigned long flags;
        struct sk_buff *skb;
        bool pfmemalloc;
        void *data;
@@ -446,13 +445,17 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
        if (sk_memalloc_socks())
                gfp_mask |= __GFP_MEMALLOC;
 
-       local_irq_save(flags);
-
-       nc = this_cpu_ptr(&netdev_alloc_cache);
-       data = page_frag_alloc(nc, len, gfp_mask);
-       pfmemalloc = nc->pfmemalloc;
-
-       local_irq_restore(flags);
+       if (in_irq() || irqs_disabled()) {
+               nc = this_cpu_ptr(&netdev_alloc_cache);
+               data = page_frag_alloc(nc, len, gfp_mask);
+               pfmemalloc = nc->pfmemalloc;
+       } else {
+               local_bh_disable();
+               nc = this_cpu_ptr(&napi_alloc_cache.page);
+               data = page_frag_alloc(nc, len, gfp_mask);
+               pfmemalloc = nc->pfmemalloc;
+               local_bh_enable();
+       }
 
        if (unlikely(!data))
                return NULL;
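
The same context-dependent dispatch is applied to full skb allocation: instead
of unconditionally masking interrupts, __netdev_alloc_skb() borrows the NAPI
page-fragment cache under local_bh_disable() whenever it is not called from
hard IRQ context, which is considerably cheaper. A hedged caller sketch, with
example_rx_alloc() and EXAMPLE_RX_LEN as hypothetical names:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	#define EXAMPLE_RX_LEN	1536	/* hypothetical MTU-sized buffer */

	/* Typical rx allocation; netdev_alloc_skb() wraps the
	 * __netdev_alloc_skb() path shown above with GFP_ATOMIC. */
	static struct sk_buff *example_rx_alloc(struct net_device *dev)
	{
		return netdev_alloc_skb(dev, EXAMPLE_RX_LEN);
	}
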
@@ -706,6 +709,105 @@ void kfree_skb_list(struct sk_buff *segs)
 }
 EXPORT_SYMBOL(kfree_skb_list);
 
+/* Dump skb information and contents.
+ *
+ * Must only be called from net_ratelimit()-ed paths.
+ *
+ * Dumps up to can_dump_full whole packets if full_pkt, headers otherwise.
+ */
+void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
+{
+       static atomic_t can_dump_full = ATOMIC_INIT(5);
+       struct skb_shared_info *sh = skb_shinfo(skb);
+       struct net_device *dev = skb->dev;
+       struct sock *sk = skb->sk;
+       struct sk_buff *list_skb;
+       bool has_mac, has_trans;
+       int headroom, tailroom;
+       int i, len, seg_len;
+
+       if (full_pkt)
+               full_pkt = atomic_dec_if_positive(&can_dump_full) >= 0;
+
+       if (full_pkt)
+               len = skb->len;
+       else
+               len = min_t(int, skb->len, MAX_HEADER + 128);
+
+       headroom = skb_headroom(skb);
+       tailroom = skb_tailroom(skb);
+
+       has_mac = skb_mac_header_was_set(skb);
+       has_trans = skb_transport_header_was_set(skb);
+
+       printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
+              "mac=(%d,%d) net=(%d,%d) trans=%d\n"
+              "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
+              "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
+              "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
+              level, skb->len, headroom, skb_headlen(skb), tailroom,
+              has_mac ? skb->mac_header : -1,
+              has_mac ? skb_mac_header_len(skb) : -1,
+              skb->network_header,
+              has_trans ? skb_network_header_len(skb) : -1,
+              has_trans ? skb->transport_header : -1,
+              sh->tx_flags, sh->nr_frags,
+              sh->gso_size, sh->gso_type, sh->gso_segs,
+              skb->csum, skb->ip_summed, skb->csum_complete_sw,
+              skb->csum_valid, skb->csum_level,
+              skb->hash, skb->sw_hash, skb->l4_hash,
+              ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
+
+       if (dev)
+               printk("%sdev name=%s feat=0x%pNF\n",
+                      level, dev->name, &dev->features);
+       if (sk)
+               printk("%ssk family=%hu type=%hu proto=%hu\n",
+                      level, sk->sk_family, sk->sk_type, sk->sk_protocol);
+
+       if (full_pkt && headroom)
+               print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
+                              16, 1, skb->head, headroom, false);
+
+       seg_len = min_t(int, skb_headlen(skb), len);
+       if (seg_len)
+               print_hex_dump(level, "skb linear:   ", DUMP_PREFIX_OFFSET,
+                              16, 1, skb->data, seg_len, false);
+       len -= seg_len;
+
+       if (full_pkt && tailroom)
+               print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
+                              16, 1, skb_tail_pointer(skb), tailroom, false);
+
+       for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+               u32 p_off, p_len, copied;
+               struct page *p;
+               u8 *vaddr;
+
+               skb_frag_foreach_page(frag, frag->page_offset,
+                                     skb_frag_size(frag), p, p_off, p_len,
+                                     copied) {
+                       seg_len = min_t(int, p_len, len);
+                       vaddr = kmap_atomic(p);
+                       print_hex_dump(level, "skb frag:     ",
+                                      DUMP_PREFIX_OFFSET,
+                                      16, 1, vaddr + p_off, seg_len, false);
+                       kunmap_atomic(vaddr);
+                       len -= seg_len;
+                       if (!len)
+                               break;
+               }
+       }
+
+       if (full_pkt && skb_has_frag_list(skb)) {
+               printk("skb fraglist:\n");
+               skb_walk_frags(skb, list_skb)
+                       skb_dump(level, list_skb, true);
+       }
+}
+EXPORT_SYMBOL(skb_dump);
+
 /**
  *     skb_tx_error - report an sk_buff xmit error
  *     @skb: buffer that triggered an error
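
Note that skb_dump() does no rate limiting of its own; the comment above makes
throttling the caller's job, and the static can_dump_full counter additionally
caps full-payload dumps at five over the kernel's lifetime. A minimal sketch of
the intended call pattern, with example_log_bad_skb() as a hypothetical
drop-path diagnostic:

	/* Dump a suspicious packet, throttled by net_ratelimit(). */
	static void example_log_bad_skb(struct sk_buff *skb)
	{
		if (net_ratelimit())
			skb_dump(KERN_ERR, skb, true);
		kfree_skb(skb);
	}
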
@@ -908,6 +1010,31 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 #undef C
 }
 
+/**
+ * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
+ * @first: first sk_buff of the msg
+ */
+struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
+{
+       struct sk_buff *n;
+
+       n = alloc_skb(0, GFP_ATOMIC);
+       if (!n)
+               return NULL;
+
+       n->len = first->len;
+       n->data_len = first->len;
+       n->truesize = first->truesize;
+
+       skb_shinfo(n)->frag_list = first;
+
+       __copy_skb_header(n, first);
+       n->destructor = NULL;
+
+       return n;
+}
+EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
+
 /**
  *     skb_morph       -       morph one skb into another
  *     @dst: the skb to receive the contents
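
alloc_skb_for_msg() builds a zero-length head skb around an existing buffer:
len, data_len and truesize are taken from @first and the buffer itself hangs
off frag_list, so no payload bytes are copied or cloned. A hedged usage sketch,
with example_wrap_msg() as a hypothetical name:

	/* Present a received skb as a message of its own without
	 * copying: the payload stays in the frag_list child. */
	static struct sk_buff *example_wrap_msg(struct sk_buff *first)
	{
		struct sk_buff *msg = alloc_skb_for_msg(first);

		/* on failure, @first is still owned by the caller */
		return msg;
	}
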
@@ -2508,7 +2635,8 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
        if (copy > 0) {
                if (copy > len)
                        copy = len;
-               csum = ops->update(skb->data + offset, copy, csum);
+               csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
+                                      skb->data + offset, copy, csum);
                if ((len -= copy) == 0)
                        return csum;
                offset += copy;
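
INDIRECT_CALL_1(f, f1, ...) comes from the newly included
<linux/indirect_call_wrapper.h>: when retpolines are enabled it compares the
function pointer against the expected target and makes a cheap direct call on
a match, falling back to the indirect call otherwise (with retpolines off it
collapses to a plain indirect call). Roughly, the line above behaves like this
sketch:

	/* Approximate expansion with CONFIG_RETPOLINE=y: a predicted
	 * direct call to csum_partial_ext() in the common case. */
	csum = likely(ops->update == csum_partial_ext) ?
		csum_partial_ext(skb->data + offset, copy, csum) :
		ops->update(skb->data + offset, copy, csum);
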
@@ -2535,9 +2663,13 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
                                              frag->page_offset + offset - start,
                                              copy, p, p_off, p_len, copied) {
                                vaddr = kmap_atomic(p);
-                               csum2 = ops->update(vaddr + p_off, p_len, 0);
+                               csum2 = INDIRECT_CALL_1(ops->update,
+                                                       csum_partial_ext,
+                                                       vaddr + p_off, p_len, 0);
                                kunmap_atomic(vaddr);
-                               csum = ops->combine(csum, csum2, pos, p_len);
+                               csum = INDIRECT_CALL_1(ops->combine,
+                                                      csum_block_add_ext, csum,
+                                                      csum2, pos, p_len);
                                pos += p_len;
                        }
 
@@ -2560,7 +2692,8 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
                                copy = len;
                        csum2 = __skb_checksum(frag_iter, offset - start,
                                               copy, 0, ops);
-                       csum = ops->combine(csum, csum2, pos, copy);
+                       csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
+                                              csum, csum2, pos, copy);
                        if ((len -= copy) == 0)
                                return csum;
                        offset += copy;
@@ -5294,6 +5427,173 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
 }
 EXPORT_SYMBOL(skb_vlan_push);
 
+/* Update the ethertype of hdr and the skb csum value if required. */
+static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
+                            __be16 ethertype)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE) {
+               __be16 diff[] = { ~hdr->h_proto, ethertype };
+
+               skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
+       }
+
+       hdr->h_proto = ethertype;
+}
+
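
The { ~old, new } fixup in skb_mod_eth_type() is the standard ones'-complement
identity: adding the complement of the old value and then the new value to a
CHECKSUM_COMPLETE sum is equivalent to replacing the old bytes in the summed
data. skb_mpls_update_lse() below uses the same trick on the 32-bit label
stack entry. An illustrative helper, with the hypothetical name
example_csum_replace16():

	/* Fold a 16-bit field change into an existing checksum:
	 * csum_partial() over { ~old, new } cancels old and adds new. */
	static __wsum example_csum_replace16(__wsum csum, __be16 old,
					     __be16 new)
	{
		__be16 diff[] = { ~old, new };

		return csum_partial((char *)diff, sizeof(diff), csum);
	}
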
+/**
+ * skb_mpls_push() - push a new MPLS header after the mac header
+ *
+ * @skb: buffer
+ * @mpls_lse: MPLS label stack entry to push
+ * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
+{
+       struct mpls_shim_hdr *lse;
+       int err;
+
+       if (unlikely(!eth_p_mpls(mpls_proto)))
+               return -EINVAL;
+
+       /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
+       if (skb->encapsulation)
+               return -EINVAL;
+
+       err = skb_cow_head(skb, MPLS_HLEN);
+       if (unlikely(err))
+               return err;
+
+       if (!skb->inner_protocol) {
+               skb_set_inner_network_header(skb, skb->mac_len);
+               skb_set_inner_protocol(skb, skb->protocol);
+       }
+
+       skb_push(skb, MPLS_HLEN);
+       memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
+               skb->mac_len);
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb->mac_len);
+
+       lse = mpls_hdr(skb);
+       lse->label_stack_entry = mpls_lse;
+       skb_postpush_rcsum(skb, lse, MPLS_HLEN);
+
+       if (skb->dev && skb->dev->type == ARPHRD_ETHER)
+               skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
+       skb->protocol = mpls_proto;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(skb_mpls_push);
+
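
A caller composes the label stack entry with the MPLS_LS_* shift/mask macros
from <linux/mpls.h> before pushing it. A hedged sketch, with
example_push_label() and the chosen field values purely illustrative:

	#include <linux/mpls.h>
	#include <linux/if_ether.h>

	/* Push label 100, bottom-of-stack, TTL 64 as the outermost
	 * header of an Ethernet frame. */
	static int example_push_label(struct sk_buff *skb)
	{
		u32 lse = 100 << MPLS_LS_LABEL_SHIFT |
			  1 << MPLS_LS_S_SHIFT |
			  64 << MPLS_LS_TTL_SHIFT;

		return skb_mpls_push(skb, cpu_to_be32(lse),
				     htons(ETH_P_MPLS_UC));
	}
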
+/**
+ * skb_mpls_pop() - pop the outermost MPLS header
+ *
+ * @skb: buffer
+ * @next_proto: ethertype of header after popped MPLS header
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto)
+{
+       int err;
+
+       if (unlikely(!eth_p_mpls(skb->protocol)))
+               return -EINVAL;
+
+       err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+       if (unlikely(err))
+               return err;
+
+       skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
+       memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
+               skb->mac_len);
+
+       __skb_pull(skb, MPLS_HLEN);
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb->mac_len);
+
+       if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+               struct ethhdr *hdr;
+
+               /* use mpls_hdr() to get ethertype to account for VLANs. */
+               hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
+               skb_mod_eth_type(skb, hdr, next_proto);
+       }
+       skb->protocol = next_proto;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(skb_mpls_pop);
+
+/**
+ * skb_mpls_update_lse() - modify outermost MPLS header and update csum
+ *
+ * @skb: buffer
+ * @mpls_lse: new MPLS label stack entry to update to
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
+{
+       int err;
+
+       if (unlikely(!eth_p_mpls(skb->protocol)))
+               return -EINVAL;
+
+       err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+       if (unlikely(err))
+               return err;
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE) {
+               __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
+
+               skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
+       }
+
+       mpls_hdr(skb)->label_stack_entry = mpls_lse;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
+
+/**
+ * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
+ *
+ * @skb: buffer
+ *
+ * Expects skb->data at mac header.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_mpls_dec_ttl(struct sk_buff *skb)
+{
+       u32 lse;
+       u8 ttl;
+
+       if (unlikely(!eth_p_mpls(skb->protocol)))
+               return -EINVAL;
+
+       lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
+       ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+       if (!--ttl)
+               return -EINVAL;
+
+       lse &= ~MPLS_LS_TTL_MASK;
+       lse |= ttl << MPLS_LS_TTL_SHIFT;
+
+       return skb_mpls_update_lse(skb, cpu_to_be32(lse));
+}
+EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
+
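
Taken together these helpers give a forwarding path the usual label-edit
primitives; skb_mpls_dec_ttl() in particular returns -EINVAL once the TTL
would reach zero, leaving the drop decision to the caller. A hedged sketch,
with example_forward_mpls() as a hypothetical label-switching step:

	/* Decrement TTL and drop on expiry, as an LSR would. */
	static int example_forward_mpls(struct sk_buff *skb)
	{
		int err = skb_mpls_dec_ttl(skb);

		if (err) {
			kfree_skb(skb);	/* TTL expired or not MPLS */
			return err;
		}
		return 0;
	}
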
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *