net/ipv4/ip_gre.c
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution
   at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   the fatal route to the network, even if it were you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
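
/* A rough, hedged illustration of the DF mechanism above (illustrative
 * numbers, assuming plain GRE-over-IPv4 with no options): each pass
 * through a looping tunnel costs at least 24 bytes of extra headers
 * (20-byte outer IP header + 4-byte base GRE header), so with DF forced
 * the usable tunnel MTU shrinks roughly like
 *
 *	1500 -> 1476 -> 1452 -> ... -> <68 after about 60 passes,
 *
 * at which point the IPv4 minimum MTU is violated, transmission fails,
 * and the loop starves instead of growing exponentially.
 */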

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key to the third word
	   in the GRE header. It makes it impossible to maintain even soft
	   state for keyed GRE tunnels with enabled checksum. Tell
	   them "thank you".

	   Well, I wonder, rfc1812 was written by a Cisco employee;
	   why the hell do these idiots break the standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key to the third word
	 * in the GRE header. It makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with enabled checksum. Tell
	 * them "thank you".
	 *
	 * Well, I wonder, rfc1812 was written by a Cisco employee;
	 * why the hell do these idiots break the standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}
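
/* Receive-path overview (a summary of the code below; the helpers live
 * in net/ipv4/gre_demux.c and net/ipv4/ip_tunnel.c):
 *
 *	gre_rcv()
 *	  -> gre_parse_header()		validate base header, flags, csum
 *	  -> erspan_rcv()		if proto is ETH_P_ERSPAN/ERSPAN2
 *	  -> ipgre_rcv()
 *	       -> __ipgre_rcv()		look up the tunnel, strip headers
 *		    -> ip_tunnel_rcv()	update stats, deliver to the stack
 *
 * Anything that no tunnel claims is answered with an ICMP port
 * unreachable, matching the behaviour at the bottom of gre_rcv().
 */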

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct erspan_metadata *pkt_md;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	/* Check the base header length. */
	if (unlikely(!pskb_may_pull(skb, len)))
		return PACKET_REJECT;

	iph = ip_hdr(skb);
	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
	ver = ershdr->ver;

	/* The original GRE header does not have a key field;
	 * use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(get_session_id(ershdr));
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		len = gre_hdr_len + erspan_hdr_len(ver);
		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		pkt_md = (struct erspan_metadata *)(ershdr + 1);

		if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}
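
/* A hedged example of the headroom arithmetic above, assuming an Ethernet
 * underlay and a keyed, checksummed GRE tunnel (numbers are illustrative):
 *
 *	LL_RESERVED_SPACE(eth0)		16	(14-byte header, aligned)
 *	rt->dst.header_len		 0
 *	tunnel_hlen			12	(4 base + 4 csum + 4 key)
 *	sizeof(struct iphdr)		20
 *					--
 *	min_headroom			48
 *
 * Anything short of that (or a cloned header) triggers pskb_expand_head().
 */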

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}
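
/* The flow-based ("collect metadata") transmit path above is what an
 * externally controlled device uses. A hedged iproute2 illustration
 * (assuming a current iproute2; device names and addresses are examples):
 *
 *	ip link add gre0 type gretap external
 *	tc filter add dev eth0 ingress protocol ip flower \
 *		action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 \
 *		action mirred egress redirect dev gre0
 *
 * Here the tunnel endpoints and key come from the per-skb metadata set
 * by act_tunnel_key, not from the device configuration itself.
 */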

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	int version;
	__be16 df;
	int nhoff;
	int thoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_skb;

	/* ERSPAN has a fixed 8-byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	thoff = skb_transport_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IPV6) &&
	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
		truncate = true;

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
	} else {
		goto err_free_rt;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 1)
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
	else if (tunnel->erspan_ver == 2)
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
	else
		goto free_skb;

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
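
/* A hedged usage sketch for the gretap (Ethernet-over-GRE) device driven
 * by gre_tap_xmit() above, assuming current iproute2 and example addresses:
 *
 *	ip link add tap1 type gretap remote 192.0.2.2 local 192.0.2.1
 *	ip link set tap1 master br0
 *	ip link set tap1 up
 *
 * Frames bridged into tap1 are wrapped in GRE with protocol ETH_P_TEB.
 */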

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;
	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		} else {
			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	} else {
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
	}
}
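
/* Worked example for the header-length update above (illustrative values):
 * reconfiguring a plain GRE tunnel to add key and checksum changes
 * gre_calc_hlen() from 4 to 12 bytes, so len = +8 and
 *
 *	tunnel->hlen		+= 8
 *	dev->needed_headroom	+= 8
 *	dev->mtu		-= 8	(clamped to the IPv4 minimum of 68)
 */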

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;

	return 0;
}
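
/* A hedged userspace sketch of driving the legacy ioctl above (not part of
 * this file; assumes an AF_INET socket fd and the usual uapi headers):
 *
 *	struct ip_tunnel_parm p = { 0 };
 *	struct ifreq ifr = { 0 };
 *
 *	strncpy(p.name, "gre1", IFNAMSIZ);
 *	p.iph.version  = 4;
 *	p.iph.ihl      = 5;
 *	p.iph.protocol = IPPROTO_GRE;
 *	p.iph.saddr    = local_addr;
 *	p.iph.daddr    = remote_addr;
 *	strncpy(ifr.ifr_name, "gre0", IFNAMSIZ);
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	if (ioctl(fd, SIOCADDTUNNEL, &ifr) < 0)
 *		perror("SIOCADDTUNNEL");
 *
 * This is what "ip tunnel" uses, as opposed to the netlink ops further down.
 */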

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
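
/* A present-day equivalent of the recipe above, hedged (assuming modern
 * iproute2 syntax; addresses as in the original comment):
 *
 *	ip link add Universe type gre remote 224.66.66.66 \
 *		local <Your_real_addr> ttl 255
 *	ip addr add 10.66.66.<somewhat>/24 dev Universe
 *	ip link set Universe up
 */
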
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags set. */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}
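
/* A hedged configuration example that satisfies erspan_validate() above
 * (assuming current iproute2; addresses and IDs are illustrative):
 *
 *	ip link add dev erspan1 type erspan seq key 100 \
 *		local 172.16.1.100 remote 172.16.1.200 \
 *		erspan_ver 1 erspan 123
 *
 * "seq key" supplies exactly GRE_SEQ | GRE_KEY, the key doubles as the
 * 10-bit session ID (so it must fit ID_MASK), and "erspan 123" is the
 * v1 index.
 */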

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) &&
		    (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver != 1 && t->erspan_ver != 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}
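
/* A hedged example of the ENCAP attributes in action: GRE carried in UDP
 * via foo-over-udp (assuming current iproute2 and CONFIG_NET_FOU; port and
 * addresses are illustrative, 47 is the GRE IP protocol number):
 *
 *	ip fou add port 5555 ipproto 47
 *	ip link add name gre1 type gre remote 192.0.2.2 local 192.0.2.1 \
 *		ttl 64 encap fou encap-sport auto encap-dport 5555
 *
 * The "encap ..." keywords populate the IFLA_GRE_ENCAP_TYPE/SPORT/DPORT
 * attributes parsed above.
 */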
1281
1282 static int gre_tap_init(struct net_device *dev)
1283 {
1284         __gre_tunnel_init(dev);
1285         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1286         netif_keep_dst(dev);
1287
1288         return ip_tunnel_init(dev);
1289 }
1290
1291 static const struct net_device_ops gre_tap_netdev_ops = {
1292         .ndo_init               = gre_tap_init,
1293         .ndo_uninit             = ip_tunnel_uninit,
1294         .ndo_start_xmit         = gre_tap_xmit,
1295         .ndo_set_mac_address    = eth_mac_addr,
1296         .ndo_validate_addr      = eth_validate_addr,
1297         .ndo_change_mtu         = ip_tunnel_change_mtu,
1298         .ndo_get_stats64        = ip_tunnel_get_stats64,
1299         .ndo_get_iflink         = ip_tunnel_get_iflink,
1300         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1301 };
1302
1303 static int erspan_tunnel_init(struct net_device *dev)
1304 {
1305         struct ip_tunnel *tunnel = netdev_priv(dev);
1306
1307         tunnel->tun_hlen = 8;
1308         tunnel->parms.iph.protocol = IPPROTO_GRE;
1309         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1310                        erspan_hdr_len(tunnel->erspan_ver);
1311
1312         dev->features           |= GRE_FEATURES;
1313         dev->hw_features        |= GRE_FEATURES;
1314         dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
1315         netif_keep_dst(dev);
1316
1317         return ip_tunnel_init(dev);
1318 }
1319
1320 static const struct net_device_ops erspan_netdev_ops = {
1321         .ndo_init               = erspan_tunnel_init,
1322         .ndo_uninit             = ip_tunnel_uninit,
1323         .ndo_start_xmit         = erspan_xmit,
1324         .ndo_set_mac_address    = eth_mac_addr,
1325         .ndo_validate_addr      = eth_validate_addr,
1326         .ndo_change_mtu         = ip_tunnel_change_mtu,
1327         .ndo_get_stats64        = ip_tunnel_get_stats64,
1328         .ndo_get_iflink         = ip_tunnel_get_iflink,
1329         .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1330 };
1331
1332 static void ipgre_tap_setup(struct net_device *dev)
1333 {
1334         ether_setup(dev);
1335         dev->max_mtu = 0;
1336         dev->netdev_ops = &gre_tap_netdev_ops;
1337         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1338         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1339         ip_tunnel_setup(dev, gre_tap_net_id);
1340 }
1341
1342 bool is_gretap_dev(const struct net_device *dev)
1343 {
1344         return dev->netdev_ops == &gre_tap_netdev_ops;
1345 }
1346 EXPORT_SYMBOL_GPL(is_gretap_dev);
1347
1348 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1349                          struct nlattr *tb[], struct nlattr *data[],
1350                          struct netlink_ext_ack *extack)
1351 {
1352         struct ip_tunnel_parm p;
1353         struct ip_tunnel_encap ipencap;
1354         __u32 fwmark = 0;
1355         int err;
1356
1357         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1358                 struct ip_tunnel *t = netdev_priv(dev);
1359                 err = ip_tunnel_encap_setup(t, &ipencap);
1360
1361                 if (err < 0)
1362                         return err;
1363         }
1364
1365         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1366         if (err < 0)
1367                 return err;
1368         return ip_tunnel_newlink(dev, tb, &p, fwmark);
1369 }
1370
1371 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1372                             struct nlattr *data[],
1373                             struct netlink_ext_ack *extack)
1374 {
1375         struct ip_tunnel *t = netdev_priv(dev);
1376         struct ip_tunnel_encap ipencap;
1377         __u32 fwmark = t->fwmark;
1378         struct ip_tunnel_parm p;
1379         int err;
1380
1381         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1382                 err = ip_tunnel_encap_setup(t, &ipencap);
1383
1384                 if (err < 0)
1385                         return err;
1386         }
1387
1388         err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1389         if (err < 0)
1390                 return err;
1391
1392         err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1393         if (err < 0)
1394                 return err;
1395
1396         t->parms.i_flags = p.i_flags;
1397         t->parms.o_flags = p.o_flags;
1398
1399         if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1400                 ipgre_link_update(dev, !tb[IFLA_MTU]);
1401
1402         return 0;
1403 }
1404
1405 static size_t ipgre_get_size(const struct net_device *dev)
1406 {
1407         return
1408                 /* IFLA_GRE_LINK */
1409                 nla_total_size(4) +
1410                 /* IFLA_GRE_IFLAGS */
1411                 nla_total_size(2) +
1412                 /* IFLA_GRE_OFLAGS */
1413                 nla_total_size(2) +
1414                 /* IFLA_GRE_IKEY */
1415                 nla_total_size(4) +
1416                 /* IFLA_GRE_OKEY */
1417                 nla_total_size(4) +
1418                 /* IFLA_GRE_LOCAL */
1419                 nla_total_size(4) +
1420                 /* IFLA_GRE_REMOTE */
1421                 nla_total_size(4) +
1422                 /* IFLA_GRE_TTL */
1423                 nla_total_size(1) +
1424                 /* IFLA_GRE_TOS */
1425                 nla_total_size(1) +
1426                 /* IFLA_GRE_PMTUDISC */
1427                 nla_total_size(1) +
1428                 /* IFLA_GRE_ENCAP_TYPE */
1429                 nla_total_size(2) +
1430                 /* IFLA_GRE_ENCAP_FLAGS */
1431                 nla_total_size(2) +
1432                 /* IFLA_GRE_ENCAP_SPORT */
1433                 nla_total_size(2) +
1434                 /* IFLA_GRE_ENCAP_DPORT */
1435                 nla_total_size(2) +
1436                 /* IFLA_GRE_COLLECT_METADATA */
1437                 nla_total_size(0) +
1438                 /* IFLA_GRE_IGNORE_DF */
1439                 nla_total_size(1) +
1440                 /* IFLA_GRE_FWMARK */
1441                 nla_total_size(4) +
1442                 /* IFLA_GRE_ERSPAN_INDEX */
1443                 nla_total_size(4) +
1444                 /* IFLA_GRE_ERSPAN_VER */
1445                 nla_total_size(1) +
1446                 /* IFLA_GRE_ERSPAN_DIR */
1447                 nla_total_size(1) +
1448                 /* IFLA_GRE_ERSPAN_HWID */
1449                 nla_total_size(2) +
1450                 0;
1451 }
1452
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;

        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS,
                         gre_tnl_flags_to_gre_flags(p->i_flags)) ||
            nla_put_be16(skb, IFLA_GRE_OFLAGS,
                         gre_tnl_flags_to_gre_flags(p->o_flags)) ||
            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
            nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
            nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
            nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
            nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
            nla_put_u8(skb, IFLA_GRE_PMTUDISC,
                       !!(p->iph.frag_off & htons(IP_DF))) ||
            nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
                goto nla_put_failure;

        if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
                        t->encap.type) ||
            nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
                         t->encap.sport) ||
            nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
                         t->encap.dport) ||
            nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
                        t->encap.flags))
                goto nla_put_failure;

        if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
                goto nla_put_failure;

        if (t->collect_md) {
                if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
                        goto nla_put_failure;
        }

        if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
                goto nla_put_failure;

        if (t->erspan_ver == 1) {
                if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
                        goto nla_put_failure;
        } else if (t->erspan_ver == 2) {
                if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
                        goto nla_put_failure;
                if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
                        goto nla_put_failure;
        }

        return 0;

nla_put_failure:
        return -EMSGSIZE;
}

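/* erspan devices are Ethernet-like, hence ether_setup(), and default to
 * ERSPAN version 1 here; an IFLA_GRE_ERSPAN_VER attribute parsed from
 * netlink elsewhere in this file may override that default.
 */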
static void erspan_setup(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        ether_setup(dev);
        dev->netdev_ops = &erspan_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        ip_tunnel_setup(dev, erspan_net_id);
        t->erspan_ver = 1;
}

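/* Attribute policy shared by the "gre", "gretap" and "erspan" link kinds.
 * The rtnetlink core validates incoming IFLA_GRE_* attributes against these
 * types and lengths before any newlink/changelink callback runs.
 */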
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
        [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
        [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
        [IFLA_GRE_IGNORE_DF]    = { .type = NLA_U8 },
        [IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
        [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
        [IFLA_GRE_ERSPAN_VER]   = { .type = NLA_U8 },
        [IFLA_GRE_ERSPAN_DIR]   = { .type = NLA_U8 },
        [IFLA_GRE_ERSPAN_HWID]  = { .type = NLA_U16 },
};

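/* The three rtnl_link_ops below register the "gre", "gretap" and "erspan"
 * link kinds with rtnetlink.  For illustration only (iproute2 syntax;
 * device names and addresses are placeholders):
 *
 *      ip link add gre1 type gre local 192.0.2.1 remote 192.0.2.2 ttl 64
 *      ip link add tap1 type gretap key 10 local 192.0.2.1 remote 192.0.2.2
 *      ip link add er1 type erspan seq key 10 erspan_ver 1 erspan 123 \
 *              local 192.0.2.1 remote 192.0.2.2
 */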
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
        .kind           = "gre",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tunnel_setup,
        .validate       = ipgre_tunnel_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .dellink        = ip_tunnel_dellink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
        .get_link_net   = ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
        .kind           = "gretap",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tap_setup,
        .validate       = ipgre_tap_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .dellink        = ip_tunnel_dellink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
        .get_link_net   = ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
        .kind           = "erspan",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = erspan_setup,
        .validate       = erspan_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .dellink        = ip_tunnel_dellink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
        .get_link_net   = ip_tunnel_get_link_net,
};

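/* gretap_fb_dev_create() builds a flow-based ("fb") gretap device for
 * in-kernel users such as openvswitch.  A minimal sketch of the expected
 * calling convention, assuming this vintage's two-argument
 * dev_change_flags(); the device name is a placeholder:
 *
 *      struct net_device *dev;
 *
 *      rtnl_lock();
 *      dev = gretap_fb_dev_create(net, "gretap_fb0", NET_NAME_USER);
 *      if (!IS_ERR(dev))
 *              dev_change_flags(dev, dev->flags | IFF_UP);
 *      rtnl_unlock();
 */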
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
                                        u8 name_assign_type)
{
        struct nlattr *tb[IFLA_MAX + 1];
        struct net_device *dev;
        LIST_HEAD(list_kill);
        struct ip_tunnel *t;
        int err;

        memset(&tb, 0, sizeof(tb));

        dev = rtnl_create_link(net, name, name_assign_type,
                               &ipgre_tap_ops, tb);
        if (IS_ERR(dev))
                return dev;

        /* Configure flow-based GRE device. */
        t = netdev_priv(dev);
        t->collect_md = true;

        err = ipgre_newlink(net, dev, tb, NULL, NULL);
        if (err < 0) {
                free_netdev(dev);
                return ERR_PTR(err);
        }

        /* openvswitch users expect packet sizes to be unrestricted,
         * so set the largest MTU we can.
         */
        err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
        if (err)
                goto out;

        err = rtnl_configure_link(dev, NULL);
        if (err < 0)
                goto out;

        return dev;
out:
        ip_tunnel_dellink(dev, &list_kill);
        unregister_netdevice_many(&list_kill);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

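/* Per-network-namespace state for the gretap and erspan kinds: each netns
 * gets its own tunnel table plus a fallback device ("gretap0", "erspan0"),
 * and ->exit_batch tears down the tunnels of dying namespaces in a single
 * batched pass.
 */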
static int __net_init ipgre_tap_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
        ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
        .init = ipgre_tap_init_net,
        .exit_batch = ipgre_tap_exit_batch_net,
        .id   = &gre_tap_net_id,
        .size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, erspan_net_id,
                                  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
        ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
        .init = erspan_init_net,
        .exit_batch = erspan_exit_batch_net,
        .id   = &erspan_net_id,
        .size = sizeof(struct ip_tunnel_net),
};

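/* Module bring-up: register per-netns state first, then the GRE protocol
 * hook, then the rtnetlink link kinds; failures unwind in exact reverse
 * order through the labels at the bottom.
 */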
static int __init ipgre_init(void)
{
        int err;

        pr_info("GRE over IPv4 tunneling driver\n");

        err = register_pernet_device(&ipgre_net_ops);
        if (err < 0)
                return err;

        err = register_pernet_device(&ipgre_tap_net_ops);
        if (err < 0)
                goto pnet_tap_failed;

        err = register_pernet_device(&erspan_net_ops);
        if (err < 0)
                goto pnet_erspan_failed;

        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
        if (err < 0) {
                pr_info("%s: can't add protocol\n", __func__);
                goto add_proto_failed;
        }

        err = rtnl_link_register(&ipgre_link_ops);
        if (err < 0)
                goto rtnl_link_failed;

        err = rtnl_link_register(&ipgre_tap_ops);
        if (err < 0)
                goto tap_ops_failed;

        err = rtnl_link_register(&erspan_link_ops);
        if (err < 0)
                goto erspan_link_failed;

        return 0;

erspan_link_failed:
        rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
        rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
        unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
        unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
        unregister_pernet_device(&ipgre_net_ops);
        return err;
}

static void __exit ipgre_fini(void)
{
        rtnl_link_unregister(&ipgre_tap_ops);
        rtnl_link_unregister(&ipgre_link_ops);
        rtnl_link_unregister(&erspan_link_ops);
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
        unregister_pernet_device(&ipgre_tap_net_ops);
        unregister_pernet_device(&ipgre_net_ops);
        unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");
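
/* The MODULE_ALIAS_RTNL_LINK() aliases let "ip link add ... type gre",
 * "... type gretap" and "... type erspan" autoload this module: rtnetlink
 * requests the module "rtnl-link-<kind>" for an unknown link kind.  The
 * MODULE_ALIAS_NETDEV() aliases do the same when one of the fallback
 * device names is first referenced.
 */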