]> asedeno.scripts.mit.edu Git - linux.git/blob - net/ipv6/af_inet6.c
55138f0d2b9d0c144a5a3abaa73e994aa7eb0975
[linux.git] / net / ipv6 / af_inet6.c
1 /*
2  *      PF_INET6 socket protocol family
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Adapted from linux/net/ipv4/af_inet.c
9  *
10  *      Fixes:
11  *      piggy, Karl Knutson     :       Socket protocol table
12  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
13  *      Arnaldo Melo            :       check proc_net_create return, cleanups
14  *
15  *      This program is free software; you can redistribute it and/or
16  *      modify it under the terms of the GNU General Public License
17  *      as published by the Free Software Foundation; either version
18  *      2 of the License, or (at your option) any later version.
19  */
20
/* Format prefix for this file's pr_*() messages (e.g. the pr_err() calls
 * below), so they are emitted with a leading "IPv6: ".
 */
#define pr_fmt(fmt) "IPv6: " fmt
22
23 #include <linux/module.h>
24 #include <linux/capability.h>
25 #include <linux/errno.h>
26 #include <linux/types.h>
27 #include <linux/socket.h>
28 #include <linux/in.h>
29 #include <linux/kernel.h>
30 #include <linux/timer.h>
31 #include <linux/string.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/fcntl.h>
35 #include <linux/mm.h>
36 #include <linux/interrupt.h>
37 #include <linux/proc_fs.h>
38 #include <linux/stat.h>
39 #include <linux/init.h>
40 #include <linux/slab.h>
41
42 #include <linux/inet.h>
43 #include <linux/netdevice.h>
44 #include <linux/icmpv6.h>
45 #include <linux/netfilter_ipv6.h>
46
47 #include <net/ip.h>
48 #include <net/ipv6.h>
49 #include <net/udp.h>
50 #include <net/udplite.h>
51 #include <net/tcp.h>
52 #include <net/ping.h>
53 #include <net/protocol.h>
54 #include <net/inet_common.h>
55 #include <net/route.h>
56 #include <net/transp_v6.h>
57 #include <net/ip6_route.h>
58 #include <net/addrconf.h>
59 #include <net/ipv6_stubs.h>
60 #include <net/ndisc.h>
61 #ifdef CONFIG_IPV6_TUNNEL
62 #include <net/ip6_tunnel.h>
63 #endif
64 #include <net/calipso.h>
65 #include <net/seg6.h>
66
67 #include <linux/uaccess.h>
68 #include <linux/mroute6.h>
69
70 #include "ip6_offload.h"
71
/* Module metadata: author, description and licence of the IPv6 stack. */
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
MODULE_LICENSE("GPL");
75
/* The inetsw6 table contains everything that inet6_create needs to
 * build a new socket.
 *
 * There is one list of struct inet_protosw entries per socket type.
 * inet6_create() walks a list under rcu_read_lock(); the lists are
 * modified by inet6_register_protosw() and inet6_unregister_protosw()
 * with inetsw6_lock held.
 */
static struct list_head inetsw6[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw6_lock);
81
/* Module-wide defaults: IPv6 is not disabled and address
 * autoconfiguration is on.  Both fields are exported below as the
 * "disable_ipv6" and "autoconf" module parameters.
 */
struct ipv6_params ipv6_defaults = {
        .disable_ipv6 = 0,
        .autoconf = 1,
};
86
/* Backing storage for the "disable" module parameter; a non-zero value
 * makes ipv6_mod_enabled() return false.
 */
static int disable_ipv6_mod;

/* All three parameters are registered with mode 0444 (read-only). */
module_param_named(disable, disable_ipv6_mod, int, 0444);
MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");

module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");

module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
97
98 bool ipv6_mod_enabled(void)
99 {
100         return disable_ipv6_mod == 0;
101 }
102 EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
103
104 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
105 {
106         const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
107
108         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
109 }
110
111 static int inet6_create(struct net *net, struct socket *sock, int protocol,
112                         int kern)
113 {
114         struct inet_sock *inet;
115         struct ipv6_pinfo *np;
116         struct sock *sk;
117         struct inet_protosw *answer;
118         struct proto *answer_prot;
119         unsigned char answer_flags;
120         int try_loading_module = 0;
121         int err;
122
123         if (protocol < 0 || protocol >= IPPROTO_MAX)
124                 return -EINVAL;
125
126         /* Look for the requested type/protocol pair. */
127 lookup_protocol:
128         err = -ESOCKTNOSUPPORT;
129         rcu_read_lock();
130         list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
131
132                 err = 0;
133                 /* Check the non-wild match. */
134                 if (protocol == answer->protocol) {
135                         if (protocol != IPPROTO_IP)
136                                 break;
137                 } else {
138                         /* Check for the two wild cases. */
139                         if (IPPROTO_IP == protocol) {
140                                 protocol = answer->protocol;
141                                 break;
142                         }
143                         if (IPPROTO_IP == answer->protocol)
144                                 break;
145                 }
146                 err = -EPROTONOSUPPORT;
147         }
148
149         if (err) {
150                 if (try_loading_module < 2) {
151                         rcu_read_unlock();
152                         /*
153                          * Be more specific, e.g. net-pf-10-proto-132-type-1
154                          * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
155                          */
156                         if (++try_loading_module == 1)
157                                 request_module("net-pf-%d-proto-%d-type-%d",
158                                                 PF_INET6, protocol, sock->type);
159                         /*
160                          * Fall back to generic, e.g. net-pf-10-proto-132
161                          * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
162                          */
163                         else
164                                 request_module("net-pf-%d-proto-%d",
165                                                 PF_INET6, protocol);
166                         goto lookup_protocol;
167                 } else
168                         goto out_rcu_unlock;
169         }
170
171         err = -EPERM;
172         if (sock->type == SOCK_RAW && !kern &&
173             !ns_capable(net->user_ns, CAP_NET_RAW))
174                 goto out_rcu_unlock;
175
176         sock->ops = answer->ops;
177         answer_prot = answer->prot;
178         answer_flags = answer->flags;
179         rcu_read_unlock();
180
181         WARN_ON(!answer_prot->slab);
182
183         err = -ENOBUFS;
184         sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
185         if (!sk)
186                 goto out;
187
188         sock_init_data(sock, sk);
189
190         err = 0;
191         if (INET_PROTOSW_REUSE & answer_flags)
192                 sk->sk_reuse = SK_CAN_REUSE;
193
194         inet = inet_sk(sk);
195         inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
196
197         if (SOCK_RAW == sock->type) {
198                 inet->inet_num = protocol;
199                 if (IPPROTO_RAW == protocol)
200                         inet->hdrincl = 1;
201         }
202
203         sk->sk_destruct         = inet_sock_destruct;
204         sk->sk_family           = PF_INET6;
205         sk->sk_protocol         = protocol;
206
207         sk->sk_backlog_rcv      = answer->prot->backlog_rcv;
208
209         inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
210         np->hop_limit   = -1;
211         np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
212         np->mc_loop     = 1;
213         np->mc_all      = 1;
214         np->pmtudisc    = IPV6_PMTUDISC_WANT;
215         np->repflow     = net->ipv6.sysctl.flowlabel_reflect;
216         sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
217
218         /* Init the ipv4 part of the socket since we can have sockets
219          * using v6 API for ipv4.
220          */
221         inet->uc_ttl    = -1;
222
223         inet->mc_loop   = 1;
224         inet->mc_ttl    = 1;
225         inet->mc_index  = 0;
226         inet->mc_list   = NULL;
227         inet->rcv_tos   = 0;
228
229         if (net->ipv4.sysctl_ip_no_pmtu_disc)
230                 inet->pmtudisc = IP_PMTUDISC_DONT;
231         else
232                 inet->pmtudisc = IP_PMTUDISC_WANT;
233         /*
234          * Increment only the relevant sk_prot->socks debug field, this changes
235          * the previous behaviour of incrementing both the equivalent to
236          * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
237          *
238          * This allows better debug granularity as we'll know exactly how many
239          * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
240          * transport protocol socks. -acme
241          */
242         sk_refcnt_debug_inc(sk);
243
244         if (inet->inet_num) {
245                 /* It assumes that any protocol which allows
246                  * the user to assign a number at socket
247                  * creation time automatically shares.
248                  */
249                 inet->inet_sport = htons(inet->inet_num);
250                 err = sk->sk_prot->hash(sk);
251                 if (err) {
252                         sk_common_release(sk);
253                         goto out;
254                 }
255         }
256         if (sk->sk_prot->init) {
257                 err = sk->sk_prot->init(sk);
258                 if (err) {
259                         sk_common_release(sk);
260                         goto out;
261                 }
262         }
263
264         if (!kern) {
265                 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
266                 if (err) {
267                         sk_common_release(sk);
268                         goto out;
269                 }
270         }
271 out:
272         return err;
273 out_rcu_unlock:
274         rcu_read_unlock();
275         goto out;
276 }
277
/*
 * Bind @sk to the address and port carried in @uaddr (a sockaddr_in6).
 *
 * @addr_len is only consulted to decide whether sin6_scope_id is present;
 * the minimum-length check is done by inet6_bind() before calling here.
 * @force_bind_address_no_port: when true and the requested port is 0, the
 *      protocol's get_port() is skipped, exactly as if the socket's own
 *      bind_address_no_port flag were set.
 * @with_lock: when true, the socket is locked with lock_sock() for the
 *      state checks and updates and released again before returning.
 *
 * Returns 0 on success or a negative errno:
 *   -EAFNOSUPPORT   sin6_family is not AF_INET6
 *   -EINVAL         multicast address on a SOCK_STREAM socket, socket not
 *                   in TCP_CLOSE or already bound, v4-mapped address on a
 *                   v6-only socket, or a scoped address without interface
 *   -EACCES         privileged port without CAP_NET_BIND_SERVICE
 *   -ENODEV         sk_bound_dev_if does not name an existing device
 *   -EADDRNOTAVAIL  address is not local and non-local bind is not allowed
 *   -EADDRINUSE     the protocol's get_port() refused the port
 *   or the error returned by the cgroup BPF post-bind hook.
 */
static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                        bool force_bind_address_no_port, bool with_lock)
{
        struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        __be32 v4addr = 0;
        unsigned short snum;
        bool saved_ipv6only;
        int addr_type = 0;
        int err = 0;

        if (addr->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        addr_type = ipv6_addr_type(&addr->sin6_addr);
        if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
                return -EINVAL;

        /* Ports below inet_prot_sock(net) need CAP_NET_BIND_SERVICE. */
        snum = ntohs(addr->sin6_port);
        if (snum && snum < inet_prot_sock(net) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                return -EACCES;

        if (with_lock)
                lock_sock(sk);

        /* Check these errors (active socket, double bind). */
        if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
                err = -EINVAL;
                goto out;
        }

        /* Check if the address belongs to the host. */
        if (addr_type == IPV6_ADDR_MAPPED) {
                struct net_device *dev = NULL;
                int chk_addr_ret;

                /* Binding to v4-mapped address on a v6-only socket
                 * makes no sense
                 */
                if (sk->sk_ipv6only) {
                        err = -EINVAL;
                        goto out;
                }

                rcu_read_lock();
                if (sk->sk_bound_dev_if) {
                        dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
                        if (!dev) {
                                err = -ENODEV;
                                goto out_unlock;
                        }
                }

                /* Reproduce AF_INET checks to make the bindings consistent */
                v4addr = addr->sin6_addr.s6_addr32[3];
                chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
                rcu_read_unlock();

                if (!inet_can_nonlocal_bind(net, inet) &&
                    v4addr != htonl(INADDR_ANY) &&
                    chk_addr_ret != RTN_LOCAL &&
                    chk_addr_ret != RTN_MULTICAST &&
                    chk_addr_ret != RTN_BROADCAST) {
                        err = -EADDRNOTAVAIL;
                        goto out;
                }
        } else {
                if (addr_type != IPV6_ADDR_ANY) {
                        struct net_device *dev = NULL;

                        rcu_read_lock();
                        if (__ipv6_addr_needs_scope_id(addr_type)) {
                                if (addr_len >= sizeof(struct sockaddr_in6) &&
                                    addr->sin6_scope_id) {
                                        /* Override any existing binding, if another one
                                         * is supplied by user.
                                         *
                                         * NOTE(review): the previous value of
                                         * sk_bound_dev_if is not restored if a
                                         * later check in this function fails.
                                         */
                                        sk->sk_bound_dev_if = addr->sin6_scope_id;
                                }

                                /* Binding to link-local address requires an interface */
                                if (!sk->sk_bound_dev_if) {
                                        err = -EINVAL;
                                        goto out_unlock;
                                }
                        }

                        if (sk->sk_bound_dev_if) {
                                dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
                                if (!dev) {
                                        err = -ENODEV;
                                        goto out_unlock;
                                }
                        }

                        /* ipv4 addr of the socket is invalid.  Only the
                         * unspecified and mapped address have a v4 equivalent.
                         */
                        v4addr = LOOPBACK4_IPV6;
                        if (!(addr_type & IPV6_ADDR_MULTICAST)) {
                                if (!ipv6_can_nonlocal_bind(net, inet) &&
                                    !ipv6_chk_addr(net, &addr->sin6_addr,
                                                   dev, 0)) {
                                        err = -EADDRNOTAVAIL;
                                        goto out_unlock;
                                }
                        }
                        rcu_read_unlock();
                }
        }

        /* All address checks passed: record the local address on the sock. */
        inet->inet_rcv_saddr = v4addr;
        inet->inet_saddr = v4addr;

        sk->sk_v6_rcv_saddr = addr->sin6_addr;

        if (!(addr_type & IPV6_ADDR_MULTICAST))
                np->saddr = addr->sin6_addr;

        /* A real IPv6 address forces the socket to v6-only; remember the old
         * setting so it can be put back if the port cannot be obtained.
         */
        saved_ipv6only = sk->sk_ipv6only;
        if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
                sk->sk_ipv6only = 1;

        /* Make sure we are allowed to bind here. */
        if (snum || !(inet->bind_address_no_port ||
                      force_bind_address_no_port)) {
                if (sk->sk_prot->get_port(sk, snum)) {
                        sk->sk_ipv6only = saved_ipv6only;
                        inet_reset_saddr(sk);
                        err = -EADDRINUSE;
                        goto out;
                }
                err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
                if (err) {
                        sk->sk_ipv6only = saved_ipv6only;
                        inet_reset_saddr(sk);
                        goto out;
                }
        }

        if (addr_type != IPV6_ADDR_ANY)
                sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
        if (snum)
                sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
        inet->inet_sport = htons(inet->inet_num);
        inet->inet_dport = 0;
        inet->inet_daddr = 0;
out:
        if (with_lock)
                release_sock(sk);
        return err;
out_unlock:
        /* Error raised inside an RCU read-side section: leave it first. */
        rcu_read_unlock();
        goto out;
}
436
437 /* bind for INET6 API */
438 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
439 {
440         struct sock *sk = sock->sk;
441         int err = 0;
442
443         /* If the socket has its own bind function then use it. */
444         if (sk->sk_prot->bind)
445                 return sk->sk_prot->bind(sk, uaddr, addr_len);
446
447         if (addr_len < SIN6_LEN_RFC2133)
448                 return -EINVAL;
449
450         /* BPF prog is run before any checks are done so that if the prog
451          * changes context in a wrong way it will be caught.
452          */
453         err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
454         if (err)
455                 return err;
456
457         return __inet6_bind(sk, uaddr, addr_len, false, true);
458 }
459 EXPORT_SYMBOL(inet6_bind);
460
461 int inet6_release(struct socket *sock)
462 {
463         struct sock *sk = sock->sk;
464
465         if (!sk)
466                 return -EINVAL;
467
468         /* Free mc lists */
469         ipv6_sock_mc_close(sk);
470
471         /* Free ac lists */
472         ipv6_sock_ac_close(sk);
473
474         return inet_release(sock);
475 }
476 EXPORT_SYMBOL(inet6_release);
477
478 void inet6_destroy_sock(struct sock *sk)
479 {
480         struct ipv6_pinfo *np = inet6_sk(sk);
481         struct sk_buff *skb;
482         struct ipv6_txoptions *opt;
483
484         /* Release rx options */
485
486         skb = xchg(&np->pktoptions, NULL);
487         kfree_skb(skb);
488
489         skb = xchg(&np->rxpmtu, NULL);
490         kfree_skb(skb);
491
492         /* Free flowlabels */
493         fl6_free_socklist(sk);
494
495         /* Free tx options */
496
497         opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
498         if (opt) {
499                 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
500                 txopt_put(opt);
501         }
502 }
503 EXPORT_SYMBOL_GPL(inet6_destroy_sock);
504
505 /*
506  *      This does both peername and sockname.
507  */
508
509 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
510                  int peer)
511 {
512         struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
513         struct sock *sk = sock->sk;
514         struct inet_sock *inet = inet_sk(sk);
515         struct ipv6_pinfo *np = inet6_sk(sk);
516
517         sin->sin6_family = AF_INET6;
518         sin->sin6_flowinfo = 0;
519         sin->sin6_scope_id = 0;
520         if (peer) {
521                 if (!inet->inet_dport)
522                         return -ENOTCONN;
523                 if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
524                     peer == 1)
525                         return -ENOTCONN;
526                 sin->sin6_port = inet->inet_dport;
527                 sin->sin6_addr = sk->sk_v6_daddr;
528                 if (np->sndflow)
529                         sin->sin6_flowinfo = np->flow_label;
530         } else {
531                 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
532                         sin->sin6_addr = np->saddr;
533                 else
534                         sin->sin6_addr = sk->sk_v6_rcv_saddr;
535
536                 sin->sin6_port = inet->inet_sport;
537         }
538         sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
539                                                  sk->sk_bound_dev_if);
540         return sizeof(*sin);
541 }
542 EXPORT_SYMBOL(inet6_getname);
543
544 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
545 {
546         struct sock *sk = sock->sk;
547         struct net *net = sock_net(sk);
548
549         switch (cmd) {
550         case SIOCADDRT:
551         case SIOCDELRT:
552
553                 return ipv6_route_ioctl(net, cmd, (void __user *)arg);
554
555         case SIOCSIFADDR:
556                 return addrconf_add_ifaddr(net, (void __user *) arg);
557         case SIOCDIFADDR:
558                 return addrconf_del_ifaddr(net, (void __user *) arg);
559         case SIOCSIFDSTADDR:
560                 return addrconf_set_dstaddr(net, (void __user *) arg);
561         default:
562                 if (!sk->sk_prot->ioctl)
563                         return -ENOIOCTLCMD;
564                 return sk->sk_prot->ioctl(sk, cmd, arg);
565         }
566         /*NOTREACHED*/
567         return 0;
568 }
569 EXPORT_SYMBOL(inet6_ioctl);
570
/* Socket-layer operations for PF_INET6 stream sockets.  release, bind,
 * getname and ioctl are the IPv6 versions defined in this file; the
 * remaining handlers are shared inet_*, sock_* and tcp_* functions.
 * mmap is only provided with CONFIG_MMU and the compat sockopt handlers
 * only with CONFIG_COMPAT.
 */
const struct proto_ops inet6_stream_ops = {
        .family            = PF_INET6,
        .owner             = THIS_MODULE,
        .release           = inet6_release,
        .bind              = inet6_bind,
        .connect           = inet_stream_connect,       /* ok           */
        .socketpair        = sock_no_socketpair,        /* a do nothing */
        .accept            = inet_accept,               /* ok           */
        .getname           = inet6_getname,
        .poll              = tcp_poll,                  /* ok           */
        .ioctl             = inet6_ioctl,               /* must change  */
        .gettstamp         = sock_gettstamp,
        .listen            = inet_listen,               /* ok           */
        .shutdown          = inet_shutdown,             /* ok           */
        .setsockopt        = sock_common_setsockopt,    /* ok           */
        .getsockopt        = sock_common_getsockopt,    /* ok           */
        .sendmsg           = inet_sendmsg,              /* ok           */
        .recvmsg           = inet_recvmsg,              /* ok           */
#ifdef CONFIG_MMU
        .mmap              = tcp_mmap,
#endif
        .sendpage          = inet_sendpage,
        .sendmsg_locked    = tcp_sendmsg_locked,
        .sendpage_locked   = tcp_sendpage_locked,
        .splice_read       = tcp_splice_read,
        .read_sock         = tcp_read_sock,
        .peek_len          = tcp_peek_len,
#ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
#endif
        .set_rcvlowat      = tcp_set_rcvlowat,
};
604
/* Socket-layer operations for PF_INET6 datagram sockets.  release, bind,
 * getname and ioctl are the IPv6 versions defined in this file; accept,
 * listen, socketpair, mmap and sendpage are wired to the sock_no_*
 * stubs, and polling goes through udp_poll.  The compat sockopt
 * handlers are only present with CONFIG_COMPAT.
 */
const struct proto_ops inet6_dgram_ops = {
        .family            = PF_INET6,
        .owner             = THIS_MODULE,
        .release           = inet6_release,
        .bind              = inet6_bind,
        .connect           = inet_dgram_connect,        /* ok           */
        .socketpair        = sock_no_socketpair,        /* a do nothing */
        .accept            = sock_no_accept,            /* a do nothing */
        .getname           = inet6_getname,
        .poll              = udp_poll,                  /* ok           */
        .ioctl             = inet6_ioctl,               /* must change  */
        .gettstamp         = sock_gettstamp,
        .listen            = sock_no_listen,            /* ok           */
        .shutdown          = inet_shutdown,             /* ok           */
        .setsockopt        = sock_common_setsockopt,    /* ok           */
        .getsockopt        = sock_common_getsockopt,    /* ok           */
        .sendmsg           = inet_sendmsg,              /* ok           */
        .recvmsg           = inet_recvmsg,              /* ok           */
        .mmap              = sock_no_mmap,
        .sendpage          = sock_no_sendpage,
        .set_peek_off      = sk_set_peek_off,
#ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
631
/* Protocol family descriptor for PF_INET6: new sockets of this family
 * are set up by inet6_create().
 */
static const struct net_proto_family inet6_family_ops = {
        .family = PF_INET6,
        .create = inet6_create,
        .owner  = THIS_MODULE,
};
637
638 int inet6_register_protosw(struct inet_protosw *p)
639 {
640         struct list_head *lh;
641         struct inet_protosw *answer;
642         struct list_head *last_perm;
643         int protocol = p->protocol;
644         int ret;
645
646         spin_lock_bh(&inetsw6_lock);
647
648         ret = -EINVAL;
649         if (p->type >= SOCK_MAX)
650                 goto out_illegal;
651
652         /* If we are trying to override a permanent protocol, bail. */
653         answer = NULL;
654         ret = -EPERM;
655         last_perm = &inetsw6[p->type];
656         list_for_each(lh, &inetsw6[p->type]) {
657                 answer = list_entry(lh, struct inet_protosw, list);
658
659                 /* Check only the non-wild match. */
660                 if (INET_PROTOSW_PERMANENT & answer->flags) {
661                         if (protocol == answer->protocol)
662                                 break;
663                         last_perm = lh;
664                 }
665
666                 answer = NULL;
667         }
668         if (answer)
669                 goto out_permanent;
670
671         /* Add the new entry after the last permanent entry if any, so that
672          * the new entry does not override a permanent entry when matched with
673          * a wild-card protocol. But it is allowed to override any existing
674          * non-permanent entry.  This means that when we remove this entry, the
675          * system automatically returns to the old behavior.
676          */
677         list_add_rcu(&p->list, last_perm);
678         ret = 0;
679 out:
680         spin_unlock_bh(&inetsw6_lock);
681         return ret;
682
683 out_permanent:
684         pr_err("Attempt to override permanent protocol %d\n", protocol);
685         goto out;
686
687 out_illegal:
688         pr_err("Ignoring attempt to register invalid socket type %d\n",
689                p->type);
690         goto out;
691 }
692 EXPORT_SYMBOL(inet6_register_protosw);
693
694 void
695 inet6_unregister_protosw(struct inet_protosw *p)
696 {
697         if (INET_PROTOSW_PERMANENT & p->flags) {
698                 pr_err("Attempt to unregister permanent protocol %d\n",
699                        p->protocol);
700         } else {
701                 spin_lock_bh(&inetsw6_lock);
702                 list_del_rcu(&p->list);
703                 spin_unlock_bh(&inetsw6_lock);
704
705                 synchronize_net();
706         }
707 }
708 EXPORT_SYMBOL(inet6_unregister_protosw);
709
710 int inet6_sk_rebuild_header(struct sock *sk)
711 {
712         struct ipv6_pinfo *np = inet6_sk(sk);
713         struct dst_entry *dst;
714
715         dst = __sk_dst_check(sk, np->dst_cookie);
716
717         if (!dst) {
718                 struct inet_sock *inet = inet_sk(sk);
719                 struct in6_addr *final_p, final;
720                 struct flowi6 fl6;
721
722                 memset(&fl6, 0, sizeof(fl6));
723                 fl6.flowi6_proto = sk->sk_protocol;
724                 fl6.daddr = sk->sk_v6_daddr;
725                 fl6.saddr = np->saddr;
726                 fl6.flowlabel = np->flow_label;
727                 fl6.flowi6_oif = sk->sk_bound_dev_if;
728                 fl6.flowi6_mark = sk->sk_mark;
729                 fl6.fl6_dport = inet->inet_dport;
730                 fl6.fl6_sport = inet->inet_sport;
731                 fl6.flowi6_uid = sk->sk_uid;
732                 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
733
734                 rcu_read_lock();
735                 final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
736                                          &final);
737                 rcu_read_unlock();
738
739                 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
740                 if (IS_ERR(dst)) {
741                         sk->sk_route_caps = 0;
742                         sk->sk_err_soft = -PTR_ERR(dst);
743                         return PTR_ERR(dst);
744                 }
745
746                 ip6_dst_store(sk, dst, NULL, NULL);
747         }
748
749         return 0;
750 }
751 EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
752
753 bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
754                        const struct inet6_skb_parm *opt)
755 {
756         const struct ipv6_pinfo *np = inet6_sk(sk);
757
758         if (np->rxopt.all) {
759                 if (((opt->flags & IP6SKB_HOPBYHOP) &&
760                      (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
761                     (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
762                      np->rxopt.bits.rxflow) ||
763                     (opt->srcrt && (np->rxopt.bits.srcrt ||
764                      np->rxopt.bits.osrcrt)) ||
765                     ((opt->dst1 || opt->dst0) &&
766                      (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
767                         return true;
768         }
769         return false;
770 }
771 EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
772
/* Packet handler for ETH_P_IPV6 frames: single packets are passed to
 * ipv6_rcv() and packet lists to ipv6_list_rcv().
 */
static struct packet_type ipv6_packet_type __read_mostly = {
        .type = cpu_to_be16(ETH_P_IPV6),
        .func = ipv6_rcv,
        .list_func = ipv6_list_rcv,
};
778
/* Register the IPv6 packet handler.  Always returns 0. */
static int __init ipv6_packet_init(void)
{
        dev_add_pack(&ipv6_packet_type);
        return 0;
}
784
/* Unregister the IPv6 packet handler added by ipv6_packet_init(). */
static void ipv6_packet_cleanup(void)
{
        dev_remove_pack(&ipv6_packet_type);
}
789
790 static int __net_init ipv6_init_mibs(struct net *net)
791 {
792         int i;
793
794         net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
795         if (!net->mib.udp_stats_in6)
796                 return -ENOMEM;
797         net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
798         if (!net->mib.udplite_stats_in6)
799                 goto err_udplite_mib;
800         net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
801         if (!net->mib.ipv6_statistics)
802                 goto err_ip_mib;
803
804         for_each_possible_cpu(i) {
805                 struct ipstats_mib *af_inet6_stats;
806                 af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
807                 u64_stats_init(&af_inet6_stats->syncp);
808         }
809
810
811         net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
812         if (!net->mib.icmpv6_statistics)
813                 goto err_icmp_mib;
814         net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
815                                                 GFP_KERNEL);
816         if (!net->mib.icmpv6msg_statistics)
817                 goto err_icmpmsg_mib;
818         return 0;
819
820 err_icmpmsg_mib:
821         free_percpu(net->mib.icmpv6_statistics);
822 err_icmp_mib:
823         free_percpu(net->mib.ipv6_statistics);
824 err_ip_mib:
825         free_percpu(net->mib.udplite_stats_in6);
826 err_udplite_mib:
827         free_percpu(net->mib.udp_stats_in6);
828         return -ENOMEM;
829 }
830
831 static void ipv6_cleanup_mibs(struct net *net)
832 {
833         free_percpu(net->mib.udp_stats_in6);
834         free_percpu(net->mib.udplite_stats_in6);
835         free_percpu(net->mib.ipv6_statistics);
836         free_percpu(net->mib.icmpv6_statistics);
837         kfree(net->mib.icmpv6msg_statistics);
838 }
839
840 static int __net_init inet6_net_init(struct net *net)
841 {
842         int err = 0;
843
844         net->ipv6.sysctl.bindv6only = 0;
845         net->ipv6.sysctl.icmpv6_time = 1*HZ;
846         net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
847         net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
848         net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
849
850         /* By default, rate limit error messages.
851          * Except for pmtu discovery, it would break it.
852          * proc_do_large_bitmap needs pointer to the bitmap.
853          */
854         bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
855         bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
856         net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
857
858         net->ipv6.sysctl.flowlabel_consistency = 1;
859         net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
860         net->ipv6.sysctl.idgen_retries = 3;
861         net->ipv6.sysctl.idgen_delay = 1 * HZ;
862         net->ipv6.sysctl.flowlabel_state_ranges = 0;
863         net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
864         net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
865         net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
866         net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
867         atomic_set(&net->ipv6.fib6_sernum, 1);
868
869         err = ipv6_init_mibs(net);
870         if (err)
871                 return err;
872 #ifdef CONFIG_PROC_FS
873         err = udp6_proc_init(net);
874         if (err)
875                 goto out;
876         err = tcp6_proc_init(net);
877         if (err)
878                 goto proc_tcp6_fail;
879         err = ac6_proc_init(net);
880         if (err)
881                 goto proc_ac6_fail;
882 #endif
883         return err;
884
885 #ifdef CONFIG_PROC_FS
886 proc_ac6_fail:
887         tcp6_proc_exit(net);
888 proc_tcp6_fail:
889         udp6_proc_exit(net);
890 out:
891         ipv6_cleanup_mibs(net);
892         return err;
893 #endif
894 }
895
896 static void __net_exit inet6_net_exit(struct net *net)
897 {
898 #ifdef CONFIG_PROC_FS
899         udp6_proc_exit(net);
900         tcp6_proc_exit(net);
901         ac6_proc_exit(net);
902 #endif
903         ipv6_cleanup_mibs(net);
904 }
905
906 static struct pernet_operations inet6_net_ops = {
907         .init = inet6_net_init,
908         .exit = inet6_net_exit,
909 };
910
911 static int ipv6_route_input(struct sk_buff *skb)
912 {
913         ip6_route_input(skb);
914         return skb_dst(skb)->error;
915 }
916
917 static const struct ipv6_stub ipv6_stub_impl = {
918         .ipv6_sock_mc_join = ipv6_sock_mc_join,
919         .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
920         .ipv6_dst_lookup   = ip6_dst_lookup,
921         .ipv6_route_input  = ipv6_route_input,
922         .fib6_get_table    = fib6_get_table,
923         .fib6_table_lookup = fib6_table_lookup,
924         .fib6_lookup       = fib6_lookup,
925         .fib6_select_path  = fib6_select_path,
926         .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
927         .fib6_nh_init      = fib6_nh_init,
928         .fib6_nh_release   = fib6_nh_release,
929         .fib6_update_sernum = fib6_update_sernum_stub,
930         .ip6_del_rt        = ip6_del_rt,
931         .udpv6_encap_enable = udpv6_encap_enable,
932         .ndisc_send_na = ndisc_send_na,
933         .nd_tbl = &nd_tbl,
934 };
935
936 static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
937         .inet6_bind = __inet6_bind,
938         .udp6_lib_lookup = __udp6_lib_lookup,
939 };
940
941 static int __init inet6_init(void)
942 {
943         struct list_head *r;
944         int err = 0;
945
946         sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
947
948         /* Register the socket-side information for inet6_create.  */
949         for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
950                 INIT_LIST_HEAD(r);
951
952         if (disable_ipv6_mod) {
953                 pr_info("Loaded, but administratively disabled, reboot required to enable\n");
954                 goto out;
955         }
956
957         err = proto_register(&tcpv6_prot, 1);
958         if (err)
959                 goto out;
960
961         err = proto_register(&udpv6_prot, 1);
962         if (err)
963                 goto out_unregister_tcp_proto;
964
965         err = proto_register(&udplitev6_prot, 1);
966         if (err)
967                 goto out_unregister_udp_proto;
968
969         err = proto_register(&rawv6_prot, 1);
970         if (err)
971                 goto out_unregister_udplite_proto;
972
973         err = proto_register(&pingv6_prot, 1);
974         if (err)
975                 goto out_unregister_raw_proto;
976
977         /* We MUST register RAW sockets before we create the ICMP6,
978          * IGMP6, or NDISC control sockets.
979          */
980         err = rawv6_init();
981         if (err)
982                 goto out_unregister_ping_proto;
983
984         /* Register the family here so that the init calls below will
985          * be able to create sockets. (?? is this dangerous ??)
986          */
987         err = sock_register(&inet6_family_ops);
988         if (err)
989                 goto out_sock_register_fail;
990
991         /*
992          *      ipngwg API draft makes clear that the correct semantics
993          *      for TCP and UDP is to consider one TCP and UDP instance
994          *      in a host available by both INET and INET6 APIs and
995          *      able to communicate via both network protocols.
996          */
997
998         err = register_pernet_subsys(&inet6_net_ops);
999         if (err)
1000                 goto register_pernet_fail;
1001         err = ip6_mr_init();
1002         if (err)
1003                 goto ipmr_fail;
1004         err = icmpv6_init();
1005         if (err)
1006                 goto icmp_fail;
1007         err = ndisc_init();
1008         if (err)
1009                 goto ndisc_fail;
1010         err = igmp6_init();
1011         if (err)
1012                 goto igmp_fail;
1013
1014         err = ipv6_netfilter_init();
1015         if (err)
1016                 goto netfilter_fail;
1017         /* Create /proc/foo6 entries. */
1018 #ifdef CONFIG_PROC_FS
1019         err = -ENOMEM;
1020         if (raw6_proc_init())
1021                 goto proc_raw6_fail;
1022         if (udplite6_proc_init())
1023                 goto proc_udplite6_fail;
1024         if (ipv6_misc_proc_init())
1025                 goto proc_misc6_fail;
1026         if (if6_proc_init())
1027                 goto proc_if6_fail;
1028 #endif
1029         err = ip6_route_init();
1030         if (err)
1031                 goto ip6_route_fail;
1032         err = ndisc_late_init();
1033         if (err)
1034                 goto ndisc_late_fail;
1035         err = ip6_flowlabel_init();
1036         if (err)
1037                 goto ip6_flowlabel_fail;
1038         err = ipv6_anycast_init();
1039         if (err)
1040                 goto ipv6_anycast_fail;
1041         err = addrconf_init();
1042         if (err)
1043                 goto addrconf_fail;
1044
1045         /* Init v6 extension headers. */
1046         err = ipv6_exthdrs_init();
1047         if (err)
1048                 goto ipv6_exthdrs_fail;
1049
1050         err = ipv6_frag_init();
1051         if (err)
1052                 goto ipv6_frag_fail;
1053
1054         /* Init v6 transport protocols. */
1055         err = udpv6_init();
1056         if (err)
1057                 goto udpv6_fail;
1058
1059         err = udplitev6_init();
1060         if (err)
1061                 goto udplitev6_fail;
1062
1063         err = udpv6_offload_init();
1064         if (err)
1065                 goto udpv6_offload_fail;
1066
1067         err = tcpv6_init();
1068         if (err)
1069                 goto tcpv6_fail;
1070
1071         err = ipv6_packet_init();
1072         if (err)
1073                 goto ipv6_packet_fail;
1074
1075         err = pingv6_init();
1076         if (err)
1077                 goto pingv6_fail;
1078
1079         err = calipso_init();
1080         if (err)
1081                 goto calipso_fail;
1082
1083         err = seg6_init();
1084         if (err)
1085                 goto seg6_fail;
1086
1087         err = igmp6_late_init();
1088         if (err)
1089                 goto igmp6_late_err;
1090
1091 #ifdef CONFIG_SYSCTL
1092         err = ipv6_sysctl_register();
1093         if (err)
1094                 goto sysctl_fail;
1095 #endif
1096
1097         /* ensure that ipv6 stubs are visible only after ipv6 is ready */
1098         wmb();
1099         ipv6_stub = &ipv6_stub_impl;
1100         ipv6_bpf_stub = &ipv6_bpf_stub_impl;
1101 out:
1102         return err;
1103
1104 #ifdef CONFIG_SYSCTL
1105 sysctl_fail:
1106         igmp6_late_cleanup();
1107 #endif
1108 igmp6_late_err:
1109         seg6_exit();
1110 seg6_fail:
1111         calipso_exit();
1112 calipso_fail:
1113         pingv6_exit();
1114 pingv6_fail:
1115         ipv6_packet_cleanup();
1116 ipv6_packet_fail:
1117         tcpv6_exit();
1118 tcpv6_fail:
1119         udpv6_offload_exit();
1120 udpv6_offload_fail:
1121         udplitev6_exit();
1122 udplitev6_fail:
1123         udpv6_exit();
1124 udpv6_fail:
1125         ipv6_frag_exit();
1126 ipv6_frag_fail:
1127         ipv6_exthdrs_exit();
1128 ipv6_exthdrs_fail:
1129         addrconf_cleanup();
1130 addrconf_fail:
1131         ipv6_anycast_cleanup();
1132 ipv6_anycast_fail:
1133         ip6_flowlabel_cleanup();
1134 ip6_flowlabel_fail:
1135         ndisc_late_cleanup();
1136 ndisc_late_fail:
1137         ip6_route_cleanup();
1138 ip6_route_fail:
1139 #ifdef CONFIG_PROC_FS
1140         if6_proc_exit();
1141 proc_if6_fail:
1142         ipv6_misc_proc_exit();
1143 proc_misc6_fail:
1144         udplite6_proc_exit();
1145 proc_udplite6_fail:
1146         raw6_proc_exit();
1147 proc_raw6_fail:
1148 #endif
1149         ipv6_netfilter_fini();
1150 netfilter_fail:
1151         igmp6_cleanup();
1152 igmp_fail:
1153         ndisc_cleanup();
1154 ndisc_fail:
1155         icmpv6_cleanup();
1156 icmp_fail:
1157         ip6_mr_cleanup();
1158 ipmr_fail:
1159         unregister_pernet_subsys(&inet6_net_ops);
1160 register_pernet_fail:
1161         sock_unregister(PF_INET6);
1162         rtnl_unregister_all(PF_INET6);
1163 out_sock_register_fail:
1164         rawv6_exit();
1165 out_unregister_ping_proto:
1166         proto_unregister(&pingv6_prot);
1167 out_unregister_raw_proto:
1168         proto_unregister(&rawv6_prot);
1169 out_unregister_udplite_proto:
1170         proto_unregister(&udplitev6_prot);
1171 out_unregister_udp_proto:
1172         proto_unregister(&udpv6_prot);
1173 out_unregister_tcp_proto:
1174         proto_unregister(&tcpv6_prot);
1175         goto out;
1176 }
1177 module_init(inet6_init);
1178
1179 MODULE_ALIAS_NETPROTO(PF_INET6);