]> asedeno.scripts.mit.edu Git - linux.git/blob - net/ipv6/route.c
Merge tag 'clang-format-for-linus-v5.1-rc5' of git://github.com/ojeda/linux
[linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <net/ip.h>
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
73 static int ip6_rt_type_to_error(u8 fib6_type);
74
75 #define CREATE_TRACE_POINTS
76 #include <trace/events/fib6.h>
77 EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78 #undef CREATE_TRACE_POINTS
79
/*
 * Outcome of the neighbour reachability check performed while scoring a
 * route.  Every failure code is negative so callers can test "< 0".
 * find_match() skips a route on RT6_NUD_FAIL_HARD and, on
 * RT6_NUD_FAIL_DO_RR, keeps it at the lowest score while requesting
 * round-robin.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
86
87 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
88 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
89 static unsigned int      ip6_mtu(const struct dst_entry *dst);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(struct dst_ops *ops);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
98 static int              ip6_pkt_prohibit(struct sk_buff *skb);
99 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
100 static void             ip6_link_failure(struct sk_buff *skb);
101 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102                                            struct sk_buff *skb, u32 mtu);
103 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104                                         struct sk_buff *skb);
105 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106 static size_t rt6_nlmsg_size(struct fib6_info *rt);
107 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
108                          struct fib6_info *rt, struct dst_entry *dst,
109                          struct in6_addr *dest, struct in6_addr *src,
110                          int iif, int type, u32 portid, u32 seq,
111                          unsigned int flags);
112 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
113                                            struct in6_addr *daddr,
114                                            struct in6_addr *saddr);
115
116 #ifdef CONFIG_IPV6_ROUTE_INFO
117 static struct fib6_info *rt6_add_route_info(struct net *net,
118                                            const struct in6_addr *prefix, int prefixlen,
119                                            const struct in6_addr *gwaddr,
120                                            struct net_device *dev,
121                                            unsigned int pref);
122 static struct fib6_info *rt6_get_route_info(struct net *net,
123                                            const struct in6_addr *prefix, int prefixlen,
124                                            const struct in6_addr *gwaddr,
125                                            struct net_device *dev);
126 #endif
127
128 struct uncached_list {
129         spinlock_t              lock;
130         struct list_head        head;
131 };
132
133 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
135 void rt6_uncached_list_add(struct rt6_info *rt)
136 {
137         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
139         rt->rt6i_uncached_list = ul;
140
141         spin_lock_bh(&ul->lock);
142         list_add_tail(&rt->rt6i_uncached, &ul->head);
143         spin_unlock_bh(&ul->lock);
144 }
145
146 void rt6_uncached_list_del(struct rt6_info *rt)
147 {
148         if (!list_empty(&rt->rt6i_uncached)) {
149                 struct uncached_list *ul = rt->rt6i_uncached_list;
150                 struct net *net = dev_net(rt->dst.dev);
151
152                 spin_lock_bh(&ul->lock);
153                 list_del(&rt->rt6i_uncached);
154                 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
155                 spin_unlock_bh(&ul->lock);
156         }
157 }
158
159 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160 {
161         struct net_device *loopback_dev = net->loopback_dev;
162         int cpu;
163
164         if (dev == loopback_dev)
165                 return;
166
167         for_each_possible_cpu(cpu) {
168                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169                 struct rt6_info *rt;
170
171                 spin_lock_bh(&ul->lock);
172                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173                         struct inet6_dev *rt_idev = rt->rt6i_idev;
174                         struct net_device *rt_dev = rt->dst.dev;
175
176                         if (rt_idev->dev == dev) {
177                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
178                                 in6_dev_put(rt_idev);
179                         }
180
181                         if (rt_dev == dev) {
182                                 rt->dst.dev = loopback_dev;
183                                 dev_hold(rt->dst.dev);
184                                 dev_put(rt_dev);
185                         }
186                 }
187                 spin_unlock_bh(&ul->lock);
188         }
189 }
190
191 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
192                                              struct sk_buff *skb,
193                                              const void *daddr)
194 {
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203                                    struct net_device *dev,
204                                    struct sk_buff *skb,
205                                    const void *daddr)
206 {
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(gw, skb, daddr);
210         n = __ipv6_neigh_lookup(dev, daddr);
211         if (n)
212                 return n;
213
214         n = neigh_create(&nd_tbl, daddr, dev);
215         return IS_ERR(n) ? NULL : n;
216 }
217
218 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219                                               struct sk_buff *skb,
220                                               const void *daddr)
221 {
222         const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223
224         return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
225 }
226
227 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
228 {
229         struct net_device *dev = dst->dev;
230         struct rt6_info *rt = (struct rt6_info *)dst;
231
232         daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
233         if (!daddr)
234                 return;
235         if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
236                 return;
237         if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
238                 return;
239         __ipv6_confirm_neigh(dev, daddr);
240 }
241
242 static struct dst_ops ip6_dst_ops_template = {
243         .family                 =       AF_INET6,
244         .gc                     =       ip6_dst_gc,
245         .gc_thresh              =       1024,
246         .check                  =       ip6_dst_check,
247         .default_advmss         =       ip6_default_advmss,
248         .mtu                    =       ip6_mtu,
249         .cow_metrics            =       dst_cow_metrics_generic,
250         .destroy                =       ip6_dst_destroy,
251         .ifdown                 =       ip6_dst_ifdown,
252         .negative_advice        =       ip6_negative_advice,
253         .link_failure           =       ip6_link_failure,
254         .update_pmtu            =       ip6_rt_update_pmtu,
255         .redirect               =       rt6_do_redirect,
256         .local_out              =       __ip6_local_out,
257         .neigh_lookup           =       ip6_dst_neigh_lookup,
258         .confirm_neigh          =       ip6_confirm_neigh,
259 };
260
261 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
262 {
263         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
264
265         return mtu ? : dst->dev->mtu;
266 }
267
268 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
269                                          struct sk_buff *skb, u32 mtu)
270 {
271 }
272
/* redirect hook for blackhole dst entries: a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
277
278 static struct dst_ops ip6_dst_blackhole_ops = {
279         .family                 =       AF_INET6,
280         .destroy                =       ip6_dst_destroy,
281         .check                  =       ip6_dst_check,
282         .mtu                    =       ip6_blackhole_mtu,
283         .default_advmss         =       ip6_default_advmss,
284         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
285         .redirect               =       ip6_rt_blackhole_redirect,
286         .cow_metrics            =       dst_cow_metrics_generic,
287         .neigh_lookup           =       ip6_dst_neigh_lookup,
288 };
289
290 static const u32 ip6_template_metrics[RTAX_MAX] = {
291         [RTAX_HOPLIMIT - 1] = 0,
292 };
293
294 static const struct fib6_info fib6_null_entry_template = {
295         .fib6_flags     = (RTF_REJECT | RTF_NONEXTHOP),
296         .fib6_protocol  = RTPROT_KERNEL,
297         .fib6_metric    = ~(u32)0,
298         .fib6_ref       = ATOMIC_INIT(1),
299         .fib6_type      = RTN_UNREACHABLE,
300         .fib6_metrics   = (struct dst_metrics *)&dst_default_metrics,
301 };
302
303 static const struct rt6_info ip6_null_entry_template = {
304         .dst = {
305                 .__refcnt       = ATOMIC_INIT(1),
306                 .__use          = 1,
307                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
308                 .error          = -ENETUNREACH,
309                 .input          = ip6_pkt_discard,
310                 .output         = ip6_pkt_discard_out,
311         },
312         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
313 };
314
315 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
316
317 static const struct rt6_info ip6_prohibit_entry_template = {
318         .dst = {
319                 .__refcnt       = ATOMIC_INIT(1),
320                 .__use          = 1,
321                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
322                 .error          = -EACCES,
323                 .input          = ip6_pkt_prohibit,
324                 .output         = ip6_pkt_prohibit_out,
325         },
326         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
327 };
328
329 static const struct rt6_info ip6_blk_hole_entry_template = {
330         .dst = {
331                 .__refcnt       = ATOMIC_INIT(1),
332                 .__use          = 1,
333                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
334                 .error          = -EINVAL,
335                 .input          = dst_discard,
336                 .output         = dst_discard_out,
337         },
338         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
339 };
340
341 #endif
342
343 static void rt6_info_init(struct rt6_info *rt)
344 {
345         struct dst_entry *dst = &rt->dst;
346
347         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
348         INIT_LIST_HEAD(&rt->rt6i_uncached);
349 }
350
351 /* allocate dst with ip6_dst_ops */
352 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
353                                int flags)
354 {
355         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356                                         1, DST_OBSOLETE_FORCE_CHK, flags);
357
358         if (rt) {
359                 rt6_info_init(rt);
360                 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
361         }
362
363         return rt;
364 }
365 EXPORT_SYMBOL(ip6_dst_alloc);
366
367 static void ip6_dst_destroy(struct dst_entry *dst)
368 {
369         struct rt6_info *rt = (struct rt6_info *)dst;
370         struct fib6_info *from;
371         struct inet6_dev *idev;
372
373         ip_dst_metrics_put(dst);
374         rt6_uncached_list_del(rt);
375
376         idev = rt->rt6i_idev;
377         if (idev) {
378                 rt->rt6i_idev = NULL;
379                 in6_dev_put(idev);
380         }
381
382         rcu_read_lock();
383         from = rcu_dereference(rt->from);
384         rcu_assign_pointer(rt->from, NULL);
385         fib6_info_release(from);
386         rcu_read_unlock();
387 }
388
389 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390                            int how)
391 {
392         struct rt6_info *rt = (struct rt6_info *)dst;
393         struct inet6_dev *idev = rt->rt6i_idev;
394         struct net_device *loopback_dev =
395                 dev_net(dev)->loopback_dev;
396
397         if (idev && idev->dev != loopback_dev) {
398                 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
399                 if (loopback_idev) {
400                         rt->rt6i_idev = loopback_idev;
401                         in6_dev_put(idev);
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         struct fib6_info *from;
417
418         from = rcu_dereference(rt->from);
419
420         if (rt->rt6i_flags & RTF_EXPIRES) {
421                 if (time_after(jiffies, rt->dst.expires))
422                         return true;
423         } else if (from) {
424                 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
425                         fib6_check_expired(from);
426         }
427         return false;
428 }
429
430 struct fib6_info *fib6_multipath_select(const struct net *net,
431                                         struct fib6_info *match,
432                                         struct flowi6 *fl6, int oif,
433                                         const struct sk_buff *skb,
434                                         int strict)
435 {
436         struct fib6_info *sibling, *next_sibling;
437
438         /* We might have already computed the hash for ICMPv6 errors. In such
439          * case it will always be non-zero. Otherwise now is the time to do it.
440          */
441         if (!fl6->mp_hash)
442                 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
443
444         if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
445                 return match;
446
447         list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
448                                  fib6_siblings) {
449                 int nh_upper_bound;
450
451                 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
452                 if (fl6->mp_hash > nh_upper_bound)
453                         continue;
454                 if (rt6_score_route(sibling, oif, strict) < 0)
455                         break;
456                 match = sibling;
457                 break;
458         }
459
460         return match;
461 }
462
463 /*
464  *      Route lookup. rcu_read_lock() should be held.
465  */
466
467 static inline struct fib6_info *rt6_device_match(struct net *net,
468                                                  struct fib6_info *rt,
469                                                     const struct in6_addr *saddr,
470                                                     int oif,
471                                                     int flags)
472 {
473         struct fib6_info *sprt;
474
475         if (!oif && ipv6_addr_any(saddr) &&
476             !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
477                 return rt;
478
479         for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
480                 const struct net_device *dev = sprt->fib6_nh.nh_dev;
481
482                 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
483                         continue;
484
485                 if (oif) {
486                         if (dev->ifindex == oif)
487                                 return sprt;
488                 } else {
489                         if (ipv6_chk_addr(net, saddr, dev,
490                                           flags & RT6_LOOKUP_F_IFACE))
491                                 return sprt;
492                 }
493         }
494
495         if (oif && flags & RT6_LOOKUP_F_IFACE)
496                 return net->ipv6.fib6_null_entry;
497
498         return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
499 }
500
501 #ifdef CONFIG_IPV6_ROUTER_PREF
502 struct __rt6_probe_work {
503         struct work_struct work;
504         struct in6_addr target;
505         struct net_device *dev;
506 };
507
508 static void rt6_probe_deferred(struct work_struct *w)
509 {
510         struct in6_addr mcaddr;
511         struct __rt6_probe_work *work =
512                 container_of(w, struct __rt6_probe_work, work);
513
514         addrconf_addr_solict_mult(&work->target, &mcaddr);
515         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
516         dev_put(work->dev);
517         kfree(work);
518 }
519
520 static void rt6_probe(struct fib6_info *rt)
521 {
522         struct __rt6_probe_work *work = NULL;
523         const struct in6_addr *nh_gw;
524         struct neighbour *neigh;
525         struct net_device *dev;
526         struct inet6_dev *idev;
527
528         /*
529          * Okay, this does not seem to be appropriate
530          * for now, however, we need to check if it
531          * is really so; aka Router Reachability Probing.
532          *
533          * Router Reachability Probe MUST be rate-limited
534          * to no more than one per minute.
535          */
536         if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
537                 return;
538
539         nh_gw = &rt->fib6_nh.nh_gw;
540         dev = rt->fib6_nh.nh_dev;
541         rcu_read_lock_bh();
542         idev = __in6_dev_get(dev);
543         neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
544         if (neigh) {
545                 if (neigh->nud_state & NUD_VALID)
546                         goto out;
547
548                 write_lock(&neigh->lock);
549                 if (!(neigh->nud_state & NUD_VALID) &&
550                     time_after(jiffies,
551                                neigh->updated + idev->cnf.rtr_probe_interval)) {
552                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
553                         if (work)
554                                 __neigh_set_probe_once(neigh);
555                 }
556                 write_unlock(&neigh->lock);
557         } else if (time_after(jiffies, rt->last_probe +
558                                        idev->cnf.rtr_probe_interval)) {
559                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
560         }
561
562         if (work) {
563                 rt->last_probe = jiffies;
564                 INIT_WORK(&work->work, rt6_probe_deferred);
565                 work->target = *nh_gw;
566                 dev_hold(dev);
567                 work->dev = dev;
568                 schedule_work(&work->work);
569         }
570
571 out:
572         rcu_read_unlock_bh();
573 }
574 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* intentionally empty */
}
578 #endif
579
580 /*
581  * Default Router Selection (RFC 2461 6.3.6)
582  */
583 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
584 {
585         const struct net_device *dev = rt->fib6_nh.nh_dev;
586
587         if (!oif || dev->ifindex == oif)
588                 return 2;
589         return 0;
590 }
591
592 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
593 {
594         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
595         struct neighbour *neigh;
596
597         if (rt->fib6_flags & RTF_NONEXTHOP ||
598             !(rt->fib6_flags & RTF_GATEWAY))
599                 return RT6_NUD_SUCCEED;
600
601         rcu_read_lock_bh();
602         neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
603                                           &rt->fib6_nh.nh_gw);
604         if (neigh) {
605                 read_lock(&neigh->lock);
606                 if (neigh->nud_state & NUD_VALID)
607                         ret = RT6_NUD_SUCCEED;
608 #ifdef CONFIG_IPV6_ROUTER_PREF
609                 else if (!(neigh->nud_state & NUD_FAILED))
610                         ret = RT6_NUD_SUCCEED;
611                 else
612                         ret = RT6_NUD_FAIL_PROBE;
613 #endif
614                 read_unlock(&neigh->lock);
615         } else {
616                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
617                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
618         }
619         rcu_read_unlock_bh();
620
621         return ret;
622 }
623
624 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
625 {
626         int m;
627
628         m = rt6_check_dev(rt, oif);
629         if (!m && (strict & RT6_LOOKUP_F_IFACE))
630                 return RT6_NUD_FAIL_HARD;
631 #ifdef CONFIG_IPV6_ROUTER_PREF
632         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
633 #endif
634         if (strict & RT6_LOOKUP_F_REACHABLE) {
635                 int n = rt6_check_neigh(rt);
636                 if (n < 0)
637                         return n;
638         }
639         return m;
640 }
641
642 /* called with rc_read_lock held */
643 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
644 {
645         const struct net_device *dev = fib6_info_nh_dev(f6i);
646         bool rc = false;
647
648         if (dev) {
649                 const struct inet6_dev *idev = __in6_dev_get(dev);
650
651                 rc = !!idev->cnf.ignore_routes_with_linkdown;
652         }
653
654         return rc;
655 }
656
657 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
658                                    int *mpri, struct fib6_info *match,
659                                    bool *do_rr)
660 {
661         int m;
662         bool match_do_rr = false;
663
664         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
665                 goto out;
666
667         if (fib6_ignore_linkdown(rt) &&
668             rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
669             !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
670                 goto out;
671
672         if (fib6_check_expired(rt))
673                 goto out;
674
675         m = rt6_score_route(rt, oif, strict);
676         if (m == RT6_NUD_FAIL_DO_RR) {
677                 match_do_rr = true;
678                 m = 0; /* lowest valid score */
679         } else if (m == RT6_NUD_FAIL_HARD) {
680                 goto out;
681         }
682
683         if (strict & RT6_LOOKUP_F_REACHABLE)
684                 rt6_probe(rt);
685
686         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
687         if (m > *mpri) {
688                 *do_rr = match_do_rr;
689                 *mpri = m;
690                 match = rt;
691         }
692 out:
693         return match;
694 }
695
696 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
697                                      struct fib6_info *leaf,
698                                      struct fib6_info *rr_head,
699                                      u32 metric, int oif, int strict,
700                                      bool *do_rr)
701 {
702         struct fib6_info *rt, *match, *cont;
703         int mpri = -1;
704
705         match = NULL;
706         cont = NULL;
707         for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
708                 if (rt->fib6_metric != metric) {
709                         cont = rt;
710                         break;
711                 }
712
713                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
714         }
715
716         for (rt = leaf; rt && rt != rr_head;
717              rt = rcu_dereference(rt->fib6_next)) {
718                 if (rt->fib6_metric != metric) {
719                         cont = rt;
720                         break;
721                 }
722
723                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
724         }
725
726         if (match || !cont)
727                 return match;
728
729         for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
730                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
731
732         return match;
733 }
734
735 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
736                                    int oif, int strict)
737 {
738         struct fib6_info *leaf = rcu_dereference(fn->leaf);
739         struct fib6_info *match, *rt0;
740         bool do_rr = false;
741         int key_plen;
742
743         if (!leaf || leaf == net->ipv6.fib6_null_entry)
744                 return net->ipv6.fib6_null_entry;
745
746         rt0 = rcu_dereference(fn->rr_ptr);
747         if (!rt0)
748                 rt0 = leaf;
749
750         /* Double check to make sure fn is not an intermediate node
751          * and fn->leaf does not points to its child's leaf
752          * (This might happen if all routes under fn are deleted from
753          * the tree and fib6_repair_tree() is called on the node.)
754          */
755         key_plen = rt0->fib6_dst.plen;
756 #ifdef CONFIG_IPV6_SUBTREES
757         if (rt0->fib6_src.plen)
758                 key_plen = rt0->fib6_src.plen;
759 #endif
760         if (fn->fn_bit != key_plen)
761                 return net->ipv6.fib6_null_entry;
762
763         match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
764                              &do_rr);
765
766         if (do_rr) {
767                 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
768
769                 /* no entries matched; do round-robin */
770                 if (!next || next->fib6_metric != rt0->fib6_metric)
771                         next = leaf;
772
773                 if (next != rt0) {
774                         spin_lock_bh(&leaf->fib6_table->tb6_lock);
775                         /* make sure next is not being deleted from the tree */
776                         if (next->fib6_node)
777                                 rcu_assign_pointer(fn->rr_ptr, next);
778                         spin_unlock_bh(&leaf->fib6_table->tb6_lock);
779                 }
780         }
781
782         return match ? match : net->ipv6.fib6_null_entry;
783 }
784
785 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
786 {
787         return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
788 }
789
790 #ifdef CONFIG_IPV6_ROUTE_INFO
791 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
792                   const struct in6_addr *gwaddr)
793 {
794         struct net *net = dev_net(dev);
795         struct route_info *rinfo = (struct route_info *) opt;
796         struct in6_addr prefix_buf, *prefix;
797         unsigned int pref;
798         unsigned long lifetime;
799         struct fib6_info *rt;
800
801         if (len < sizeof(struct route_info)) {
802                 return -EINVAL;
803         }
804
805         /* Sanity check for prefix_len and length */
806         if (rinfo->length > 3) {
807                 return -EINVAL;
808         } else if (rinfo->prefix_len > 128) {
809                 return -EINVAL;
810         } else if (rinfo->prefix_len > 64) {
811                 if (rinfo->length < 2) {
812                         return -EINVAL;
813                 }
814         } else if (rinfo->prefix_len > 0) {
815                 if (rinfo->length < 1) {
816                         return -EINVAL;
817                 }
818         }
819
820         pref = rinfo->route_pref;
821         if (pref == ICMPV6_ROUTER_PREF_INVALID)
822                 return -EINVAL;
823
824         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
825
826         if (rinfo->length == 3)
827                 prefix = (struct in6_addr *)rinfo->prefix;
828         else {
829                 /* this function is safe */
830                 ipv6_addr_prefix(&prefix_buf,
831                                  (struct in6_addr *)rinfo->prefix,
832                                  rinfo->prefix_len);
833                 prefix = &prefix_buf;
834         }
835
836         if (rinfo->prefix_len == 0)
837                 rt = rt6_get_dflt_router(net, gwaddr, dev);
838         else
839                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
840                                         gwaddr, dev);
841
842         if (rt && !lifetime) {
843                 ip6_del_rt(net, rt);
844                 rt = NULL;
845         }
846
847         if (!rt && lifetime)
848                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
849                                         dev, pref);
850         else if (rt)
851                 rt->fib6_flags = RTF_ROUTEINFO |
852                                  (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
853
854         if (rt) {
855                 if (!addrconf_finite_timeout(lifetime))
856                         fib6_clean_expires(rt);
857                 else
858                         fib6_set_expires(rt, jiffies + HZ * lifetime);
859
860                 fib6_info_release(rt);
861         }
862         return 0;
863 }
864 #endif
865
866 /*
867  *      Misc support functions
868  */
869
870 /* called with rcu_lock held */
871 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
872 {
873         struct net_device *dev = rt->fib6_nh.nh_dev;
874
875         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
876                 /* for copies of local routes, dst->dev needs to be the
877                  * device if it is a master device, the master device if
878                  * device is enslaved, and the loopback as the default
879                  */
880                 if (netif_is_l3_slave(dev) &&
881                     !rt6_need_strict(&rt->fib6_dst.addr))
882                         dev = l3mdev_master_dev_rcu(dev);
883                 else if (!netif_is_l3_master(dev))
884                         dev = dev_net(dev)->loopback_dev;
885                 /* last case is netif_is_l3_master(dev) is true in which
886                  * case we want dev returned to be dev
887                  */
888         }
889
890         return dev;
891 }
892
893 static const int fib6_prop[RTN_MAX + 1] = {
894         [RTN_UNSPEC]    = 0,
895         [RTN_UNICAST]   = 0,
896         [RTN_LOCAL]     = 0,
897         [RTN_BROADCAST] = 0,
898         [RTN_ANYCAST]   = 0,
899         [RTN_MULTICAST] = 0,
900         [RTN_BLACKHOLE] = -EINVAL,
901         [RTN_UNREACHABLE] = -EHOSTUNREACH,
902         [RTN_PROHIBIT]  = -EACCES,
903         [RTN_THROW]     = -EAGAIN,
904         [RTN_NAT]       = -EINVAL,
905         [RTN_XRESOLVE]  = -EINVAL,
906 };
907
908 static int ip6_rt_type_to_error(u8 fib6_type)
909 {
910         return fib6_prop[fib6_type];
911 }
912
913 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
914 {
915         unsigned short flags = 0;
916
917         if (rt->dst_nocount)
918                 flags |= DST_NOCOUNT;
919         if (rt->dst_nopolicy)
920                 flags |= DST_NOPOLICY;
921         if (rt->dst_host)
922                 flags |= DST_HOST;
923
924         return flags;
925 }
926
927 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
928 {
929         rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
930
931         switch (ort->fib6_type) {
932         case RTN_BLACKHOLE:
933                 rt->dst.output = dst_discard_out;
934                 rt->dst.input = dst_discard;
935                 break;
936         case RTN_PROHIBIT:
937                 rt->dst.output = ip6_pkt_prohibit_out;
938                 rt->dst.input = ip6_pkt_prohibit;
939                 break;
940         case RTN_THROW:
941         case RTN_UNREACHABLE:
942         default:
943                 rt->dst.output = ip6_pkt_discard_out;
944                 rt->dst.input = ip6_pkt_discard;
945                 break;
946         }
947 }
948
949 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
950 {
951         if (ort->fib6_flags & RTF_REJECT) {
952                 ip6_rt_init_dst_reject(rt, ort);
953                 return;
954         }
955
956         rt->dst.error = 0;
957         rt->dst.output = ip6_output;
958
959         if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
960                 rt->dst.input = ip6_input;
961         } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
962                 rt->dst.input = ip6_mc_input;
963         } else {
964                 rt->dst.input = ip6_forward;
965         }
966
967         if (ort->fib6_nh.nh_lwtstate) {
968                 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
969                 lwtunnel_set_redirect(&rt->dst);
970         }
971
972         rt->dst.lastuse = jiffies;
973 }
974
975 /* Caller must already hold reference to @from */
976 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
977 {
978         rt->rt6i_flags &= ~RTF_EXPIRES;
979         rcu_assign_pointer(rt->from, from);
980         ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
981 }
982
983 /* Caller must already hold reference to @ort */
984 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
985 {
986         struct net_device *dev = fib6_info_nh_dev(ort);
987
988         ip6_rt_init_dst(rt, ort);
989
990         rt->rt6i_dst = ort->fib6_dst;
991         rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
992         rt->rt6i_gateway = ort->fib6_nh.nh_gw;
993         rt->rt6i_flags = ort->fib6_flags;
994         rt6_set_from(rt, ort);
995 #ifdef CONFIG_IPV6_SUBTREES
996         rt->rt6i_src = ort->fib6_src;
997 #endif
998 }
999
1000 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1001                                         struct in6_addr *saddr)
1002 {
1003         struct fib6_node *pn, *sn;
1004         while (1) {
1005                 if (fn->fn_flags & RTN_TL_ROOT)
1006                         return NULL;
1007                 pn = rcu_dereference(fn->parent);
1008                 sn = FIB6_SUBTREE(pn);
1009                 if (sn && sn != fn)
1010                         fn = fib6_node_lookup(sn, NULL, saddr);
1011                 else
1012                         fn = pn;
1013                 if (fn->fn_flags & RTN_RTINFO)
1014                         return fn;
1015         }
1016 }
1017
1018 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1019                           bool null_fallback)
1020 {
1021         struct rt6_info *rt = *prt;
1022
1023         if (dst_hold_safe(&rt->dst))
1024                 return true;
1025         if (null_fallback) {
1026                 rt = net->ipv6.ip6_null_entry;
1027                 dst_hold(&rt->dst);
1028         } else {
1029                 rt = NULL;
1030         }
1031         *prt = rt;
1032         return false;
1033 }
1034
1035 /* called with rcu_lock held */
1036 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1037 {
1038         unsigned short flags = fib6_info_dst_flags(rt);
1039         struct net_device *dev = rt->fib6_nh.nh_dev;
1040         struct rt6_info *nrt;
1041
1042         if (!fib6_info_hold_safe(rt))
1043                 goto fallback;
1044
1045         nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1046         if (!nrt) {
1047                 fib6_info_release(rt);
1048                 goto fallback;
1049         }
1050
1051         ip6_rt_copy_init(nrt, rt);
1052         return nrt;
1053
1054 fallback:
1055         nrt = dev_net(dev)->ipv6.ip6_null_entry;
1056         dst_hold(&nrt->dst);
1057         return nrt;
1058 }
1059
1060 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1061                                              struct fib6_table *table,
1062                                              struct flowi6 *fl6,
1063                                              const struct sk_buff *skb,
1064                                              int flags)
1065 {
1066         struct fib6_info *f6i;
1067         struct fib6_node *fn;
1068         struct rt6_info *rt;
1069
1070         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1071                 flags &= ~RT6_LOOKUP_F_IFACE;
1072
1073         rcu_read_lock();
1074         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1075 restart:
1076         f6i = rcu_dereference(fn->leaf);
1077         if (!f6i) {
1078                 f6i = net->ipv6.fib6_null_entry;
1079         } else {
1080                 f6i = rt6_device_match(net, f6i, &fl6->saddr,
1081                                       fl6->flowi6_oif, flags);
1082                 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1083                         f6i = fib6_multipath_select(net, f6i, fl6,
1084                                                     fl6->flowi6_oif, skb,
1085                                                     flags);
1086         }
1087         if (f6i == net->ipv6.fib6_null_entry) {
1088                 fn = fib6_backtrack(fn, &fl6->saddr);
1089                 if (fn)
1090                         goto restart;
1091         }
1092
1093         trace_fib6_table_lookup(net, f6i, table, fl6);
1094
1095         /* Search through exception table */
1096         rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1097         if (rt) {
1098                 if (ip6_hold_safe(net, &rt, true))
1099                         dst_use_noref(&rt->dst, jiffies);
1100         } else if (f6i == net->ipv6.fib6_null_entry) {
1101                 rt = net->ipv6.ip6_null_entry;
1102                 dst_hold(&rt->dst);
1103         } else {
1104                 rt = ip6_create_rt_rcu(f6i);
1105         }
1106
1107         rcu_read_unlock();
1108
1109         return rt;
1110 }
1111
1112 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1113                                    const struct sk_buff *skb, int flags)
1114 {
1115         return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1116 }
1117 EXPORT_SYMBOL_GPL(ip6_route_lookup);
1118
1119 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1120                             const struct in6_addr *saddr, int oif,
1121                             const struct sk_buff *skb, int strict)
1122 {
1123         struct flowi6 fl6 = {
1124                 .flowi6_oif = oif,
1125                 .daddr = *daddr,
1126         };
1127         struct dst_entry *dst;
1128         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1129
1130         if (saddr) {
1131                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1132                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1133         }
1134
1135         dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1136         if (dst->error == 0)
1137                 return (struct rt6_info *) dst;
1138
1139         dst_release(dst);
1140
1141         return NULL;
1142 }
1143 EXPORT_SYMBOL(rt6_lookup);
1144
1145 /* ip6_ins_rt is called with FREE table->tb6_lock.
1146  * It takes new route entry, the addition fails by any reason the
1147  * route is released.
1148  * Caller must hold dst before calling it.
1149  */
1150
1151 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1152                         struct netlink_ext_ack *extack)
1153 {
1154         int err;
1155         struct fib6_table *table;
1156
1157         table = rt->fib6_table;
1158         spin_lock_bh(&table->tb6_lock);
1159         err = fib6_add(&table->tb6_root, rt, info, extack);
1160         spin_unlock_bh(&table->tb6_lock);
1161
1162         return err;
1163 }
1164
1165 int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1166 {
1167         struct nl_info info = { .nl_net = net, };
1168
1169         return __ip6_ins_rt(rt, &info, NULL);
1170 }
1171
1172 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1173                                            const struct in6_addr *daddr,
1174                                            const struct in6_addr *saddr)
1175 {
1176         struct net_device *dev;
1177         struct rt6_info *rt;
1178
1179         /*
1180          *      Clone the route.
1181          */
1182
1183         if (!fib6_info_hold_safe(ort))
1184                 return NULL;
1185
1186         dev = ip6_rt_get_dev_rcu(ort);
1187         rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1188         if (!rt) {
1189                 fib6_info_release(ort);
1190                 return NULL;
1191         }
1192
1193         ip6_rt_copy_init(rt, ort);
1194         rt->rt6i_flags |= RTF_CACHE;
1195         rt->dst.flags |= DST_HOST;
1196         rt->rt6i_dst.addr = *daddr;
1197         rt->rt6i_dst.plen = 128;
1198
1199         if (!rt6_is_gw_or_nonexthop(ort)) {
1200                 if (ort->fib6_dst.plen != 128 &&
1201                     ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1202                         rt->rt6i_flags |= RTF_ANYCAST;
1203 #ifdef CONFIG_IPV6_SUBTREES
1204                 if (rt->rt6i_src.plen && saddr) {
1205                         rt->rt6i_src.addr = *saddr;
1206                         rt->rt6i_src.plen = 128;
1207                 }
1208 #endif
1209         }
1210
1211         return rt;
1212 }
1213
1214 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1215 {
1216         unsigned short flags = fib6_info_dst_flags(rt);
1217         struct net_device *dev;
1218         struct rt6_info *pcpu_rt;
1219
1220         if (!fib6_info_hold_safe(rt))
1221                 return NULL;
1222
1223         rcu_read_lock();
1224         dev = ip6_rt_get_dev_rcu(rt);
1225         pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1226         rcu_read_unlock();
1227         if (!pcpu_rt) {
1228                 fib6_info_release(rt);
1229                 return NULL;
1230         }
1231         ip6_rt_copy_init(pcpu_rt, rt);
1232         pcpu_rt->rt6i_flags |= RTF_PCPU;
1233         return pcpu_rt;
1234 }
1235
1236 /* It should be called with rcu_read_lock() acquired */
1237 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1238 {
1239         struct rt6_info *pcpu_rt, **p;
1240
1241         p = this_cpu_ptr(rt->rt6i_pcpu);
1242         pcpu_rt = *p;
1243
1244         if (pcpu_rt)
1245                 ip6_hold_safe(NULL, &pcpu_rt, false);
1246
1247         return pcpu_rt;
1248 }
1249
1250 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1251                                             struct fib6_info *rt)
1252 {
1253         struct rt6_info *pcpu_rt, *prev, **p;
1254
1255         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1256         if (!pcpu_rt) {
1257                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1258                 return net->ipv6.ip6_null_entry;
1259         }
1260
1261         dst_hold(&pcpu_rt->dst);
1262         p = this_cpu_ptr(rt->rt6i_pcpu);
1263         prev = cmpxchg(p, NULL, pcpu_rt);
1264         BUG_ON(prev);
1265
1266         return pcpu_rt;
1267 }
1268
1269 /* exception hash table implementation
1270  */
1271 static DEFINE_SPINLOCK(rt6_exception_lock);
1272
1273 /* Remove rt6_ex from hash table and free the memory
1274  * Caller must hold rt6_exception_lock
1275  */
1276 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1277                                  struct rt6_exception *rt6_ex)
1278 {
1279         struct fib6_info *from;
1280         struct net *net;
1281
1282         if (!bucket || !rt6_ex)
1283                 return;
1284
1285         net = dev_net(rt6_ex->rt6i->dst.dev);
1286         net->ipv6.rt6_stats->fib_rt_cache--;
1287
1288         /* purge completely the exception to allow releasing the held resources:
1289          * some [sk] cache may keep the dst around for unlimited time
1290          */
1291         from = rcu_dereference_protected(rt6_ex->rt6i->from,
1292                                          lockdep_is_held(&rt6_exception_lock));
1293         rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1294         fib6_info_release(from);
1295         dst_dev_put(&rt6_ex->rt6i->dst);
1296
1297         hlist_del_rcu(&rt6_ex->hlist);
1298         dst_release(&rt6_ex->rt6i->dst);
1299         kfree_rcu(rt6_ex, rcu);
1300         WARN_ON_ONCE(!bucket->depth);
1301         bucket->depth--;
1302 }
1303
1304 /* Remove oldest rt6_ex in bucket and free the memory
1305  * Caller must hold rt6_exception_lock
1306  */
1307 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1308 {
1309         struct rt6_exception *rt6_ex, *oldest = NULL;
1310
1311         if (!bucket)
1312                 return;
1313
1314         hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1315                 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1316                         oldest = rt6_ex;
1317         }
1318         rt6_remove_exception(bucket, oldest);
1319 }
1320
1321 static u32 rt6_exception_hash(const struct in6_addr *dst,
1322                               const struct in6_addr *src)
1323 {
1324         static u32 seed __read_mostly;
1325         u32 val;
1326
1327         net_get_random_once(&seed, sizeof(seed));
1328         val = jhash(dst, sizeof(*dst), seed);
1329
1330 #ifdef CONFIG_IPV6_SUBTREES
1331         if (src)
1332                 val = jhash(src, sizeof(*src), val);
1333 #endif
1334         return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1335 }
1336
1337 /* Helper function to find the cached rt in the hash table
1338  * and update bucket pointer to point to the bucket for this
1339  * (daddr, saddr) pair
1340  * Caller must hold rt6_exception_lock
1341  */
1342 static struct rt6_exception *
1343 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1344                               const struct in6_addr *daddr,
1345                               const struct in6_addr *saddr)
1346 {
1347         struct rt6_exception *rt6_ex;
1348         u32 hval;
1349
1350         if (!(*bucket) || !daddr)
1351                 return NULL;
1352
1353         hval = rt6_exception_hash(daddr, saddr);
1354         *bucket += hval;
1355
1356         hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1357                 struct rt6_info *rt6 = rt6_ex->rt6i;
1358                 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1359
1360 #ifdef CONFIG_IPV6_SUBTREES
1361                 if (matched && saddr)
1362                         matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1363 #endif
1364                 if (matched)
1365                         return rt6_ex;
1366         }
1367         return NULL;
1368 }
1369
1370 /* Helper function to find the cached rt in the hash table
1371  * and update bucket pointer to point to the bucket for this
1372  * (daddr, saddr) pair
1373  * Caller must hold rcu_read_lock()
1374  */
1375 static struct rt6_exception *
1376 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1377                          const struct in6_addr *daddr,
1378                          const struct in6_addr *saddr)
1379 {
1380         struct rt6_exception *rt6_ex;
1381         u32 hval;
1382
1383         WARN_ON_ONCE(!rcu_read_lock_held());
1384
1385         if (!(*bucket) || !daddr)
1386                 return NULL;
1387
1388         hval = rt6_exception_hash(daddr, saddr);
1389         *bucket += hval;
1390
1391         hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1392                 struct rt6_info *rt6 = rt6_ex->rt6i;
1393                 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1394
1395 #ifdef CONFIG_IPV6_SUBTREES
1396                 if (matched && saddr)
1397                         matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1398 #endif
1399                 if (matched)
1400                         return rt6_ex;
1401         }
1402         return NULL;
1403 }
1404
1405 static unsigned int fib6_mtu(const struct fib6_info *rt)
1406 {
1407         unsigned int mtu;
1408
1409         if (rt->fib6_pmtu) {
1410                 mtu = rt->fib6_pmtu;
1411         } else {
1412                 struct net_device *dev = fib6_info_nh_dev(rt);
1413                 struct inet6_dev *idev;
1414
1415                 rcu_read_lock();
1416                 idev = __in6_dev_get(dev);
1417                 mtu = idev->cnf.mtu6;
1418                 rcu_read_unlock();
1419         }
1420
1421         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1422
1423         return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1424 }
1425
1426 static int rt6_insert_exception(struct rt6_info *nrt,
1427                                 struct fib6_info *ort)
1428 {
1429         struct net *net = dev_net(nrt->dst.dev);
1430         struct rt6_exception_bucket *bucket;
1431         struct in6_addr *src_key = NULL;
1432         struct rt6_exception *rt6_ex;
1433         int err = 0;
1434
1435         spin_lock_bh(&rt6_exception_lock);
1436
1437         if (ort->exception_bucket_flushed) {
1438                 err = -EINVAL;
1439                 goto out;
1440         }
1441
1442         bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1443                                         lockdep_is_held(&rt6_exception_lock));
1444         if (!bucket) {
1445                 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1446                                  GFP_ATOMIC);
1447                 if (!bucket) {
1448                         err = -ENOMEM;
1449                         goto out;
1450                 }
1451                 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1452         }
1453
1454 #ifdef CONFIG_IPV6_SUBTREES
1455         /* rt6i_src.plen != 0 indicates ort is in subtree
1456          * and exception table is indexed by a hash of
1457          * both rt6i_dst and rt6i_src.
1458          * Otherwise, the exception table is indexed by
1459          * a hash of only rt6i_dst.
1460          */
1461         if (ort->fib6_src.plen)
1462                 src_key = &nrt->rt6i_src.addr;
1463 #endif
1464         /* rt6_mtu_change() might lower mtu on ort.
1465          * Only insert this exception route if its mtu
1466          * is less than ort's mtu value.
1467          */
1468         if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1469                 err = -EINVAL;
1470                 goto out;
1471         }
1472
1473         rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1474                                                src_key);
1475         if (rt6_ex)
1476                 rt6_remove_exception(bucket, rt6_ex);
1477
1478         rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1479         if (!rt6_ex) {
1480                 err = -ENOMEM;
1481                 goto out;
1482         }
1483         rt6_ex->rt6i = nrt;
1484         rt6_ex->stamp = jiffies;
1485         hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1486         bucket->depth++;
1487         net->ipv6.rt6_stats->fib_rt_cache++;
1488
1489         if (bucket->depth > FIB6_MAX_DEPTH)
1490                 rt6_exception_remove_oldest(bucket);
1491
1492 out:
1493         spin_unlock_bh(&rt6_exception_lock);
1494
1495         /* Update fn->fn_sernum to invalidate all cached dst */
1496         if (!err) {
1497                 spin_lock_bh(&ort->fib6_table->tb6_lock);
1498                 fib6_update_sernum(net, ort);
1499                 spin_unlock_bh(&ort->fib6_table->tb6_lock);
1500                 fib6_force_start_gc(net);
1501         }
1502
1503         return err;
1504 }
1505
1506 void rt6_flush_exceptions(struct fib6_info *rt)
1507 {
1508         struct rt6_exception_bucket *bucket;
1509         struct rt6_exception *rt6_ex;
1510         struct hlist_node *tmp;
1511         int i;
1512
1513         spin_lock_bh(&rt6_exception_lock);
1514         /* Prevent rt6_insert_exception() to recreate the bucket list */
1515         rt->exception_bucket_flushed = 1;
1516
1517         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1518                                     lockdep_is_held(&rt6_exception_lock));
1519         if (!bucket)
1520                 goto out;
1521
1522         for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1523                 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1524                         rt6_remove_exception(bucket, rt6_ex);
1525                 WARN_ON_ONCE(bucket->depth);
1526                 bucket++;
1527         }
1528
1529 out:
1530         spin_unlock_bh(&rt6_exception_lock);
1531 }
1532
1533 /* Find cached rt in the hash table inside passed in rt
1534  * Caller has to hold rcu_read_lock()
1535  */
1536 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1537                                            struct in6_addr *daddr,
1538                                            struct in6_addr *saddr)
1539 {
1540         struct rt6_exception_bucket *bucket;
1541         struct in6_addr *src_key = NULL;
1542         struct rt6_exception *rt6_ex;
1543         struct rt6_info *res = NULL;
1544
1545         bucket = rcu_dereference(rt->rt6i_exception_bucket);
1546
1547 #ifdef CONFIG_IPV6_SUBTREES
1548         /* rt6i_src.plen != 0 indicates rt is in subtree
1549          * and exception table is indexed by a hash of
1550          * both rt6i_dst and rt6i_src.
1551          * Otherwise, the exception table is indexed by
1552          * a hash of only rt6i_dst.
1553          */
1554         if (rt->fib6_src.plen)
1555                 src_key = saddr;
1556 #endif
1557         rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1558
1559         if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1560                 res = rt6_ex->rt6i;
1561
1562         return res;
1563 }
1564
1565 /* Remove the passed in cached rt from the hash table that contains it */
1566 static int rt6_remove_exception_rt(struct rt6_info *rt)
1567 {
1568         struct rt6_exception_bucket *bucket;
1569         struct in6_addr *src_key = NULL;
1570         struct rt6_exception *rt6_ex;
1571         struct fib6_info *from;
1572         int err;
1573
1574         from = rcu_dereference(rt->from);
1575         if (!from ||
1576             !(rt->rt6i_flags & RTF_CACHE))
1577                 return -EINVAL;
1578
1579         if (!rcu_access_pointer(from->rt6i_exception_bucket))
1580                 return -ENOENT;
1581
1582         spin_lock_bh(&rt6_exception_lock);
1583         bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1584                                     lockdep_is_held(&rt6_exception_lock));
1585 #ifdef CONFIG_IPV6_SUBTREES
1586         /* rt6i_src.plen != 0 indicates 'from' is in subtree
1587          * and exception table is indexed by a hash of
1588          * both rt6i_dst and rt6i_src.
1589          * Otherwise, the exception table is indexed by
1590          * a hash of only rt6i_dst.
1591          */
1592         if (from->fib6_src.plen)
1593                 src_key = &rt->rt6i_src.addr;
1594 #endif
1595         rt6_ex = __rt6_find_exception_spinlock(&bucket,
1596                                                &rt->rt6i_dst.addr,
1597                                                src_key);
1598         if (rt6_ex) {
1599                 rt6_remove_exception(bucket, rt6_ex);
1600                 err = 0;
1601         } else {
1602                 err = -ENOENT;
1603         }
1604
1605         spin_unlock_bh(&rt6_exception_lock);
1606         return err;
1607 }
1608
1609 /* Find rt6_ex which contains the passed in rt cache and
1610  * refresh its stamp
1611  */
1612 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1613 {
1614         struct rt6_exception_bucket *bucket;
1615         struct in6_addr *src_key = NULL;
1616         struct rt6_exception *rt6_ex;
1617         struct fib6_info *from;
1618
1619         rcu_read_lock();
1620         from = rcu_dereference(rt->from);
1621         if (!from || !(rt->rt6i_flags & RTF_CACHE))
1622                 goto unlock;
1623
1624         bucket = rcu_dereference(from->rt6i_exception_bucket);
1625
1626 #ifdef CONFIG_IPV6_SUBTREES
1627         /* rt6i_src.plen != 0 indicates 'from' is in subtree
1628          * and exception table is indexed by a hash of
1629          * both rt6i_dst and rt6i_src.
1630          * Otherwise, the exception table is indexed by
1631          * a hash of only rt6i_dst.
1632          */
1633         if (from->fib6_src.plen)
1634                 src_key = &rt->rt6i_src.addr;
1635 #endif
1636         rt6_ex = __rt6_find_exception_rcu(&bucket,
1637                                           &rt->rt6i_dst.addr,
1638                                           src_key);
1639         if (rt6_ex)
1640                 rt6_ex->stamp = jiffies;
1641
1642 unlock:
1643         rcu_read_unlock();
1644 }
1645
1646 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1647                                          struct rt6_info *rt, int mtu)
1648 {
1649         /* If the new MTU is lower than the route PMTU, this new MTU will be the
1650          * lowest MTU in the path: always allow updating the route PMTU to
1651          * reflect PMTU decreases.
1652          *
1653          * If the new MTU is higher, and the route PMTU is equal to the local
1654          * MTU, this means the old MTU is the lowest in the path, so allow
1655          * updating it: if other nodes now have lower MTUs, PMTU discovery will
1656          * handle this.
1657          */
1658
1659         if (dst_mtu(&rt->dst) >= mtu)
1660                 return true;
1661
1662         if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1663                 return true;
1664
1665         return false;
1666 }
1667
1668 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1669                                        struct fib6_info *rt, int mtu)
1670 {
1671         struct rt6_exception_bucket *bucket;
1672         struct rt6_exception *rt6_ex;
1673         int i;
1674
1675         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1676                                         lockdep_is_held(&rt6_exception_lock));
1677
1678         if (!bucket)
1679                 return;
1680
1681         for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1682                 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1683                         struct rt6_info *entry = rt6_ex->rt6i;
1684
1685                         /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1686                          * route), the metrics of its rt->from have already
1687                          * been updated.
1688                          */
1689                         if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1690                             rt6_mtu_change_route_allowed(idev, entry, mtu))
1691                                 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1692                 }
1693                 bucket++;
1694         }
1695 }
1696
1697 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
1698
1699 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1700                                         struct in6_addr *gateway)
1701 {
1702         struct rt6_exception_bucket *bucket;
1703         struct rt6_exception *rt6_ex;
1704         struct hlist_node *tmp;
1705         int i;
1706
1707         if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1708                 return;
1709
1710         spin_lock_bh(&rt6_exception_lock);
1711         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1712                                      lockdep_is_held(&rt6_exception_lock));
1713
1714         if (bucket) {
1715                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1716                         hlist_for_each_entry_safe(rt6_ex, tmp,
1717                                                   &bucket->chain, hlist) {
1718                                 struct rt6_info *entry = rt6_ex->rt6i;
1719
1720                                 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1721                                     RTF_CACHE_GATEWAY &&
1722                                     ipv6_addr_equal(gateway,
1723                                                     &entry->rt6i_gateway)) {
1724                                         rt6_remove_exception(bucket, rt6_ex);
1725                                 }
1726                         }
1727                         bucket++;
1728                 }
1729         }
1730
1731         spin_unlock_bh(&rt6_exception_lock);
1732 }
1733
1734 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1735                                       struct rt6_exception *rt6_ex,
1736                                       struct fib6_gc_args *gc_args,
1737                                       unsigned long now)
1738 {
1739         struct rt6_info *rt = rt6_ex->rt6i;
1740
1741         /* we are pruning and obsoleting aged-out and non gateway exceptions
1742          * even if others have still references to them, so that on next
1743          * dst_check() such references can be dropped.
1744          * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1745          * expired, independently from their aging, as per RFC 8201 section 4
1746          */
1747         if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1748                 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1749                         RT6_TRACE("aging clone %p\n", rt);
1750                         rt6_remove_exception(bucket, rt6_ex);
1751                         return;
1752                 }
1753         } else if (time_after(jiffies, rt->dst.expires)) {
1754                 RT6_TRACE("purging expired route %p\n", rt);
1755                 rt6_remove_exception(bucket, rt6_ex);
1756                 return;
1757         }
1758
1759         if (rt->rt6i_flags & RTF_GATEWAY) {
1760                 struct neighbour *neigh;
1761                 __u8 neigh_flags = 0;
1762
1763                 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1764                 if (neigh)
1765                         neigh_flags = neigh->flags;
1766
1767                 if (!(neigh_flags & NTF_ROUTER)) {
1768                         RT6_TRACE("purging route %p via non-router but gateway\n",
1769                                   rt);
1770                         rt6_remove_exception(bucket, rt6_ex);
1771                         return;
1772                 }
1773         }
1774
1775         gc_args->more++;
1776 }
1777
1778 void rt6_age_exceptions(struct fib6_info *rt,
1779                         struct fib6_gc_args *gc_args,
1780                         unsigned long now)
1781 {
1782         struct rt6_exception_bucket *bucket;
1783         struct rt6_exception *rt6_ex;
1784         struct hlist_node *tmp;
1785         int i;
1786
1787         if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1788                 return;
1789
1790         rcu_read_lock_bh();
1791         spin_lock(&rt6_exception_lock);
1792         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1793                                     lockdep_is_held(&rt6_exception_lock));
1794
1795         if (bucket) {
1796                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1797                         hlist_for_each_entry_safe(rt6_ex, tmp,
1798                                                   &bucket->chain, hlist) {
1799                                 rt6_age_examine_exception(bucket, rt6_ex,
1800                                                           gc_args, now);
1801                         }
1802                         bucket++;
1803                 }
1804         }
1805         spin_unlock(&rt6_exception_lock);
1806         rcu_read_unlock_bh();
1807 }
1808
1809 /* must be called with rcu lock held */
1810 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1811                                     int oif, struct flowi6 *fl6, int strict)
1812 {
1813         struct fib6_node *fn, *saved_fn;
1814         struct fib6_info *f6i;
1815
1816         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1817         saved_fn = fn;
1818
1819         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1820                 oif = 0;
1821
1822 redo_rt6_select:
1823         f6i = rt6_select(net, fn, oif, strict);
1824         if (f6i == net->ipv6.fib6_null_entry) {
1825                 fn = fib6_backtrack(fn, &fl6->saddr);
1826                 if (fn)
1827                         goto redo_rt6_select;
1828                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1829                         /* also consider unreachable route */
1830                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1831                         fn = saved_fn;
1832                         goto redo_rt6_select;
1833                 }
1834         }
1835
1836         trace_fib6_table_lookup(net, f6i, table, fl6);
1837
1838         return f6i;
1839 }
1840
1841 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1842                                int oif, struct flowi6 *fl6,
1843                                const struct sk_buff *skb, int flags)
1844 {
1845         struct fib6_info *f6i;
1846         struct rt6_info *rt;
1847         int strict = 0;
1848
1849         strict |= flags & RT6_LOOKUP_F_IFACE;
1850         strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1851         if (net->ipv6.devconf_all->forwarding == 0)
1852                 strict |= RT6_LOOKUP_F_REACHABLE;
1853
1854         rcu_read_lock();
1855
1856         f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1857         if (f6i->fib6_nsiblings)
1858                 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1859
1860         if (f6i == net->ipv6.fib6_null_entry) {
1861                 rt = net->ipv6.ip6_null_entry;
1862                 rcu_read_unlock();
1863                 dst_hold(&rt->dst);
1864                 return rt;
1865         }
1866
1867         /*Search through exception table */
1868         rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1869         if (rt) {
1870                 if (ip6_hold_safe(net, &rt, true))
1871                         dst_use_noref(&rt->dst, jiffies);
1872
1873                 rcu_read_unlock();
1874                 return rt;
1875         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1876                             !(f6i->fib6_flags & RTF_GATEWAY))) {
1877                 /* Create a RTF_CACHE clone which will not be
1878                  * owned by the fib6 tree.  It is for the special case where
1879                  * the daddr in the skb during the neighbor look-up is different
1880                  * from the fl6->daddr used to look-up route here.
1881                  */
1882                 struct rt6_info *uncached_rt;
1883
1884                 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1885
1886                 rcu_read_unlock();
1887
1888                 if (uncached_rt) {
1889                         /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1890                          * No need for another dst_hold()
1891                          */
1892                         rt6_uncached_list_add(uncached_rt);
1893                         atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1894                 } else {
1895                         uncached_rt = net->ipv6.ip6_null_entry;
1896                         dst_hold(&uncached_rt->dst);
1897                 }
1898
1899                 return uncached_rt;
1900         } else {
1901                 /* Get a percpu copy */
1902
1903                 struct rt6_info *pcpu_rt;
1904
1905                 local_bh_disable();
1906                 pcpu_rt = rt6_get_pcpu_route(f6i);
1907
1908                 if (!pcpu_rt)
1909                         pcpu_rt = rt6_make_pcpu_route(net, f6i);
1910
1911                 local_bh_enable();
1912                 rcu_read_unlock();
1913
1914                 return pcpu_rt;
1915         }
1916 }
1917 EXPORT_SYMBOL_GPL(ip6_pol_route);
1918
1919 static struct rt6_info *ip6_pol_route_input(struct net *net,
1920                                             struct fib6_table *table,
1921                                             struct flowi6 *fl6,
1922                                             const struct sk_buff *skb,
1923                                             int flags)
1924 {
1925         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1926 }
1927
1928 struct dst_entry *ip6_route_input_lookup(struct net *net,
1929                                          struct net_device *dev,
1930                                          struct flowi6 *fl6,
1931                                          const struct sk_buff *skb,
1932                                          int flags)
1933 {
1934         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1935                 flags |= RT6_LOOKUP_F_IFACE;
1936
1937         return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1938 }
1939 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1940
1941 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1942                                   struct flow_keys *keys,
1943                                   struct flow_keys *flkeys)
1944 {
1945         const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1946         const struct ipv6hdr *key_iph = outer_iph;
1947         struct flow_keys *_flkeys = flkeys;
1948         const struct ipv6hdr *inner_iph;
1949         const struct icmp6hdr *icmph;
1950         struct ipv6hdr _inner_iph;
1951         struct icmp6hdr _icmph;
1952
1953         if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1954                 goto out;
1955
1956         icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1957                                    sizeof(_icmph), &_icmph);
1958         if (!icmph)
1959                 goto out;
1960
1961         if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1962             icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1963             icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1964             icmph->icmp6_type != ICMPV6_PARAMPROB)
1965                 goto out;
1966
1967         inner_iph = skb_header_pointer(skb,
1968                                        skb_transport_offset(skb) + sizeof(*icmph),
1969                                        sizeof(_inner_iph), &_inner_iph);
1970         if (!inner_iph)
1971                 goto out;
1972
1973         key_iph = inner_iph;
1974         _flkeys = NULL;
1975 out:
1976         if (_flkeys) {
1977                 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1978                 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1979                 keys->tags.flow_label = _flkeys->tags.flow_label;
1980                 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1981         } else {
1982                 keys->addrs.v6addrs.src = key_iph->saddr;
1983                 keys->addrs.v6addrs.dst = key_iph->daddr;
1984                 keys->tags.flow_label = ip6_flowlabel(key_iph);
1985                 keys->basic.ip_proto = key_iph->nexthdr;
1986         }
1987 }
1988
1989 /* if skb is set it will be used and fl6 can be NULL */
1990 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1991                        const struct sk_buff *skb, struct flow_keys *flkeys)
1992 {
1993         struct flow_keys hash_keys;
1994         u32 mhash;
1995
1996         switch (ip6_multipath_hash_policy(net)) {
1997         case 0:
1998                 memset(&hash_keys, 0, sizeof(hash_keys));
1999                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2000                 if (skb) {
2001                         ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2002                 } else {
2003                         hash_keys.addrs.v6addrs.src = fl6->saddr;
2004                         hash_keys.addrs.v6addrs.dst = fl6->daddr;
2005                         hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2006                         hash_keys.basic.ip_proto = fl6->flowi6_proto;
2007                 }
2008                 break;
2009         case 1:
2010                 if (skb) {
2011                         unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2012                         struct flow_keys keys;
2013
2014                         /* short-circuit if we already have L4 hash present */
2015                         if (skb->l4_hash)
2016                                 return skb_get_hash_raw(skb) >> 1;
2017
2018                         memset(&hash_keys, 0, sizeof(hash_keys));
2019
2020                         if (!flkeys) {
2021                                 skb_flow_dissect_flow_keys(skb, &keys, flag);
2022                                 flkeys = &keys;
2023                         }
2024                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2025                         hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2026                         hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2027                         hash_keys.ports.src = flkeys->ports.src;
2028                         hash_keys.ports.dst = flkeys->ports.dst;
2029                         hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2030                 } else {
2031                         memset(&hash_keys, 0, sizeof(hash_keys));
2032                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2033                         hash_keys.addrs.v6addrs.src = fl6->saddr;
2034                         hash_keys.addrs.v6addrs.dst = fl6->daddr;
2035                         hash_keys.ports.src = fl6->fl6_sport;
2036                         hash_keys.ports.dst = fl6->fl6_dport;
2037                         hash_keys.basic.ip_proto = fl6->flowi6_proto;
2038                 }
2039                 break;
2040         }
2041         mhash = flow_hash_from_keys(&hash_keys);
2042
2043         return mhash >> 1;
2044 }
2045
2046 void ip6_route_input(struct sk_buff *skb)
2047 {
2048         const struct ipv6hdr *iph = ipv6_hdr(skb);
2049         struct net *net = dev_net(skb->dev);
2050         int flags = RT6_LOOKUP_F_HAS_SADDR;
2051         struct ip_tunnel_info *tun_info;
2052         struct flowi6 fl6 = {
2053                 .flowi6_iif = skb->dev->ifindex,
2054                 .daddr = iph->daddr,
2055                 .saddr = iph->saddr,
2056                 .flowlabel = ip6_flowinfo(iph),
2057                 .flowi6_mark = skb->mark,
2058                 .flowi6_proto = iph->nexthdr,
2059         };
2060         struct flow_keys *flkeys = NULL, _flkeys;
2061
2062         tun_info = skb_tunnel_info(skb);
2063         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2064                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2065
2066         if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2067                 flkeys = &_flkeys;
2068
2069         if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2070                 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2071         skb_dst_drop(skb);
2072         skb_dst_set(skb,
2073                     ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2074 }
2075
2076 static struct rt6_info *ip6_pol_route_output(struct net *net,
2077                                              struct fib6_table *table,
2078                                              struct flowi6 *fl6,
2079                                              const struct sk_buff *skb,
2080                                              int flags)
2081 {
2082         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2083 }
2084
2085 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2086                                          struct flowi6 *fl6, int flags)
2087 {
2088         bool any_src;
2089
2090         if (ipv6_addr_type(&fl6->daddr) &
2091             (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2092                 struct dst_entry *dst;
2093
2094                 dst = l3mdev_link_scope_lookup(net, fl6);
2095                 if (dst)
2096                         return dst;
2097         }
2098
2099         fl6->flowi6_iif = LOOPBACK_IFINDEX;
2100
2101         any_src = ipv6_addr_any(&fl6->saddr);
2102         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2103             (fl6->flowi6_oif && any_src))
2104                 flags |= RT6_LOOKUP_F_IFACE;
2105
2106         if (!any_src)
2107                 flags |= RT6_LOOKUP_F_HAS_SADDR;
2108         else if (sk)
2109                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2110
2111         return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2112 }
2113 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2114
2115 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2116 {
2117         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2118         struct net_device *loopback_dev = net->loopback_dev;
2119         struct dst_entry *new = NULL;
2120
2121         rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2122                        DST_OBSOLETE_DEAD, 0);
2123         if (rt) {
2124                 rt6_info_init(rt);
2125                 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2126
2127                 new = &rt->dst;
2128                 new->__use = 1;
2129                 new->input = dst_discard;
2130                 new->output = dst_discard_out;
2131
2132                 dst_copy_metrics(new, &ort->dst);
2133
2134                 rt->rt6i_idev = in6_dev_get(loopback_dev);
2135                 rt->rt6i_gateway = ort->rt6i_gateway;
2136                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2137
2138                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2139 #ifdef CONFIG_IPV6_SUBTREES
2140                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2141 #endif
2142         }
2143
2144         dst_release(dst_orig);
2145         return new ? new : ERR_PTR(-ENOMEM);
2146 }
2147
2148 /*
2149  *      Destination cache support functions
2150  */
2151
2152 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2153 {
2154         u32 rt_cookie = 0;
2155
2156         if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2157                 return false;
2158
2159         if (fib6_check_expired(f6i))
2160                 return false;
2161
2162         return true;
2163 }
2164
2165 static struct dst_entry *rt6_check(struct rt6_info *rt,
2166                                    struct fib6_info *from,
2167                                    u32 cookie)
2168 {
2169         u32 rt_cookie = 0;
2170
2171         if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2172             rt_cookie != cookie)
2173                 return NULL;
2174
2175         if (rt6_check_expired(rt))
2176                 return NULL;
2177
2178         return &rt->dst;
2179 }
2180
2181 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2182                                             struct fib6_info *from,
2183                                             u32 cookie)
2184 {
2185         if (!__rt6_check_expired(rt) &&
2186             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2187             fib6_check(from, cookie))
2188                 return &rt->dst;
2189         else
2190                 return NULL;
2191 }
2192
2193 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2194 {
2195         struct dst_entry *dst_ret;
2196         struct fib6_info *from;
2197         struct rt6_info *rt;
2198
2199         rt = container_of(dst, struct rt6_info, dst);
2200
2201         rcu_read_lock();
2202
2203         /* All IPV6 dsts are created with ->obsolete set to the value
2204          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2205          * into this function always.
2206          */
2207
2208         from = rcu_dereference(rt->from);
2209
2210         if (from && (rt->rt6i_flags & RTF_PCPU ||
2211             unlikely(!list_empty(&rt->rt6i_uncached))))
2212                 dst_ret = rt6_dst_from_check(rt, from, cookie);
2213         else
2214                 dst_ret = rt6_check(rt, from, cookie);
2215
2216         rcu_read_unlock();
2217
2218         return dst_ret;
2219 }
2220
2221 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2222 {
2223         struct rt6_info *rt = (struct rt6_info *) dst;
2224
2225         if (rt) {
2226                 if (rt->rt6i_flags & RTF_CACHE) {
2227                         rcu_read_lock();
2228                         if (rt6_check_expired(rt)) {
2229                                 rt6_remove_exception_rt(rt);
2230                                 dst = NULL;
2231                         }
2232                         rcu_read_unlock();
2233                 } else {
2234                         dst_release(dst);
2235                         dst = NULL;
2236                 }
2237         }
2238         return dst;
2239 }
2240
2241 static void ip6_link_failure(struct sk_buff *skb)
2242 {
2243         struct rt6_info *rt;
2244
2245         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2246
2247         rt = (struct rt6_info *) skb_dst(skb);
2248         if (rt) {
2249                 rcu_read_lock();
2250                 if (rt->rt6i_flags & RTF_CACHE) {
2251                         rt6_remove_exception_rt(rt);
2252                 } else {
2253                         struct fib6_info *from;
2254                         struct fib6_node *fn;
2255
2256                         from = rcu_dereference(rt->from);
2257                         if (from) {
2258                                 fn = rcu_dereference(from->fib6_node);
2259                                 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2260                                         fn->fn_sernum = -1;
2261                         }
2262                 }
2263                 rcu_read_unlock();
2264         }
2265 }
2266
2267 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2268 {
2269         if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2270                 struct fib6_info *from;
2271
2272                 rcu_read_lock();
2273                 from = rcu_dereference(rt0->from);
2274                 if (from)
2275                         rt0->dst.expires = from->expires;
2276                 rcu_read_unlock();
2277         }
2278
2279         dst_set_expires(&rt0->dst, timeout);
2280         rt0->rt6i_flags |= RTF_EXPIRES;
2281 }
2282
2283 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2284 {
2285         struct net *net = dev_net(rt->dst.dev);
2286
2287         dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2288         rt->rt6i_flags |= RTF_MODIFIED;
2289         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2290 }
2291
2292 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2293 {
2294         return !(rt->rt6i_flags & RTF_CACHE) &&
2295                 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2296 }
2297
2298 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2299                                  const struct ipv6hdr *iph, u32 mtu)
2300 {
2301         const struct in6_addr *daddr, *saddr;
2302         struct rt6_info *rt6 = (struct rt6_info *)dst;
2303
2304         if (dst_metric_locked(dst, RTAX_MTU))
2305                 return;
2306
2307         if (iph) {
2308                 daddr = &iph->daddr;
2309                 saddr = &iph->saddr;
2310         } else if (sk) {
2311                 daddr = &sk->sk_v6_daddr;
2312                 saddr = &inet6_sk(sk)->saddr;
2313         } else {
2314                 daddr = NULL;
2315                 saddr = NULL;
2316         }
2317         dst_confirm_neigh(dst, daddr);
2318         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2319         if (mtu >= dst_mtu(dst))
2320                 return;
2321
2322         if (!rt6_cache_allowed_for_pmtu(rt6)) {
2323                 rt6_do_update_pmtu(rt6, mtu);
2324                 /* update rt6_ex->stamp for cache */
2325                 if (rt6->rt6i_flags & RTF_CACHE)
2326                         rt6_update_exception_stamp_rt(rt6);
2327         } else if (daddr) {
2328                 struct fib6_info *from;
2329                 struct rt6_info *nrt6;
2330
2331                 rcu_read_lock();
2332                 from = rcu_dereference(rt6->from);
2333                 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2334                 if (nrt6) {
2335                         rt6_do_update_pmtu(nrt6, mtu);
2336                         if (rt6_insert_exception(nrt6, from))
2337                                 dst_release_immediate(&nrt6->dst);
2338                 }
2339                 rcu_read_unlock();
2340         }
2341 }
2342
2343 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2344                                struct sk_buff *skb, u32 mtu)
2345 {
2346         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2347 }
2348
2349 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2350                      int oif, u32 mark, kuid_t uid)
2351 {
2352         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2353         struct dst_entry *dst;
2354         struct flowi6 fl6 = {
2355                 .flowi6_oif = oif,
2356                 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2357                 .daddr = iph->daddr,
2358                 .saddr = iph->saddr,
2359                 .flowlabel = ip6_flowinfo(iph),
2360                 .flowi6_uid = uid,
2361         };
2362
2363         dst = ip6_route_output(net, NULL, &fl6);
2364         if (!dst->error)
2365                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2366         dst_release(dst);
2367 }
2368 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2369
2370 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2371 {
2372         int oif = sk->sk_bound_dev_if;
2373         struct dst_entry *dst;
2374
2375         if (!oif && skb->dev)
2376                 oif = l3mdev_master_ifindex(skb->dev);
2377
2378         ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2379
2380         dst = __sk_dst_get(sk);
2381         if (!dst || !dst->obsolete ||
2382             dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2383                 return;
2384
2385         bh_lock_sock(sk);
2386         if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2387                 ip6_datagram_dst_update(sk, false);
2388         bh_unlock_sock(sk);
2389 }
2390 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2391
2392 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2393                            const struct flowi6 *fl6)
2394 {
2395 #ifdef CONFIG_IPV6_SUBTREES
2396         struct ipv6_pinfo *np = inet6_sk(sk);
2397 #endif
2398
2399         ip6_dst_store(sk, dst,
2400                       ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2401                       &sk->sk_v6_daddr : NULL,
2402 #ifdef CONFIG_IPV6_SUBTREES
2403                       ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2404                       &np->saddr :
2405 #endif
2406                       NULL);
2407 }
2408
2409 /* Handle redirects */
2410 struct ip6rd_flowi {
2411         struct flowi6 fl6;
2412         struct in6_addr gateway;
2413 };
2414
2415 static struct rt6_info *__ip6_route_redirect(struct net *net,
2416                                              struct fib6_table *table,
2417                                              struct flowi6 *fl6,
2418                                              const struct sk_buff *skb,
2419                                              int flags)
2420 {
2421         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2422         struct rt6_info *ret = NULL, *rt_cache;
2423         struct fib6_info *rt;
2424         struct fib6_node *fn;
2425
2426         /* Get the "current" route for this destination and
2427          * check if the redirect has come from appropriate router.
2428          *
2429          * RFC 4861 specifies that redirects should only be
2430          * accepted if they come from the nexthop to the target.
2431          * Due to the way the routes are chosen, this notion
2432          * is a bit fuzzy and one might need to check all possible
2433          * routes.
2434          */
2435
2436         rcu_read_lock();
2437         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2438 restart:
2439         for_each_fib6_node_rt_rcu(fn) {
2440                 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2441                         continue;
2442                 if (fib6_check_expired(rt))
2443                         continue;
2444                 if (rt->fib6_flags & RTF_REJECT)
2445                         break;
2446                 if (!(rt->fib6_flags & RTF_GATEWAY))
2447                         continue;
2448                 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2449                         continue;
2450                 /* rt_cache's gateway might be different from its 'parent'
2451                  * in the case of an ip redirect.
2452                  * So we keep searching in the exception table if the gateway
2453                  * is different.
2454                  */
2455                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2456                         rt_cache = rt6_find_cached_rt(rt,
2457                                                       &fl6->daddr,
2458                                                       &fl6->saddr);
2459                         if (rt_cache &&
2460                             ipv6_addr_equal(&rdfl->gateway,
2461                                             &rt_cache->rt6i_gateway)) {
2462                                 ret = rt_cache;
2463                                 break;
2464                         }
2465                         continue;
2466                 }
2467                 break;
2468         }
2469
2470         if (!rt)
2471                 rt = net->ipv6.fib6_null_entry;
2472         else if (rt->fib6_flags & RTF_REJECT) {
2473                 ret = net->ipv6.ip6_null_entry;
2474                 goto out;
2475         }
2476
2477         if (rt == net->ipv6.fib6_null_entry) {
2478                 fn = fib6_backtrack(fn, &fl6->saddr);
2479                 if (fn)
2480                         goto restart;
2481         }
2482
2483 out:
2484         if (ret)
2485                 ip6_hold_safe(net, &ret, true);
2486         else
2487                 ret = ip6_create_rt_rcu(rt);
2488
2489         rcu_read_unlock();
2490
2491         trace_fib6_table_lookup(net, rt, table, fl6);
2492         return ret;
2493 };
2494
2495 static struct dst_entry *ip6_route_redirect(struct net *net,
2496                                             const struct flowi6 *fl6,
2497                                             const struct sk_buff *skb,
2498                                             const struct in6_addr *gateway)
2499 {
2500         int flags = RT6_LOOKUP_F_HAS_SADDR;
2501         struct ip6rd_flowi rdfl;
2502
2503         rdfl.fl6 = *fl6;
2504         rdfl.gateway = *gateway;
2505
2506         return fib6_rule_lookup(net, &rdfl.fl6, skb,
2507                                 flags, __ip6_route_redirect);
2508 }
2509
2510 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2511                   kuid_t uid)
2512 {
2513         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2514         struct dst_entry *dst;
2515         struct flowi6 fl6 = {
2516                 .flowi6_iif = LOOPBACK_IFINDEX,
2517                 .flowi6_oif = oif,
2518                 .flowi6_mark = mark,
2519                 .daddr = iph->daddr,
2520                 .saddr = iph->saddr,
2521                 .flowlabel = ip6_flowinfo(iph),
2522                 .flowi6_uid = uid,
2523         };
2524
2525         dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2526         rt6_do_redirect(dst, NULL, skb);
2527         dst_release(dst);
2528 }
2529 EXPORT_SYMBOL_GPL(ip6_redirect);
2530
2531 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2532 {
2533         const struct ipv6hdr *iph = ipv6_hdr(skb);
2534         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2535         struct dst_entry *dst;
2536         struct flowi6 fl6 = {
2537                 .flowi6_iif = LOOPBACK_IFINDEX,
2538                 .flowi6_oif = oif,
2539                 .daddr = msg->dest,
2540                 .saddr = iph->daddr,
2541                 .flowi6_uid = sock_net_uid(net, NULL),
2542         };
2543
2544         dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2545         rt6_do_redirect(dst, NULL, skb);
2546         dst_release(dst);
2547 }
2548
2549 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2550 {
2551         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2552                      sk->sk_uid);
2553 }
2554 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2555
2556 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2557 {
2558         struct net_device *dev = dst->dev;
2559         unsigned int mtu = dst_mtu(dst);
2560         struct net *net = dev_net(dev);
2561
2562         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2563
2564         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2565                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2566
2567         /*
2568          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2569          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2570          * IPV6_MAXPLEN is also valid and means: "any MSS,
2571          * rely only on pmtu discovery"
2572          */
2573         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2574                 mtu = IPV6_MAXPLEN;
2575         return mtu;
2576 }
2577
2578 static unsigned int ip6_mtu(const struct dst_entry *dst)
2579 {
2580         struct inet6_dev *idev;
2581         unsigned int mtu;
2582
2583         mtu = dst_metric_raw(dst, RTAX_MTU);
2584         if (mtu)
2585                 goto out;
2586
2587         mtu = IPV6_MIN_MTU;
2588
2589         rcu_read_lock();
2590         idev = __in6_dev_get(dst->dev);
2591         if (idev)
2592                 mtu = idev->cnf.mtu6;
2593         rcu_read_unlock();
2594
2595 out:
2596         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2597
2598         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2599 }
2600
2601 /* MTU selection:
2602  * 1. mtu on route is locked - use it
2603  * 2. mtu from nexthop exception
2604  * 3. mtu from egress device
2605  *
2606  * based on ip6_dst_mtu_forward and exception logic of
2607  * rt6_find_cached_rt; called with rcu_read_lock
2608  */
2609 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2610                       struct in6_addr *saddr)
2611 {
2612         struct rt6_exception_bucket *bucket;
2613         struct rt6_exception *rt6_ex;
2614         struct in6_addr *src_key;
2615         struct inet6_dev *idev;
2616         u32 mtu = 0;
2617
2618         if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2619                 mtu = f6i->fib6_pmtu;
2620                 if (mtu)
2621                         goto out;
2622         }
2623
2624         src_key = NULL;
2625 #ifdef CONFIG_IPV6_SUBTREES
2626         if (f6i->fib6_src.plen)
2627                 src_key = saddr;
2628 #endif
2629
2630         bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2631         rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2632         if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2633                 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2634
2635         if (likely(!mtu)) {
2636                 struct net_device *dev = fib6_info_nh_dev(f6i);
2637
2638                 mtu = IPV6_MIN_MTU;
2639                 idev = __in6_dev_get(dev);
2640                 if (idev && idev->cnf.mtu6 > mtu)
2641                         mtu = idev->cnf.mtu6;
2642         }
2643
2644         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2645 out:
2646         return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2647 }
2648
2649 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2650                                   struct flowi6 *fl6)
2651 {
2652         struct dst_entry *dst;
2653         struct rt6_info *rt;
2654         struct inet6_dev *idev = in6_dev_get(dev);
2655         struct net *net = dev_net(dev);
2656
2657         if (unlikely(!idev))
2658                 return ERR_PTR(-ENODEV);
2659
2660         rt = ip6_dst_alloc(net, dev, 0);
2661         if (unlikely(!rt)) {
2662                 in6_dev_put(idev);
2663                 dst = ERR_PTR(-ENOMEM);
2664                 goto out;
2665         }
2666
2667         rt->dst.flags |= DST_HOST;
2668         rt->dst.input = ip6_input;
2669         rt->dst.output  = ip6_output;
2670         rt->rt6i_gateway  = fl6->daddr;
2671         rt->rt6i_dst.addr = fl6->daddr;
2672         rt->rt6i_dst.plen = 128;
2673         rt->rt6i_idev     = idev;
2674         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2675
2676         /* Add this dst into uncached_list so that rt6_disable_ip() can
2677          * do proper release of the net_device
2678          */
2679         rt6_uncached_list_add(rt);
2680         atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2681
2682         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2683
2684 out:
2685         return dst;
2686 }
2687
2688 static int ip6_dst_gc(struct dst_ops *ops)
2689 {
2690         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2691         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2692         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2693         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2694         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2695         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2696         int entries;
2697
2698         entries = dst_entries_get_fast(ops);
2699         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2700             entries <= rt_max_size)
2701                 goto out;
2702
2703         net->ipv6.ip6_rt_gc_expire++;
2704         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2705         entries = dst_entries_get_slow(ops);
2706         if (entries < ops->gc_thresh)
2707                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2708 out:
2709         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2710         return entries > rt_max_size;
2711 }
2712
2713 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2714                                             struct fib6_config *cfg,
2715                                             const struct in6_addr *gw_addr,
2716                                             u32 tbid, int flags)
2717 {
2718         struct flowi6 fl6 = {
2719                 .flowi6_oif = cfg->fc_ifindex,
2720                 .daddr = *gw_addr,
2721                 .saddr = cfg->fc_prefsrc,
2722         };
2723         struct fib6_table *table;
2724         struct rt6_info *rt;
2725
2726         table = fib6_get_table(net, tbid);
2727         if (!table)
2728                 return NULL;
2729
2730         if (!ipv6_addr_any(&cfg->fc_prefsrc))
2731                 flags |= RT6_LOOKUP_F_HAS_SADDR;
2732
2733         flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2734         rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2735
2736         /* if table lookup failed, fall back to full lookup */
2737         if (rt == net->ipv6.ip6_null_entry) {
2738                 ip6_rt_put(rt);
2739                 rt = NULL;
2740         }
2741
2742         return rt;
2743 }
2744
2745 static int ip6_route_check_nh_onlink(struct net *net,
2746                                      struct fib6_config *cfg,
2747                                      const struct net_device *dev,
2748                                      struct netlink_ext_ack *extack)
2749 {
2750         u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2751         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2752         u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2753         struct fib6_info *from;
2754         struct rt6_info *grt;
2755         int err;
2756
2757         err = 0;
2758         grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2759         if (grt) {
2760                 rcu_read_lock();
2761                 from = rcu_dereference(grt->from);
2762                 if (!grt->dst.error &&
2763                     /* ignore match if it is the default route */
2764                     from && !ipv6_addr_any(&from->fib6_dst.addr) &&
2765                     (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2766                         NL_SET_ERR_MSG(extack,
2767                                        "Nexthop has invalid gateway or device mismatch");
2768                         err = -EINVAL;
2769                 }
2770                 rcu_read_unlock();
2771
2772                 ip6_rt_put(grt);
2773         }
2774
2775         return err;
2776 }
2777
2778 static int ip6_route_check_nh(struct net *net,
2779                               struct fib6_config *cfg,
2780                               struct net_device **_dev,
2781                               struct inet6_dev **idev)
2782 {
2783         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2784         struct net_device *dev = _dev ? *_dev : NULL;
2785         struct rt6_info *grt = NULL;
2786         int err = -EHOSTUNREACH;
2787
2788         if (cfg->fc_table) {
2789                 int flags = RT6_LOOKUP_F_IFACE;
2790
2791                 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2792                                           cfg->fc_table, flags);
2793                 if (grt) {
2794                         if (grt->rt6i_flags & RTF_GATEWAY ||
2795                             (dev && dev != grt->dst.dev)) {
2796                                 ip6_rt_put(grt);
2797                                 grt = NULL;
2798                         }
2799                 }
2800         }
2801
2802         if (!grt)
2803                 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2804
2805         if (!grt)
2806                 goto out;
2807
2808         if (dev) {
2809                 if (dev != grt->dst.dev) {
2810                         ip6_rt_put(grt);
2811                         goto out;
2812                 }
2813         } else {
2814                 *_dev = dev = grt->dst.dev;
2815                 *idev = grt->rt6i_idev;
2816                 dev_hold(dev);
2817                 in6_dev_hold(grt->rt6i_idev);
2818         }
2819
2820         if (!(grt->rt6i_flags & RTF_GATEWAY))
2821                 err = 0;
2822
2823         ip6_rt_put(grt);
2824
2825 out:
2826         return err;
2827 }
2828
2829 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2830                            struct net_device **_dev, struct inet6_dev **idev,
2831                            struct netlink_ext_ack *extack)
2832 {
2833         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2834         int gwa_type = ipv6_addr_type(gw_addr);
2835         bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2836         const struct net_device *dev = *_dev;
2837         bool need_addr_check = !dev;
2838         int err = -EINVAL;
2839
2840         /* if gw_addr is local we will fail to detect this in case
2841          * address is still TENTATIVE (DAD in progress). rt6_lookup()
2842          * will return already-added prefix route via interface that
2843          * prefix route was assigned to, which might be non-loopback.
2844          */
2845         if (dev &&
2846             ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2847                 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2848                 goto out;
2849         }
2850
2851         if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2852                 /* IPv6 strictly inhibits using not link-local
2853                  * addresses as nexthop address.
2854                  * Otherwise, router will not able to send redirects.
2855                  * It is very good, but in some (rare!) circumstances
2856                  * (SIT, PtP, NBMA NOARP links) it is handy to allow
2857                  * some exceptions. --ANK
2858                  * We allow IPv4-mapped nexthops to support RFC4798-type
2859                  * addressing
2860                  */
2861                 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2862                         NL_SET_ERR_MSG(extack, "Invalid gateway address");
2863                         goto out;
2864                 }
2865
2866                 if (cfg->fc_flags & RTNH_F_ONLINK)
2867                         err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2868                 else
2869                         err = ip6_route_check_nh(net, cfg, _dev, idev);
2870
2871                 if (err)
2872                         goto out;
2873         }
2874
2875         /* reload in case device was changed */
2876         dev = *_dev;
2877
2878         err = -EINVAL;
2879         if (!dev) {
2880                 NL_SET_ERR_MSG(extack, "Egress device not specified");
2881                 goto out;
2882         } else if (dev->flags & IFF_LOOPBACK) {
2883                 NL_SET_ERR_MSG(extack,
2884                                "Egress device can not be loopback device for this route");
2885                 goto out;
2886         }
2887
2888         /* if we did not check gw_addr above, do so now that the
2889          * egress device has been resolved.
2890          */
2891         if (need_addr_check &&
2892             ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2893                 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2894                 goto out;
2895         }
2896
2897         err = 0;
2898 out:
2899         return err;
2900 }
2901
2902 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2903                                               gfp_t gfp_flags,
2904                                               struct netlink_ext_ack *extack)
2905 {
2906         struct net *net = cfg->fc_nlinfo.nl_net;
2907         struct fib6_info *rt = NULL;
2908         struct net_device *dev = NULL;
2909         struct inet6_dev *idev = NULL;
2910         struct fib6_table *table;
2911         int addr_type;
2912         int err = -EINVAL;
2913
2914         /* RTF_PCPU is an internal flag; can not be set by userspace */
2915         if (cfg->fc_flags & RTF_PCPU) {
2916                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2917                 goto out;
2918         }
2919
2920         /* RTF_CACHE is an internal flag; can not be set by userspace */
2921         if (cfg->fc_flags & RTF_CACHE) {
2922                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2923                 goto out;
2924         }
2925
2926         if (cfg->fc_type > RTN_MAX) {
2927                 NL_SET_ERR_MSG(extack, "Invalid route type");
2928                 goto out;
2929         }
2930
2931         if (cfg->fc_dst_len > 128) {
2932                 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2933                 goto out;
2934         }
2935         if (cfg->fc_src_len > 128) {
2936                 NL_SET_ERR_MSG(extack, "Invalid source address length");
2937                 goto out;
2938         }
2939 #ifndef CONFIG_IPV6_SUBTREES
2940         if (cfg->fc_src_len) {
2941                 NL_SET_ERR_MSG(extack,
2942                                "Specifying source address requires IPV6_SUBTREES to be enabled");
2943                 goto out;
2944         }
2945 #endif
2946         if (cfg->fc_ifindex) {
2947                 err = -ENODEV;
2948                 dev = dev_get_by_index(net, cfg->fc_ifindex);
2949                 if (!dev)
2950                         goto out;
2951                 idev = in6_dev_get(dev);
2952                 if (!idev)
2953                         goto out;
2954         }
2955
2956         if (cfg->fc_metric == 0)
2957                 cfg->fc_metric = IP6_RT_PRIO_USER;
2958
2959         if (cfg->fc_flags & RTNH_F_ONLINK) {
2960                 if (!dev) {
2961                         NL_SET_ERR_MSG(extack,
2962                                        "Nexthop device required for onlink");
2963                         err = -ENODEV;
2964                         goto out;
2965                 }
2966
2967                 if (!(dev->flags & IFF_UP)) {
2968                         NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2969                         err = -ENETDOWN;
2970                         goto out;
2971                 }
2972         }
2973
2974         err = -ENOBUFS;
2975         if (cfg->fc_nlinfo.nlh &&
2976             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2977                 table = fib6_get_table(net, cfg->fc_table);
2978                 if (!table) {
2979                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2980                         table = fib6_new_table(net, cfg->fc_table);
2981                 }
2982         } else {
2983                 table = fib6_new_table(net, cfg->fc_table);
2984         }
2985
2986         if (!table)
2987                 goto out;
2988
2989         err = -ENOMEM;
2990         rt = fib6_info_alloc(gfp_flags);
2991         if (!rt)
2992                 goto out;
2993
2994         rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
2995                                                extack);
2996         if (IS_ERR(rt->fib6_metrics)) {
2997                 err = PTR_ERR(rt->fib6_metrics);
2998                 /* Do not leave garbage there. */
2999                 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3000                 goto out;
3001         }
3002
3003         if (cfg->fc_flags & RTF_ADDRCONF)
3004                 rt->dst_nocount = true;
3005
3006         if (cfg->fc_flags & RTF_EXPIRES)
3007                 fib6_set_expires(rt, jiffies +
3008                                 clock_t_to_jiffies(cfg->fc_expires));
3009         else
3010                 fib6_clean_expires(rt);
3011
3012         if (cfg->fc_protocol == RTPROT_UNSPEC)
3013                 cfg->fc_protocol = RTPROT_BOOT;
3014         rt->fib6_protocol = cfg->fc_protocol;
3015
3016         addr_type = ipv6_addr_type(&cfg->fc_dst);
3017
3018         if (cfg->fc_encap) {
3019                 struct lwtunnel_state *lwtstate;
3020
3021                 err = lwtunnel_build_state(cfg->fc_encap_type,
3022                                            cfg->fc_encap, AF_INET6, cfg,
3023                                            &lwtstate, extack);
3024                 if (err)
3025                         goto out;
3026                 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
3027         }
3028
3029         ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3030         rt->fib6_dst.plen = cfg->fc_dst_len;
3031         if (rt->fib6_dst.plen == 128)
3032                 rt->dst_host = true;
3033
3034 #ifdef CONFIG_IPV6_SUBTREES
3035         ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3036         rt->fib6_src.plen = cfg->fc_src_len;
3037 #endif
3038
3039         rt->fib6_metric = cfg->fc_metric;
3040         rt->fib6_nh.nh_weight = 1;
3041
3042         rt->fib6_type = cfg->fc_type;
3043
3044         /* We cannot add true routes via loopback here,
3045            they would result in kernel looping; promote them to reject routes
3046          */
3047         if ((cfg->fc_flags & RTF_REJECT) ||
3048             (dev && (dev->flags & IFF_LOOPBACK) &&
3049              !(addr_type & IPV6_ADDR_LOOPBACK) &&
3050              !(cfg->fc_flags & RTF_LOCAL))) {
3051                 /* hold loopback dev/idev if we haven't done so. */
3052                 if (dev != net->loopback_dev) {
3053                         if (dev) {
3054                                 dev_put(dev);
3055                                 in6_dev_put(idev);
3056                         }
3057                         dev = net->loopback_dev;
3058                         dev_hold(dev);
3059                         idev = in6_dev_get(dev);
3060                         if (!idev) {
3061                                 err = -ENODEV;
3062                                 goto out;
3063                         }
3064                 }
3065                 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3066                 goto install_route;
3067         }
3068
3069         if (cfg->fc_flags & RTF_GATEWAY) {
3070                 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3071                 if (err)
3072                         goto out;
3073
3074                 rt->fib6_nh.nh_gw = cfg->fc_gateway;
3075         }
3076
3077         err = -ENODEV;
3078         if (!dev)
3079                 goto out;
3080
3081         if (idev->cnf.disable_ipv6) {
3082                 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3083                 err = -EACCES;
3084                 goto out;
3085         }
3086
3087         if (!(dev->flags & IFF_UP)) {
3088                 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3089                 err = -ENETDOWN;
3090                 goto out;
3091         }
3092
3093         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3094                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3095                         NL_SET_ERR_MSG(extack, "Invalid source address");
3096                         err = -EINVAL;
3097                         goto out;
3098                 }
3099                 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3100                 rt->fib6_prefsrc.plen = 128;
3101         } else
3102                 rt->fib6_prefsrc.plen = 0;
3103
3104         rt->fib6_flags = cfg->fc_flags;
3105
3106 install_route:
3107         if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3108             !netif_carrier_ok(dev))
3109                 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3110         rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3111         rt->fib6_nh.nh_dev = dev;
3112         rt->fib6_table = table;
3113
3114         if (idev)
3115                 in6_dev_put(idev);
3116
3117         return rt;
3118 out:
3119         if (dev)
3120                 dev_put(dev);
3121         if (idev)
3122                 in6_dev_put(idev);
3123
3124         fib6_info_release(rt);
3125         return ERR_PTR(err);
3126 }
3127
3128 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3129                   struct netlink_ext_ack *extack)
3130 {
3131         struct fib6_info *rt;
3132         int err;
3133
3134         rt = ip6_route_info_create(cfg, gfp_flags, extack);
3135         if (IS_ERR(rt))
3136                 return PTR_ERR(rt);
3137
3138         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3139         fib6_info_release(rt);
3140
3141         return err;
3142 }
3143
3144 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3145 {
3146         struct net *net = info->nl_net;
3147         struct fib6_table *table;
3148         int err;
3149
3150         if (rt == net->ipv6.fib6_null_entry) {
3151                 err = -ENOENT;
3152                 goto out;
3153         }
3154
3155         table = rt->fib6_table;
3156         spin_lock_bh(&table->tb6_lock);
3157         err = fib6_del(rt, info);
3158         spin_unlock_bh(&table->tb6_lock);
3159
3160 out:
3161         fib6_info_release(rt);
3162         return err;
3163 }
3164
3165 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3166 {
3167         struct nl_info info = { .nl_net = net };
3168
3169         return __ip6_del_rt(rt, &info);
3170 }
3171
3172 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3173 {
3174         struct nl_info *info = &cfg->fc_nlinfo;
3175         struct net *net = info->nl_net;
3176         struct sk_buff *skb = NULL;
3177         struct fib6_table *table;
3178         int err = -ENOENT;
3179
3180         if (rt == net->ipv6.fib6_null_entry)
3181                 goto out_put;
3182         table = rt->fib6_table;
3183         spin_lock_bh(&table->tb6_lock);
3184
3185         if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3186                 struct fib6_info *sibling, *next_sibling;
3187
3188                 /* prefer to send a single notification with all hops */
3189                 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3190                 if (skb) {
3191                         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3192
3193                         if (rt6_fill_node(net, skb, rt, NULL,
3194                                           NULL, NULL, 0, RTM_DELROUTE,
3195                                           info->portid, seq, 0) < 0) {
3196                                 kfree_skb(skb);
3197                                 skb = NULL;
3198                         } else
3199                                 info->skip_notify = 1;
3200                 }
3201
3202                 list_for_each_entry_safe(sibling, next_sibling,
3203                                          &rt->fib6_siblings,
3204                                          fib6_siblings) {
3205                         err = fib6_del(sibling, info);
3206                         if (err)
3207                                 goto out_unlock;
3208                 }
3209         }
3210
3211         err = fib6_del(rt, info);
3212 out_unlock:
3213         spin_unlock_bh(&table->tb6_lock);
3214 out_put:
3215         fib6_info_release(rt);
3216
3217         if (skb) {
3218                 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3219                             info->nlh, gfp_any());
3220         }
3221         return err;
3222 }
3223
3224 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3225 {
3226         int rc = -ESRCH;
3227
3228         if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3229                 goto out;
3230
3231         if (cfg->fc_flags & RTF_GATEWAY &&
3232             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3233                 goto out;
3234
3235         rc = rt6_remove_exception_rt(rt);
3236 out:
3237         return rc;
3238 }
3239
3240 static int ip6_route_del(struct fib6_config *cfg,
3241                          struct netlink_ext_ack *extack)
3242 {
3243         struct rt6_info *rt_cache;
3244         struct fib6_table *table;
3245         struct fib6_info *rt;
3246         struct fib6_node *fn;
3247         int err = -ESRCH;
3248
3249         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3250         if (!table) {
3251                 NL_SET_ERR_MSG(extack, "FIB table does not exist");
3252                 return err;
3253         }
3254
3255         rcu_read_lock();
3256
3257         fn = fib6_locate(&table->tb6_root,
3258                          &cfg->fc_dst, cfg->fc_dst_len,
3259                          &cfg->fc_src, cfg->fc_src_len,
3260                          !(cfg->fc_flags & RTF_CACHE));
3261
3262         if (fn) {
3263                 for_each_fib6_node_rt_rcu(fn) {
3264                         if (cfg->fc_flags & RTF_CACHE) {
3265                                 int rc;
3266
3267                                 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3268                                                               &cfg->fc_src);
3269                                 if (rt_cache) {
3270                                         rc = ip6_del_cached_rt(rt_cache, cfg);
3271                                         if (rc != -ESRCH) {
3272                                                 rcu_read_unlock();
3273                                                 return rc;
3274                                         }
3275                                 }
3276                                 continue;
3277                         }
3278                         if (cfg->fc_ifindex &&
3279                             (!rt->fib6_nh.nh_dev ||
3280                              rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3281                                 continue;
3282                         if (cfg->fc_flags & RTF_GATEWAY &&
3283                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3284                                 continue;
3285                         if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3286                                 continue;
3287                         if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3288                                 continue;
3289                         if (!fib6_info_hold_safe(rt))
3290                                 continue;
3291                         rcu_read_unlock();
3292
3293                         /* if gateway was specified only delete the one hop */
3294                         if (cfg->fc_flags & RTF_GATEWAY)
3295                                 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3296
3297                         return __ip6_del_rt_siblings(rt, cfg);
3298                 }
3299         }
3300         rcu_read_unlock();
3301
3302         return err;
3303 }
3304
3305 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3306 {
3307         struct netevent_redirect netevent;
3308         struct rt6_info *rt, *nrt = NULL;
3309         struct ndisc_options ndopts;
3310         struct inet6_dev *in6_dev;
3311         struct neighbour *neigh;
3312         struct fib6_info *from;
3313         struct rd_msg *msg;
3314         int optlen, on_link;
3315         u8 *lladdr;
3316
3317         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3318         optlen -= sizeof(*msg);
3319
3320         if (optlen < 0) {
3321                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3322                 return;
3323         }
3324
3325         msg = (struct rd_msg *)icmp6_hdr(skb);
3326
3327         if (ipv6_addr_is_multicast(&msg->dest)) {
3328                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3329                 return;
3330         }
3331
3332         on_link = 0;
3333         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3334                 on_link = 1;
3335         } else if (ipv6_addr_type(&msg->target) !=
3336                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3337                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3338                 return;
3339         }
3340
3341         in6_dev = __in6_dev_get(skb->dev);
3342         if (!in6_dev)
3343                 return;
3344         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3345                 return;
3346
3347         /* RFC2461 8.1:
3348          *      The IP source address of the Redirect MUST be the same as the current
3349          *      first-hop router for the specified ICMP Destination Address.
3350          */
3351
3352         if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3353                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3354                 return;
3355         }
3356
3357         lladdr = NULL;
3358         if (ndopts.nd_opts_tgt_lladdr) {
3359                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3360                                              skb->dev);
3361                 if (!lladdr) {
3362                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3363                         return;
3364                 }
3365         }
3366
3367         rt = (struct rt6_info *) dst;
3368         if (rt->rt6i_flags & RTF_REJECT) {
3369                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3370                 return;
3371         }
3372
3373         /* Redirect received -> path was valid.
3374          * Look, redirects are sent only in response to data packets,
3375          * so that this nexthop apparently is reachable. --ANK
3376          */
3377         dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3378
3379         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3380         if (!neigh)
3381                 return;
3382
3383         /*
3384          *      We have finally decided to accept it.
3385          */
3386
3387         ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3388                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
3389                      NEIGH_UPDATE_F_OVERRIDE|
3390                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3391                                      NEIGH_UPDATE_F_ISROUTER)),
3392                      NDISC_REDIRECT, &ndopts);
3393
3394         rcu_read_lock();
3395         from = rcu_dereference(rt->from);
3396         /* This fib6_info_hold() is safe here because we hold reference to rt
3397          * and rt already holds reference to fib6_info.
3398          */
3399         fib6_info_hold(from);
3400         rcu_read_unlock();
3401
3402         nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3403         if (!nrt)
3404                 goto out;
3405
3406         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3407         if (on_link)
3408                 nrt->rt6i_flags &= ~RTF_GATEWAY;
3409
3410         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3411
3412         /* No need to remove rt from the exception table if rt is
3413          * a cached route because rt6_insert_exception() will
3414          * takes care of it
3415          */
3416         if (rt6_insert_exception(nrt, from)) {
3417                 dst_release_immediate(&nrt->dst);
3418                 goto out;
3419         }
3420
3421         netevent.old = &rt->dst;
3422         netevent.new = &nrt->dst;
3423         netevent.daddr = &msg->dest;
3424         netevent.neigh = neigh;
3425         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3426
3427 out:
3428         fib6_info_release(from);
3429         neigh_release(neigh);
3430 }
3431
3432 #ifdef CONFIG_IPV6_ROUTE_INFO
3433 static struct fib6_info *rt6_get_route_info(struct net *net,
3434                                            const struct in6_addr *prefix, int prefixlen,
3435                                            const struct in6_addr *gwaddr,
3436                                            struct net_device *dev)
3437 {
3438         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3439         int ifindex = dev->ifindex;
3440         struct fib6_node *fn;
3441         struct fib6_info *rt = NULL;
3442         struct fib6_table *table;
3443
3444         table = fib6_get_table(net, tb_id);
3445         if (!table)
3446                 return NULL;
3447
3448         rcu_read_lock();
3449         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3450         if (!fn)
3451                 goto out;
3452
3453         for_each_fib6_node_rt_rcu(fn) {
3454                 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3455                         continue;
3456                 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3457                         continue;
3458                 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3459                         continue;
3460                 if (!fib6_info_hold_safe(rt))
3461                         continue;
3462                 break;
3463         }
3464 out:
3465         rcu_read_unlock();
3466         return rt;
3467 }
3468
3469 static struct fib6_info *rt6_add_route_info(struct net *net,
3470                                            const struct in6_addr *prefix, int prefixlen,
3471                                            const struct in6_addr *gwaddr,
3472                                            struct net_device *dev,
3473                                            unsigned int pref)
3474 {
3475         struct fib6_config cfg = {
3476                 .fc_metric      = IP6_RT_PRIO_USER,
3477                 .fc_ifindex     = dev->ifindex,
3478                 .fc_dst_len     = prefixlen,
3479                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3480                                   RTF_UP | RTF_PREF(pref),
3481                 .fc_protocol = RTPROT_RA,
3482                 .fc_type = RTN_UNICAST,
3483                 .fc_nlinfo.portid = 0,
3484                 .fc_nlinfo.nlh = NULL,
3485                 .fc_nlinfo.nl_net = net,
3486         };
3487
3488         cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3489         cfg.fc_dst = *prefix;
3490         cfg.fc_gateway = *gwaddr;
3491
3492         /* We should treat it as a default route if prefix length is 0. */
3493         if (!prefixlen)
3494                 cfg.fc_flags |= RTF_DEFAULT;
3495
3496         ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3497
3498         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3499 }
3500 #endif
3501
3502 struct fib6_info *rt6_get_dflt_router(struct net *net,
3503                                      const struct in6_addr *addr,
3504                                      struct net_device *dev)
3505 {
3506         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3507         struct fib6_info *rt;
3508         struct fib6_table *table;
3509
3510         table = fib6_get_table(net, tb_id);
3511         if (!table)
3512                 return NULL;
3513
3514         rcu_read_lock();
3515         for_each_fib6_node_rt_rcu(&table->tb6_root) {
3516                 if (dev == rt->fib6_nh.nh_dev &&
3517                     ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3518                     ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3519                         break;
3520         }
3521         if (rt && !fib6_info_hold_safe(rt))
3522                 rt = NULL;
3523         rcu_read_unlock();
3524         return rt;
3525 }
3526
3527 struct fib6_info *rt6_add_dflt_router(struct net *net,
3528                                      const struct in6_addr *gwaddr,
3529                                      struct net_device *dev,
3530                                      unsigned int pref)
3531 {
3532         struct fib6_config cfg = {
3533                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3534                 .fc_metric      = IP6_RT_PRIO_USER,
3535                 .fc_ifindex     = dev->ifindex,
3536                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3537                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3538                 .fc_protocol = RTPROT_RA,
3539                 .fc_type = RTN_UNICAST,
3540                 .fc_nlinfo.portid = 0,
3541                 .fc_nlinfo.nlh = NULL,
3542                 .fc_nlinfo.nl_net = net,
3543         };
3544
3545         cfg.fc_gateway = *gwaddr;
3546
3547         if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3548                 struct fib6_table *table;
3549
3550                 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3551                 if (table)
3552                         table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3553         }
3554
3555         return rt6_get_dflt_router(net, gwaddr, dev);
3556 }
3557
3558 static void __rt6_purge_dflt_routers(struct net *net,
3559                                      struct fib6_table *table)
3560 {
3561         struct fib6_info *rt;
3562
3563 restart:
3564         rcu_read_lock();
3565         for_each_fib6_node_rt_rcu(&table->tb6_root) {
3566                 struct net_device *dev = fib6_info_nh_dev(rt);
3567                 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3568
3569                 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3570                     (!idev || idev->cnf.accept_ra != 2) &&
3571                     fib6_info_hold_safe(rt)) {
3572                         rcu_read_unlock();
3573                         ip6_del_rt(net, rt);
3574                         goto restart;
3575                 }
3576         }
3577         rcu_read_unlock();
3578
3579         table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3580 }
3581
3582 void rt6_purge_dflt_routers(struct net *net)
3583 {
3584         struct fib6_table *table;
3585         struct hlist_head *head;
3586         unsigned int h;
3587
3588         rcu_read_lock();
3589
3590         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3591                 head = &net->ipv6.fib_table_hash[h];
3592                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3593                         if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3594                                 __rt6_purge_dflt_routers(net, table);
3595                 }
3596         }
3597
3598         rcu_read_unlock();
3599 }
3600
3601 static void rtmsg_to_fib6_config(struct net *net,
3602                                  struct in6_rtmsg *rtmsg,
3603                                  struct fib6_config *cfg)
3604 {
3605         *cfg = (struct fib6_config){
3606                 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3607                          : RT6_TABLE_MAIN,
3608                 .fc_ifindex = rtmsg->rtmsg_ifindex,
3609                 .fc_metric = rtmsg->rtmsg_metric,
3610                 .fc_expires = rtmsg->rtmsg_info,
3611                 .fc_dst_len = rtmsg->rtmsg_dst_len,
3612                 .fc_src_len = rtmsg->rtmsg_src_len,
3613                 .fc_flags = rtmsg->rtmsg_flags,
3614                 .fc_type = rtmsg->rtmsg_type,
3615
3616                 .fc_nlinfo.nl_net = net,
3617
3618                 .fc_dst = rtmsg->rtmsg_dst,
3619                 .fc_src = rtmsg->rtmsg_src,
3620                 .fc_gateway = rtmsg->rtmsg_gateway,
3621         };
3622 }
3623
3624 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3625 {
3626         struct fib6_config cfg;
3627         struct in6_rtmsg rtmsg;
3628         int err;
3629
3630         switch (cmd) {
3631         case SIOCADDRT:         /* Add a route */
3632         case SIOCDELRT:         /* Delete a route */
3633                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3634                         return -EPERM;
3635                 err = copy_from_user(&rtmsg, arg,
3636                                      sizeof(struct in6_rtmsg));
3637                 if (err)
3638                         return -EFAULT;
3639
3640                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3641
3642                 rtnl_lock();
3643                 switch (cmd) {
3644                 case SIOCADDRT:
3645                         err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3646                         break;
3647                 case SIOCDELRT:
3648                         err = ip6_route_del(&cfg, NULL);
3649                         break;
3650                 default:
3651                         err = -EINVAL;
3652                 }
3653                 rtnl_unlock();
3654
3655                 return err;
3656         }
3657
3658         return -EINVAL;
3659 }
3660
3661 /*
3662  *      Drop the packet on the floor
3663  */
3664
3665 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3666 {
3667         int type;
3668         struct dst_entry *dst = skb_dst(skb);
3669         switch (ipstats_mib_noroutes) {
3670         case IPSTATS_MIB_INNOROUTES:
3671                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3672                 if (type == IPV6_ADDR_ANY) {
3673                         IP6_INC_STATS(dev_net(dst->dev),
3674                                       __in6_dev_get_safely(skb->dev),
3675                                       IPSTATS_MIB_INADDRERRORS);
3676                         break;
3677                 }
3678                 /* FALLTHROUGH */
3679         case IPSTATS_MIB_OUTNOROUTES:
3680                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3681                               ipstats_mib_noroutes);
3682                 break;
3683         }
3684         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3685         kfree_skb(skb);
3686         return 0;
3687 }
3688
3689 static int ip6_pkt_discard(struct sk_buff *skb)
3690 {
3691         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3692 }
3693
3694 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3695 {
3696         skb->dev = skb_dst(skb)->dev;
3697         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3698 }
3699
3700 static int ip6_pkt_prohibit(struct sk_buff *skb)
3701 {
3702         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3703 }
3704
3705 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3706 {
3707         skb->dev = skb_dst(skb)->dev;
3708         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3709 }
3710
3711 /*
3712  *      Allocate a dst for local (unicast / anycast) address.
3713  */
3714
3715 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3716                                      struct inet6_dev *idev,
3717                                      const struct in6_addr *addr,
3718                                      bool anycast, gfp_t gfp_flags)
3719 {
3720         u32 tb_id;
3721         struct net_device *dev = idev->dev;
3722         struct fib6_info *f6i;
3723
3724         f6i = fib6_info_alloc(gfp_flags);
3725         if (!f6i)
3726                 return ERR_PTR(-ENOMEM);
3727
3728         f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL);
3729         f6i->dst_nocount = true;
3730         f6i->dst_host = true;
3731         f6i->fib6_protocol = RTPROT_KERNEL;
3732         f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3733         if (anycast) {
3734                 f6i->fib6_type = RTN_ANYCAST;
3735                 f6i->fib6_flags |= RTF_ANYCAST;
3736         } else {
3737                 f6i->fib6_type = RTN_LOCAL;
3738                 f6i->fib6_flags |= RTF_LOCAL;
3739         }
3740
3741         f6i->fib6_nh.nh_gw = *addr;
3742         dev_hold(dev);
3743         f6i->fib6_nh.nh_dev = dev;
3744         f6i->fib6_dst.addr = *addr;
3745         f6i->fib6_dst.plen = 128;
3746         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3747         f6i->fib6_table = fib6_get_table(net, tb_id);
3748
3749         return f6i;
3750 }
3751
/* remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace, used to spot the null entry */
	struct in6_addr *addr;		/* address that is going away */
};
3758
3759 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3760 {
3761         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3762         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3763         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3764
3765         if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3766             rt != net->ipv6.fib6_null_entry &&
3767             ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3768                 spin_lock_bh(&rt6_exception_lock);
3769                 /* remove prefsrc entry */
3770                 rt->fib6_prefsrc.plen = 0;
3771                 spin_unlock_bh(&rt6_exception_lock);
3772         }
3773         return 0;
3774 }
3775
3776 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3777 {
3778         struct net *net = dev_net(ifp->idev->dev);
3779         struct arg_dev_net_ip adni = {
3780                 .dev = ifp->idev->dev,
3781                 .net = net,
3782                 .addr = &ifp->addr,
3783         };
3784         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3785 }
3786
3787 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3788
3789 /* Remove routers and update dst entries when gateway turn into host. */
3790 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3791 {
3792         struct in6_addr *gateway = (struct in6_addr *)arg;
3793
3794         if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3795             ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3796                 return -1;
3797         }
3798
3799         /* Further clean up cached routes in exception table.
3800          * This is needed because cached route may have a different
3801          * gateway than its 'parent' in the case of an ip redirect.
3802          */
3803         rt6_exceptions_clean_tohost(rt, gateway);
3804
3805         return 0;
3806 }
3807
3808 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3809 {
3810         fib6_clean_all(net, fib6_clean_tohost, gateway);
3811 }
3812
/* Argument bundle for the fib6_ifup() / fib6_ifdown() tree walkers.
 * fib6_ifup() reads nh_flags, fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* next-hop flags to clear */
		unsigned long event;	/* NETDEV_* notifier event */
	};
};
3820
3821 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3822 {
3823         struct fib6_info *iter;
3824         struct fib6_node *fn;
3825
3826         fn = rcu_dereference_protected(rt->fib6_node,
3827                         lockdep_is_held(&rt->fib6_table->tb6_lock));
3828         iter = rcu_dereference_protected(fn->leaf,
3829                         lockdep_is_held(&rt->fib6_table->tb6_lock));
3830         while (iter) {
3831                 if (iter->fib6_metric == rt->fib6_metric &&
3832                     rt6_qualify_for_ecmp(iter))
3833                         return iter;
3834                 iter = rcu_dereference_protected(iter->fib6_next,
3835                                 lockdep_is_held(&rt->fib6_table->tb6_lock));
3836         }
3837
3838         return NULL;
3839 }
3840
3841 static bool rt6_is_dead(const struct fib6_info *rt)
3842 {
3843         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3844             (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3845              fib6_ignore_linkdown(rt)))
3846                 return true;
3847
3848         return false;
3849 }
3850
3851 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3852 {
3853         struct fib6_info *iter;
3854         int total = 0;
3855
3856         if (!rt6_is_dead(rt))
3857                 total += rt->fib6_nh.nh_weight;
3858
3859         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3860                 if (!rt6_is_dead(iter))
3861                         total += iter->fib6_nh.nh_weight;
3862         }
3863
3864         return total;
3865 }
3866
3867 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3868 {
3869         int upper_bound = -1;
3870
3871         if (!rt6_is_dead(rt)) {
3872                 *weight += rt->fib6_nh.nh_weight;
3873                 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3874                                                     total) - 1;
3875         }
3876         atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3877 }
3878
3879 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3880 {
3881         struct fib6_info *iter;
3882         int weight = 0;
3883
3884         rt6_upper_bound_set(rt, &weight, total);
3885
3886         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3887                 rt6_upper_bound_set(iter, &weight, total);
3888 }
3889
3890 void rt6_multipath_rebalance(struct fib6_info *rt)
3891 {
3892         struct fib6_info *first;
3893         int total;
3894
3895         /* In case the entire multipath route was marked for flushing,
3896          * then there is no need to rebalance upon the removal of every
3897          * sibling route.
3898          */
3899         if (!rt->fib6_nsiblings || rt->should_flush)
3900                 return;
3901
3902         /* During lookup routes are evaluated in order, so we need to
3903          * make sure upper bounds are assigned from the first sibling
3904          * onwards.
3905          */
3906         first = rt6_multipath_first_sibling(rt);
3907         if (WARN_ON_ONCE(!first))
3908                 return;
3909
3910         total = rt6_multipath_total_weight(first);
3911         rt6_multipath_upper_bound_set(first, total);
3912 }
3913
3914 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3915 {
3916         const struct arg_netdev_event *arg = p_arg;
3917         struct net *net = dev_net(arg->dev);
3918
3919         if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3920                 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3921                 fib6_update_sernum_upto_root(net, rt);
3922                 rt6_multipath_rebalance(rt);
3923         }
3924
3925         return 0;
3926 }
3927
3928 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3929 {
3930         struct arg_netdev_event arg = {
3931                 .dev = dev,
3932                 {
3933                         .nh_flags = nh_flags,
3934                 },
3935         };
3936
3937         if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3938                 arg.nh_flags |= RTNH_F_LINKDOWN;
3939
3940         fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3941 }
3942
3943 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3944                                    const struct net_device *dev)
3945 {
3946         struct fib6_info *iter;
3947
3948         if (rt->fib6_nh.nh_dev == dev)
3949                 return true;
3950         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3951                 if (iter->fib6_nh.nh_dev == dev)
3952                         return true;
3953
3954         return false;
3955 }
3956
3957 static void rt6_multipath_flush(struct fib6_info *rt)
3958 {
3959         struct fib6_info *iter;
3960
3961         rt->should_flush = 1;
3962         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3963                 iter->should_flush = 1;
3964 }
3965
3966 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3967                                              const struct net_device *down_dev)
3968 {
3969         struct fib6_info *iter;
3970         unsigned int dead = 0;
3971
3972         if (rt->fib6_nh.nh_dev == down_dev ||
3973             rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3974                 dead++;
3975         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3976                 if (iter->fib6_nh.nh_dev == down_dev ||
3977                     iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3978                         dead++;
3979
3980         return dead;
3981 }
3982
3983 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3984                                        const struct net_device *dev,
3985                                        unsigned int nh_flags)
3986 {
3987         struct fib6_info *iter;
3988
3989         if (rt->fib6_nh.nh_dev == dev)
3990                 rt->fib6_nh.nh_flags |= nh_flags;
3991         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3992                 if (iter->fib6_nh.nh_dev == dev)
3993                         iter->fib6_nh.nh_flags |= nh_flags;
3994 }
3995
3996 /* called with write lock held for table with rt */
3997 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
3998 {
3999         const struct arg_netdev_event *arg = p_arg;
4000         const struct net_device *dev = arg->dev;
4001         struct net *net = dev_net(dev);
4002
4003         if (rt == net->ipv6.fib6_null_entry)
4004                 return 0;
4005
4006         switch (arg->event) {
4007         case NETDEV_UNREGISTER:
4008                 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4009         case NETDEV_DOWN:
4010                 if (rt->should_flush)
4011                         return -1;
4012                 if (!rt->fib6_nsiblings)
4013                         return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4014                 if (rt6_multipath_uses_dev(rt, dev)) {
4015                         unsigned int count;
4016
4017                         count = rt6_multipath_dead_count(rt, dev);
4018                         if (rt->fib6_nsiblings + 1 == count) {
4019                                 rt6_multipath_flush(rt);
4020                                 return -1;
4021                         }
4022                         rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4023                                                    RTNH_F_LINKDOWN);
4024                         fib6_update_sernum(net, rt);
4025                         rt6_multipath_rebalance(rt);
4026                 }
4027                 return -2;
4028         case NETDEV_CHANGE:
4029                 if (rt->fib6_nh.nh_dev != dev ||
4030                     rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4031                         break;
4032                 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
4033                 rt6_multipath_rebalance(rt);
4034                 break;
4035         }
4036
4037         return 0;
4038 }
4039
4040 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4041 {
4042         struct arg_netdev_event arg = {
4043                 .dev = dev,
4044                 {
4045                         .event = event,
4046                 },
4047         };
4048         struct net *net = dev_net(dev);
4049
4050         if (net->ipv6.sysctl.skip_notify_on_dev_down)
4051                 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4052         else
4053                 fib6_clean_all(net, fib6_ifdown, &arg);
4054 }
4055
4056 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4057 {
4058         rt6_sync_down_dev(dev, event);
4059         rt6_uncached_list_flush_dev(dev_net(dev), dev);
4060         neigh_ifdown(&nd_tbl, dev);
4061 }
4062
/* Argument bundle passed to rt6_mtu_change_route() via fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
4067
4068 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4069 {
4070         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4071         struct inet6_dev *idev;
4072
4073         /* In IPv6 pmtu discovery is not optional,
4074            so that RTAX_MTU lock cannot disable it.
4075            We still use this lock to block changes
4076            caused by addrconf/ndisc.
4077         */
4078
4079         idev = __in6_dev_get(arg->dev);
4080         if (!idev)
4081                 return 0;
4082
4083         /* For administrative MTU increase, there is no way to discover
4084            IPv6 PMTU increase, so PMTU increase should be updated here.
4085            Since RFC 1981 doesn't include administrative MTU increase
4086            update PMTU increase is a MUST. (i.e. jumbo frame)
4087          */
4088         if (rt->fib6_nh.nh_dev == arg->dev &&
4089             !fib6_metric_locked(rt, RTAX_MTU)) {
4090                 u32 mtu = rt->fib6_pmtu;
4091
4092                 if (mtu >= arg->mtu ||
4093                     (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4094                         fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4095
4096                 spin_lock_bh(&rt6_exception_lock);
4097                 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4098                 spin_unlock_bh(&rt6_exception_lock);
4099         }
4100         return 0;
4101 }
4102
4103 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4104 {
4105         struct rt6_mtu_change_arg arg = {
4106                 .dev = dev,
4107                 .mtu = mtu,
4108         };
4109
4110         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4111 }
4112
4113 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4114         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4115         [RTA_PREFSRC]           = { .len = sizeof(struct in6_addr) },
4116         [RTA_OIF]               = { .type = NLA_U32 },
4117         [RTA_IIF]               = { .type = NLA_U32 },
4118         [RTA_PRIORITY]          = { .type = NLA_U32 },
4119         [RTA_METRICS]           = { .type = NLA_NESTED },
4120         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
4121         [RTA_PREF]              = { .type = NLA_U8 },
4122         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
4123         [RTA_ENCAP]             = { .type = NLA_NESTED },
4124         [RTA_EXPIRES]           = { .type = NLA_U32 },
4125         [RTA_UID]               = { .type = NLA_U32 },
4126         [RTA_MARK]              = { .type = NLA_U32 },
4127         [RTA_TABLE]             = { .type = NLA_U32 },
4128         [RTA_IP_PROTO]          = { .type = NLA_U8 },
4129         [RTA_SPORT]             = { .type = NLA_U16 },
4130         [RTA_DPORT]             = { .type = NLA_U16 },
4131 };
4132
4133 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4134                               struct fib6_config *cfg,
4135                               struct netlink_ext_ack *extack)
4136 {
4137         struct rtmsg *rtm;
4138         struct nlattr *tb[RTA_MAX+1];
4139         unsigned int pref;
4140         int err;
4141
4142         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4143                           extack);
4144         if (err < 0)
4145                 goto errout;
4146
4147         err = -EINVAL;
4148         rtm = nlmsg_data(nlh);
4149
4150         *cfg = (struct fib6_config){
4151                 .fc_table = rtm->rtm_table,
4152                 .fc_dst_len = rtm->rtm_dst_len,
4153                 .fc_src_len = rtm->rtm_src_len,
4154                 .fc_flags = RTF_UP,
4155                 .fc_protocol = rtm->rtm_protocol,
4156                 .fc_type = rtm->rtm_type,
4157
4158                 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4159                 .fc_nlinfo.nlh = nlh,
4160                 .fc_nlinfo.nl_net = sock_net(skb->sk),
4161         };
4162
4163         if (rtm->rtm_type == RTN_UNREACHABLE ||
4164             rtm->rtm_type == RTN_BLACKHOLE ||
4165             rtm->rtm_type == RTN_PROHIBIT ||
4166             rtm->rtm_type == RTN_THROW)
4167                 cfg->fc_flags |= RTF_REJECT;
4168
4169         if (rtm->rtm_type == RTN_LOCAL)
4170                 cfg->fc_flags |= RTF_LOCAL;
4171
4172         if (rtm->rtm_flags & RTM_F_CLONED)
4173                 cfg->fc_flags |= RTF_CACHE;
4174
4175         cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4176
4177         if (tb[RTA_GATEWAY]) {
4178                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4179                 cfg->fc_flags |= RTF_GATEWAY;
4180         }
4181         if (tb[RTA_VIA]) {
4182                 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4183                 goto errout;
4184         }
4185
4186         if (tb[RTA_DST]) {
4187                 int plen = (rtm->rtm_dst_len + 7) >> 3;
4188
4189                 if (nla_len(tb[RTA_DST]) < plen)
4190                         goto errout;
4191
4192                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4193         }
4194
4195         if (tb[RTA_SRC]) {
4196                 int plen = (rtm->rtm_src_len + 7) >> 3;
4197
4198                 if (nla_len(tb[RTA_SRC]) < plen)
4199                         goto errout;
4200
4201                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4202         }
4203
4204         if (tb[RTA_PREFSRC])
4205                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4206
4207         if (tb[RTA_OIF])
4208                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4209
4210         if (tb[RTA_PRIORITY])
4211                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4212
4213         if (tb[RTA_METRICS]) {
4214                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4215                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4216         }
4217
4218         if (tb[RTA_TABLE])
4219                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4220
4221         if (tb[RTA_MULTIPATH]) {
4222                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4223                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4224
4225                 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4226                                                      cfg->fc_mp_len, extack);
4227                 if (err < 0)
4228                         goto errout;
4229         }
4230
4231         if (tb[RTA_PREF]) {
4232                 pref = nla_get_u8(tb[RTA_PREF]);
4233                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4234                     pref != ICMPV6_ROUTER_PREF_HIGH)
4235                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
4236                 cfg->fc_flags |= RTF_PREF(pref);
4237         }
4238
4239         if (tb[RTA_ENCAP])
4240                 cfg->fc_encap = tb[RTA_ENCAP];
4241
4242         if (tb[RTA_ENCAP_TYPE]) {
4243                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4244
4245                 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4246                 if (err < 0)
4247                         goto errout;
4248         }
4249
4250         if (tb[RTA_EXPIRES]) {
4251                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4252
4253                 if (addrconf_finite_timeout(timeout)) {
4254                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4255                         cfg->fc_flags |= RTF_EXPIRES;
4256                 }
4257         }
4258
4259         err = 0;
4260 errout:
4261         return err;
4262 }
4263
4264 struct rt6_nh {
4265         struct fib6_info *fib6_info;
4266         struct fib6_config r_cfg;
4267         struct list_head next;
4268 };
4269
4270 static int ip6_route_info_append(struct net *net,
4271                                  struct list_head *rt6_nh_list,
4272                                  struct fib6_info *rt,
4273                                  struct fib6_config *r_cfg)
4274 {
4275         struct rt6_nh *nh;
4276         int err = -EEXIST;
4277
4278         list_for_each_entry(nh, rt6_nh_list, next) {
4279                 /* check if fib6_info already exists */
4280                 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4281                         return err;
4282         }
4283
4284         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4285         if (!nh)
4286                 return -ENOMEM;
4287         nh->fib6_info = rt;
4288         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4289         list_add_tail(&nh->next, rt6_nh_list);
4290
4291         return 0;
4292 }
4293
4294 static void ip6_route_mpath_notify(struct fib6_info *rt,
4295                                    struct fib6_info *rt_last,
4296                                    struct nl_info *info,
4297                                    __u16 nlflags)
4298 {
4299         /* if this is an APPEND route, then rt points to the first route
4300          * inserted and rt_last points to last route inserted. Userspace
4301          * wants a consistent dump of the route which starts at the first
4302          * nexthop. Since sibling routes are always added at the end of
4303          * the list, find the first sibling of the last route appended
4304          */
4305         if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4306                 rt = list_first_entry(&rt_last->fib6_siblings,
4307                                       struct fib6_info,
4308                                       fib6_siblings);
4309         }
4310
4311         if (rt)
4312                 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4313 }
4314
4315 static int ip6_route_multipath_add(struct fib6_config *cfg,
4316                                    struct netlink_ext_ack *extack)
4317 {
4318         struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4319         struct nl_info *info = &cfg->fc_nlinfo;
4320         struct fib6_config r_cfg;
4321         struct rtnexthop *rtnh;
4322         struct fib6_info *rt;
4323         struct rt6_nh *err_nh;
4324         struct rt6_nh *nh, *nh_safe;
4325         __u16 nlflags;
4326         int remaining;
4327         int attrlen;
4328         int err = 1;
4329         int nhn = 0;
4330         int replace = (cfg->fc_nlinfo.nlh &&
4331                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4332         LIST_HEAD(rt6_nh_list);
4333
4334         nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4335         if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4336                 nlflags |= NLM_F_APPEND;
4337
4338         remaining = cfg->fc_mp_len;
4339         rtnh = (struct rtnexthop *)cfg->fc_mp;
4340
4341         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4342          * fib6_info structs per nexthop
4343          */
4344         while (rtnh_ok(rtnh, remaining)) {
4345                 memcpy(&r_cfg, cfg, sizeof(*cfg));
4346                 if (rtnh->rtnh_ifindex)
4347                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4348
4349                 attrlen = rtnh_attrlen(rtnh);
4350                 if (attrlen > 0) {
4351                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4352
4353                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4354                         if (nla) {
4355                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
4356                                 r_cfg.fc_flags |= RTF_GATEWAY;
4357                         }
4358                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4359                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4360                         if (nla)
4361                                 r_cfg.fc_encap_type = nla_get_u16(nla);
4362                 }
4363
4364                 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4365                 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4366                 if (IS_ERR(rt)) {
4367                         err = PTR_ERR(rt);
4368                         rt = NULL;
4369                         goto cleanup;
4370                 }
4371                 if (!rt6_qualify_for_ecmp(rt)) {
4372                         err = -EINVAL;
4373                         NL_SET_ERR_MSG(extack,
4374                                        "Device only routes can not be added for IPv6 using the multipath API.");
4375                         fib6_info_release(rt);
4376                         goto cleanup;
4377                 }
4378
4379                 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4380
4381                 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4382                                             rt, &r_cfg);
4383                 if (err) {
4384                         fib6_info_release(rt);
4385                         goto cleanup;
4386                 }
4387
4388                 rtnh = rtnh_next(rtnh, &remaining);
4389         }
4390
4391         /* for add and replace send one notification with all nexthops.
4392          * Skip the notification in fib6_add_rt2node and send one with
4393          * the full route when done
4394          */
4395         info->skip_notify = 1;
4396
4397         err_nh = NULL;
4398         list_for_each_entry(nh, &rt6_nh_list, next) {
4399                 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4400                 fib6_info_release(nh->fib6_info);
4401
4402                 if (!err) {
4403                         /* save reference to last route successfully inserted */
4404                         rt_last = nh->fib6_info;
4405
4406                         /* save reference to first route for notification */
4407                         if (!rt_notif)
4408                                 rt_notif = nh->fib6_info;
4409                 }
4410
4411                 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4412                 nh->fib6_info = NULL;
4413                 if (err) {
4414                         if (replace && nhn)
4415                                 NL_SET_ERR_MSG_MOD(extack,
4416                                                    "multipath route replace failed (check consistency of installed routes)");
4417                         err_nh = nh;
4418                         goto add_errout;
4419                 }
4420
4421                 /* Because each route is added like a single route we remove
4422                  * these flags after the first nexthop: if there is a collision,
4423                  * we have already failed to add the first nexthop:
4424                  * fib6_add_rt2node() has rejected it; when replacing, old
4425                  * nexthops have been replaced by first new, the rest should
4426                  * be added to it.
4427                  */
4428                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4429                                                      NLM_F_REPLACE);
4430                 nhn++;
4431         }
4432
4433         /* success ... tell user about new route */
4434         ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4435         goto cleanup;
4436
4437 add_errout:
4438         /* send notification for routes that were added so that
4439          * the delete notifications sent by ip6_route_del are
4440          * coherent
4441          */
4442         if (rt_notif)
4443                 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4444
4445         /* Delete routes that were already added */
4446         list_for_each_entry(nh, &rt6_nh_list, next) {
4447                 if (err_nh == nh)
4448                         break;
4449                 ip6_route_del(&nh->r_cfg, extack);
4450         }
4451
4452 cleanup:
4453         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4454                 if (nh->fib6_info)
4455                         fib6_info_release(nh->fib6_info);
4456                 list_del(&nh->next);
4457                 kfree(nh);
4458         }
4459
4460         return err;
4461 }
4462
4463 static int ip6_route_multipath_del(struct fib6_config *cfg,
4464                                    struct netlink_ext_ack *extack)
4465 {
4466         struct fib6_config r_cfg;
4467         struct rtnexthop *rtnh;
4468         int remaining;
4469         int attrlen;
4470         int err = 1, last_err = 0;
4471
4472         remaining = cfg->fc_mp_len;
4473         rtnh = (struct rtnexthop *)cfg->fc_mp;
4474
4475         /* Parse a Multipath Entry */
4476         while (rtnh_ok(rtnh, remaining)) {
4477                 memcpy(&r_cfg, cfg, sizeof(*cfg));
4478                 if (rtnh->rtnh_ifindex)
4479                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4480
4481                 attrlen = rtnh_attrlen(rtnh);
4482                 if (attrlen > 0) {
4483                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4484
4485                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4486                         if (nla) {
4487                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4488                                 r_cfg.fc_flags |= RTF_GATEWAY;
4489                         }
4490                 }
4491                 err = ip6_route_del(&r_cfg, extack);
4492                 if (err)
4493                         last_err = err;
4494
4495                 rtnh = rtnh_next(rtnh, &remaining);
4496         }
4497
4498         return last_err;
4499 }
4500
4501 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4502                               struct netlink_ext_ack *extack)
4503 {
4504         struct fib6_config cfg;
4505         int err;
4506
4507         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4508         if (err < 0)
4509                 return err;
4510
4511         if (cfg.fc_mp)
4512                 return ip6_route_multipath_del(&cfg, extack);
4513         else {
4514                 cfg.fc_delete_all_nh = 1;
4515                 return ip6_route_del(&cfg, extack);
4516         }
4517 }
4518
4519 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4520                               struct netlink_ext_ack *extack)
4521 {
4522         struct fib6_config cfg;
4523         int err;
4524
4525         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4526         if (err < 0)
4527                 return err;
4528
4529         if (cfg.fc_mp)
4530                 return ip6_route_multipath_add(&cfg, extack);
4531         else
4532                 return ip6_route_add(&cfg, GFP_KERNEL, extack);
4533 }
4534
4535 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4536 {
4537         int nexthop_len = 0;
4538
4539         if (rt->fib6_nsiblings) {
4540                 nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
4541                             + NLA_ALIGN(sizeof(struct rtnexthop))
4542                             + nla_total_size(16) /* RTA_GATEWAY */
4543                             + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4544
4545                 nexthop_len *= rt->fib6_nsiblings;
4546         }
4547
4548         return NLMSG_ALIGN(sizeof(struct rtmsg))
4549                + nla_total_size(16) /* RTA_SRC */
4550                + nla_total_size(16) /* RTA_DST */
4551                + nla_total_size(16) /* RTA_GATEWAY */
4552                + nla_total_size(16) /* RTA_PREFSRC */
4553                + nla_total_size(4) /* RTA_TABLE */
4554                + nla_total_size(4) /* RTA_IIF */
4555                + nla_total_size(4) /* RTA_OIF */
4556                + nla_total_size(4) /* RTA_PRIORITY */
4557                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4558                + nla_total_size(sizeof(struct rta_cacheinfo))
4559                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4560                + nla_total_size(1) /* RTA_PREF */
4561                + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4562                + nexthop_len;
4563 }
4564
4565 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4566                             unsigned int *flags, bool skip_oif)
4567 {
4568         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4569                 *flags |= RTNH_F_DEAD;
4570
4571         if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4572                 *flags |= RTNH_F_LINKDOWN;
4573
4574                 rcu_read_lock();
4575                 if (fib6_ignore_linkdown(rt))
4576                         *flags |= RTNH_F_DEAD;
4577                 rcu_read_unlock();
4578         }
4579
4580         if (rt->fib6_flags & RTF_GATEWAY) {
4581                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4582                         goto nla_put_failure;
4583         }
4584
4585         *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4586         if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4587                 *flags |= RTNH_F_OFFLOAD;
4588
4589         /* not needed for multipath encoding b/c it has a rtnexthop struct */
4590         if (!skip_oif && rt->fib6_nh.nh_dev &&
4591             nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4592                 goto nla_put_failure;
4593
4594         if (rt->fib6_nh.nh_lwtstate &&
4595             lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4596                 goto nla_put_failure;
4597
4598         return 0;
4599
4600 nla_put_failure:
4601         return -EMSGSIZE;
4602 }
4603
4604 /* add multipath next hop */
4605 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4606 {
4607         const struct net_device *dev = rt->fib6_nh.nh_dev;
4608         struct rtnexthop *rtnh;
4609         unsigned int flags = 0;
4610
4611         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4612         if (!rtnh)
4613                 goto nla_put_failure;
4614
4615         rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4616         rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4617
4618         if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4619                 goto nla_put_failure;
4620
4621         rtnh->rtnh_flags = flags;
4622
4623         /* length of rtnetlink header + attributes */
4624         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4625
4626         return 0;
4627
4628 nla_put_failure:
4629         return -EMSGSIZE;
4630 }
4631
4632 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4633                          struct fib6_info *rt, struct dst_entry *dst,
4634                          struct in6_addr *dest, struct in6_addr *src,
4635                          int iif, int type, u32 portid, u32 seq,
4636                          unsigned int flags)
4637 {
4638         struct rt6_info *rt6 = (struct rt6_info *)dst;
4639         struct rt6key *rt6_dst, *rt6_src;
4640         u32 *pmetrics, table, rt6_flags;
4641         struct nlmsghdr *nlh;
4642         struct rtmsg *rtm;
4643         long expires = 0;
4644
4645         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4646         if (!nlh)
4647                 return -EMSGSIZE;
4648
4649         if (rt6) {
4650                 rt6_dst = &rt6->rt6i_dst;
4651                 rt6_src = &rt6->rt6i_src;
4652                 rt6_flags = rt6->rt6i_flags;
4653         } else {
4654                 rt6_dst = &rt->fib6_dst;
4655                 rt6_src = &rt->fib6_src;
4656                 rt6_flags = rt->fib6_flags;
4657         }
4658
4659         rtm = nlmsg_data(nlh);
4660         rtm->rtm_family = AF_INET6;
4661         rtm->rtm_dst_len = rt6_dst->plen;
4662         rtm->rtm_src_len = rt6_src->plen;
4663         rtm->rtm_tos = 0;
4664         if (rt->fib6_table)
4665                 table = rt->fib6_table->tb6_id;
4666         else
4667                 table = RT6_TABLE_UNSPEC;
4668         rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4669         if (nla_put_u32(skb, RTA_TABLE, table))
4670                 goto nla_put_failure;
4671
4672         rtm->rtm_type = rt->fib6_type;
4673         rtm->rtm_flags = 0;
4674         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4675         rtm->rtm_protocol = rt->fib6_protocol;
4676
4677         if (rt6_flags & RTF_CACHE)
4678                 rtm->rtm_flags |= RTM_F_CLONED;
4679
4680         if (dest) {
4681                 if (nla_put_in6_addr(skb, RTA_DST, dest))
4682                         goto nla_put_failure;
4683                 rtm->rtm_dst_len = 128;
4684         } else if (rtm->rtm_dst_len)
4685                 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4686                         goto nla_put_failure;
4687 #ifdef CONFIG_IPV6_SUBTREES
4688         if (src) {
4689                 if (nla_put_in6_addr(skb, RTA_SRC, src))
4690                         goto nla_put_failure;
4691                 rtm->rtm_src_len = 128;
4692         } else if (rtm->rtm_src_len &&
4693                    nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4694                 goto nla_put_failure;
4695 #endif
4696         if (iif) {
4697 #ifdef CONFIG_IPV6_MROUTE
4698                 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4699                         int err = ip6mr_get_route(net, skb, rtm, portid);
4700
4701                         if (err == 0)
4702                                 return 0;
4703                         if (err < 0)
4704                                 goto nla_put_failure;
4705                 } else
4706 #endif
4707                         if (nla_put_u32(skb, RTA_IIF, iif))
4708                                 goto nla_put_failure;
4709         } else if (dest) {
4710                 struct in6_addr saddr_buf;
4711                 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4712                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4713                         goto nla_put_failure;
4714         }
4715
4716         if (rt->fib6_prefsrc.plen) {
4717                 struct in6_addr saddr_buf;
4718                 saddr_buf = rt->fib6_prefsrc.addr;
4719                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4720                         goto nla_put_failure;
4721         }
4722
4723         pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4724         if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4725                 goto nla_put_failure;
4726
4727         if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4728                 goto nla_put_failure;
4729
4730         /* For multipath routes, walk the siblings list and add
4731          * each as a nexthop within RTA_MULTIPATH.
4732          */
4733         if (rt6) {
4734                 if (rt6_flags & RTF_GATEWAY &&
4735                     nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4736                         goto nla_put_failure;
4737
4738                 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4739                         goto nla_put_failure;
4740         } else if (rt->fib6_nsiblings) {
4741                 struct fib6_info *sibling, *next_sibling;
4742                 struct nlattr *mp;
4743
4744                 mp = nla_nest_start(skb, RTA_MULTIPATH);
4745                 if (!mp)
4746                         goto nla_put_failure;
4747
4748                 if (rt6_add_nexthop(skb, rt) < 0)
4749                         goto nla_put_failure;
4750
4751                 list_for_each_entry_safe(sibling, next_sibling,
4752                                          &rt->fib6_siblings, fib6_siblings) {
4753                         if (rt6_add_nexthop(skb, sibling) < 0)
4754                                 goto nla_put_failure;
4755                 }
4756
4757                 nla_nest_end(skb, mp);
4758         } else {
4759                 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4760                         goto nla_put_failure;
4761         }
4762
4763         if (rt6_flags & RTF_EXPIRES) {
4764                 expires = dst ? dst->expires : rt->expires;
4765                 expires -= jiffies;
4766         }
4767
4768         if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4769                 goto nla_put_failure;
4770
4771         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4772                 goto nla_put_failure;
4773
4774
4775         nlmsg_end(skb, nlh);
4776         return 0;
4777
4778 nla_put_failure:
4779         nlmsg_cancel(skb, nlh);
4780         return -EMSGSIZE;
4781 }
4782
4783 static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4784                                const struct net_device *dev)
4785 {
4786         if (f6i->fib6_nh.nh_dev == dev)
4787                 return true;
4788
4789         if (f6i->fib6_nsiblings) {
4790                 struct fib6_info *sibling, *next_sibling;
4791
4792                 list_for_each_entry_safe(sibling, next_sibling,
4793                                          &f6i->fib6_siblings, fib6_siblings) {
4794                         if (sibling->fib6_nh.nh_dev == dev)
4795                                 return true;
4796                 }
4797         }
4798
4799         return false;
4800 }
4801
4802 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4803 {
4804         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4805         struct fib_dump_filter *filter = &arg->filter;
4806         unsigned int flags = NLM_F_MULTI;
4807         struct net *net = arg->net;
4808
4809         if (rt == net->ipv6.fib6_null_entry)
4810                 return 0;
4811
4812         if ((filter->flags & RTM_F_PREFIX) &&
4813             !(rt->fib6_flags & RTF_PREFIX_RT)) {
4814                 /* success since this is not a prefix route */
4815                 return 1;
4816         }
4817         if (filter->filter_set) {
4818                 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4819                     (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4820                     (filter->protocol && rt->fib6_protocol != filter->protocol)) {
4821                         return 1;
4822                 }
4823                 flags |= NLM_F_DUMP_FILTERED;
4824         }
4825
4826         return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4827                              RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4828                              arg->cb->nlh->nlmsg_seq, flags);
4829 }
4830
4831 static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4832                                         const struct nlmsghdr *nlh,
4833                                         struct nlattr **tb,
4834                                         struct netlink_ext_ack *extack)
4835 {
4836         struct rtmsg *rtm;
4837         int i, err;
4838
4839         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4840                 NL_SET_ERR_MSG_MOD(extack,
4841                                    "Invalid header for get route request");
4842                 return -EINVAL;
4843         }
4844
4845         if (!netlink_strict_get_check(skb))
4846                 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4847                                    rtm_ipv6_policy, extack);
4848
4849         rtm = nlmsg_data(nlh);
4850         if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4851             (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4852             rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4853             rtm->rtm_type) {
4854                 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4855                 return -EINVAL;
4856         }
4857         if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4858                 NL_SET_ERR_MSG_MOD(extack,
4859                                    "Invalid flags for get route request");
4860                 return -EINVAL;
4861         }
4862
4863         err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4864                                  rtm_ipv6_policy, extack);
4865         if (err)
4866                 return err;
4867
4868         if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4869             (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4870                 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4871                 return -EINVAL;
4872         }
4873
4874         for (i = 0; i <= RTA_MAX; i++) {
4875                 if (!tb[i])
4876                         continue;
4877
4878                 switch (i) {
4879                 case RTA_SRC:
4880                 case RTA_DST:
4881                 case RTA_IIF:
4882                 case RTA_OIF:
4883                 case RTA_MARK:
4884                 case RTA_UID:
4885                 case RTA_SPORT:
4886                 case RTA_DPORT:
4887                 case RTA_IP_PROTO:
4888                         break;
4889                 default:
4890                         NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4891                         return -EINVAL;
4892                 }
4893         }
4894
4895         return 0;
4896 }
4897
4898 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4899                               struct netlink_ext_ack *extack)
4900 {
4901         struct net *net = sock_net(in_skb->sk);
4902         struct nlattr *tb[RTA_MAX+1];
4903         int err, iif = 0, oif = 0;
4904         struct fib6_info *from;
4905         struct dst_entry *dst;
4906         struct rt6_info *rt;
4907         struct sk_buff *skb;
4908         struct rtmsg *rtm;
4909         struct flowi6 fl6 = {};
4910         bool fibmatch;
4911
4912         err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4913         if (err < 0)
4914                 goto errout;
4915
4916         err = -EINVAL;
4917         rtm = nlmsg_data(nlh);
4918         fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4919         fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4920
4921         if (tb[RTA_SRC]) {
4922                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4923                         goto errout;
4924
4925                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4926         }
4927
4928         if (tb[RTA_DST]) {
4929                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4930                         goto errout;
4931
4932                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4933         }
4934
4935         if (tb[RTA_IIF])
4936                 iif = nla_get_u32(tb[RTA_IIF]);
4937
4938         if (tb[RTA_OIF])
4939                 oif = nla_get_u32(tb[RTA_OIF]);
4940
4941         if (tb[RTA_MARK])
4942                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4943
4944         if (tb[RTA_UID])
4945                 fl6.flowi6_uid = make_kuid(current_user_ns(),
4946                                            nla_get_u32(tb[RTA_UID]));
4947         else
4948                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4949
4950         if (tb[RTA_SPORT])
4951                 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4952
4953         if (tb[RTA_DPORT])
4954                 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4955
4956         if (tb[RTA_IP_PROTO]) {
4957                 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4958                                                   &fl6.flowi6_proto, AF_INET6,
4959                                                   extack);
4960                 if (err)
4961                         goto errout;
4962         }
4963
4964         if (iif) {
4965                 struct net_device *dev;
4966                 int flags = 0;
4967
4968                 rcu_read_lock();
4969
4970                 dev = dev_get_by_index_rcu(net, iif);
4971                 if (!dev) {
4972                         rcu_read_unlock();
4973                         err = -ENODEV;
4974                         goto errout;
4975                 }
4976
4977                 fl6.flowi6_iif = iif;
4978
4979                 if (!ipv6_addr_any(&fl6.saddr))
4980                         flags |= RT6_LOOKUP_F_HAS_SADDR;
4981
4982                 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4983
4984                 rcu_read_unlock();
4985         } else {
4986                 fl6.flowi6_oif = oif;
4987
4988                 dst = ip6_route_output(net, NULL, &fl6);
4989         }
4990
4991
4992         rt = container_of(dst, struct rt6_info, dst);
4993         if (rt->dst.error) {
4994                 err = rt->dst.error;
4995                 ip6_rt_put(rt);
4996                 goto errout;
4997         }
4998
4999         if (rt == net->ipv6.ip6_null_entry) {
5000                 err = rt->dst.error;
5001                 ip6_rt_put(rt);
5002                 goto errout;
5003         }
5004
5005         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5006         if (!skb) {
5007                 ip6_rt_put(rt);
5008                 err = -ENOBUFS;
5009                 goto errout;
5010         }
5011
5012         skb_dst_set(skb, &rt->dst);
5013
5014         rcu_read_lock();
5015         from = rcu_dereference(rt->from);
5016
5017         if (fibmatch)
5018                 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
5019                                     RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5020                                     nlh->nlmsg_seq, 0);
5021         else
5022                 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5023                                     &fl6.saddr, iif, RTM_NEWROUTE,
5024                                     NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5025                                     0);
5026         rcu_read_unlock();
5027
5028         if (err < 0) {
5029                 kfree_skb(skb);
5030                 goto errout;
5031         }
5032
5033         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5034 errout:
5035         return err;
5036 }
5037
5038 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5039                      unsigned int nlm_flags)
5040 {
5041         struct sk_buff *skb;
5042         struct net *net = info->nl_net;
5043         u32 seq;
5044         int err;
5045
5046         err = -ENOBUFS;
5047         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5048
5049         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5050         if (!skb)
5051                 goto errout;
5052
5053         err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5054                             event, info->portid, seq, nlm_flags);
5055         if (err < 0) {
5056                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5057                 WARN_ON(err == -EMSGSIZE);
5058                 kfree_skb(skb);
5059                 goto errout;
5060         }
5061         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5062                     info->nlh, gfp_any());
5063         return;
5064 errout:
5065         if (err < 0)
5066                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5067 }
5068
5069 static int ip6_route_dev_notify(struct notifier_block *this,
5070                                 unsigned long event, void *ptr)
5071 {
5072         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5073         struct net *net = dev_net(dev);
5074
5075         if (!(dev->flags & IFF_LOOPBACK))
5076                 return NOTIFY_OK;
5077
5078         if (event == NETDEV_REGISTER) {
5079                 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
5080                 net->ipv6.ip6_null_entry->dst.dev = dev;
5081                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5082 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5083                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
5084                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5085                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
5086                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
5087 #endif
5088          } else if (event == NETDEV_UNREGISTER &&
5089                     dev->reg_state != NETREG_UNREGISTERED) {
5090                 /* NETDEV_UNREGISTER could be fired for multiple times by
5091                  * netdev_wait_allrefs(). Make sure we only call this once.
5092                  */
5093                 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5094 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5095                 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5096                 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5097 #endif
5098         }
5099
5100         return NOTIFY_OK;
5101 }
5102
5103 /*
5104  *      /proc
5105  */
5106
5107 #ifdef CONFIG_PROC_FS
5108 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5109 {
5110         struct net *net = (struct net *)seq->private;
5111         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
5112                    net->ipv6.rt6_stats->fib_nodes,
5113                    net->ipv6.rt6_stats->fib_route_nodes,
5114                    atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
5115                    net->ipv6.rt6_stats->fib_rt_entries,
5116                    net->ipv6.rt6_stats->fib_rt_cache,
5117                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
5118                    net->ipv6.rt6_stats->fib_discarded_routes);
5119
5120         return 0;
5121 }
5122 #endif  /* CONFIG_PROC_FS */
5123
5124 #ifdef CONFIG_SYSCTL
5125
5126 static
5127 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
5128                               void __user *buffer, size_t *lenp, loff_t *ppos)
5129 {
5130         struct net *net;
5131         int delay;
5132         int ret;
5133         if (!write)
5134                 return -EINVAL;
5135
5136         net = (struct net *)ctl->extra1;
5137         delay = net->ipv6.sysctl.flush_delay;
5138         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5139         if (ret)
5140                 return ret;
5141
5142         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
5143         return 0;
5144 }
5145
5146 static int zero;
5147 static int one = 1;
5148
5149 static struct ctl_table ipv6_route_table_template[] = {
5150         {
5151                 .procname       =       "flush",
5152                 .data           =       &init_net.ipv6.sysctl.flush_delay,
5153                 .maxlen         =       sizeof(int),
5154                 .mode           =       0200,
5155                 .proc_handler   =       ipv6_sysctl_rtcache_flush
5156         },
5157         {
5158                 .procname       =       "gc_thresh",
5159                 .data           =       &ip6_dst_ops_template.gc_thresh,
5160                 .maxlen         =       sizeof(int),
5161                 .mode           =       0644,
5162                 .proc_handler   =       proc_dointvec,
5163         },
5164         {
5165                 .procname       =       "max_size",
5166                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
5167                 .maxlen         =       sizeof(int),
5168                 .mode           =       0644,
5169                 .proc_handler   =       proc_dointvec,
5170         },
5171         {
5172                 .procname       =       "gc_min_interval",
5173                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5174                 .maxlen         =       sizeof(int),
5175                 .mode           =       0644,
5176                 .proc_handler   =       proc_dointvec_jiffies,
5177         },
5178         {
5179                 .procname       =       "gc_timeout",
5180                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5181                 .maxlen         =       sizeof(int),
5182                 .mode           =       0644,
5183                 .proc_handler   =       proc_dointvec_jiffies,
5184         },
5185         {
5186                 .procname       =       "gc_interval",
5187                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
5188                 .maxlen         =       sizeof(int),
5189                 .mode           =       0644,
5190                 .proc_handler   =       proc_dointvec_jiffies,
5191         },
5192         {
5193                 .procname       =       "gc_elasticity",
5194                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5195                 .maxlen         =       sizeof(int),
5196                 .mode           =       0644,
5197                 .proc_handler   =       proc_dointvec,
5198         },
5199         {
5200                 .procname       =       "mtu_expires",
5201                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5202                 .maxlen         =       sizeof(int),
5203                 .mode           =       0644,
5204                 .proc_handler   =       proc_dointvec_jiffies,
5205         },
5206         {
5207                 .procname       =       "min_adv_mss",
5208                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
5209                 .maxlen         =       sizeof(int),
5210                 .mode           =       0644,
5211                 .proc_handler   =       proc_dointvec,
5212         },
5213         {
5214                 .procname       =       "gc_min_interval_ms",
5215                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5216                 .maxlen         =       sizeof(int),
5217                 .mode           =       0644,
5218                 .proc_handler   =       proc_dointvec_ms_jiffies,
5219         },
5220         {
5221                 .procname       =       "skip_notify_on_dev_down",
5222                 .data           =       &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5223                 .maxlen         =       sizeof(int),
5224                 .mode           =       0644,
5225                 .proc_handler   =       proc_dointvec,
5226                 .extra1         =       &zero,
5227                 .extra2         =       &one,
5228         },
5229         { }
5230 };
5231
5232 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5233 {
5234         struct ctl_table *table;
5235
5236         table = kmemdup(ipv6_route_table_template,
5237                         sizeof(ipv6_route_table_template),
5238                         GFP_KERNEL);
5239
5240         if (table) {
5241                 table[0].data = &net->ipv6.sysctl.flush_delay;
5242                 table[0].extra1 = net;
5243                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5244                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5245                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5246                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5247                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5248                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5249                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5250                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5251                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5252                 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5253
5254                 /* Don't export sysctls to unprivileged users */
5255                 if (net->user_ns != &init_user_ns)
5256                         table[0].procname = NULL;
5257         }
5258
5259         return table;
5260 }
5261 #endif
5262
5263 static int __net_init ip6_route_net_init(struct net *net)
5264 {
5265         int ret = -ENOMEM;
5266
5267         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5268                sizeof(net->ipv6.ip6_dst_ops));
5269
5270         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5271                 goto out_ip6_dst_ops;
5272
5273         net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5274                                             sizeof(*net->ipv6.fib6_null_entry),
5275                                             GFP_KERNEL);
5276         if (!net->ipv6.fib6_null_entry)
5277                 goto out_ip6_dst_entries;
5278
5279         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5280                                            sizeof(*net->ipv6.ip6_null_entry),
5281                                            GFP_KERNEL);
5282         if (!net->ipv6.ip6_null_entry)
5283                 goto out_fib6_null_entry;
5284         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5285         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5286                          ip6_template_metrics, true);
5287
5288 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5289         net->ipv6.fib6_has_custom_rules = false;
5290         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5291                                                sizeof(*net->ipv6.ip6_prohibit_entry),
5292                                                GFP_KERNEL);
5293         if (!net->ipv6.ip6_prohibit_entry)
5294                 goto out_ip6_null_entry;
5295         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5296         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5297                          ip6_template_metrics, true);
5298
5299         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5300                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
5301                                                GFP_KERNEL);
5302         if (!net->ipv6.ip6_blk_hole_entry)
5303                 goto out_ip6_prohibit_entry;
5304         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5305         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5306                          ip6_template_metrics, true);
5307 #endif
5308
5309         net->ipv6.sysctl.flush_delay = 0;
5310         net->ipv6.sysctl.ip6_rt_max_size = 4096;
5311         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5312         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5313         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5314         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5315         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5316         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5317         net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5318
5319         net->ipv6.ip6_rt_gc_expire = 30*HZ;
5320
5321         ret = 0;
5322 out:
5323         return ret;
5324
5325 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5326 out_ip6_prohibit_entry:
5327         kfree(net->ipv6.ip6_prohibit_entry);
5328 out_ip6_null_entry:
5329         kfree(net->ipv6.ip6_null_entry);
5330 #endif
5331 out_fib6_null_entry:
5332         kfree(net->ipv6.fib6_null_entry);
5333 out_ip6_dst_entries:
5334         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5335 out_ip6_dst_ops:
5336         goto out;
5337 }
5338
5339 static void __net_exit ip6_route_net_exit(struct net *net)
5340 {
5341         kfree(net->ipv6.fib6_null_entry);
5342         kfree(net->ipv6.ip6_null_entry);
5343 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5344         kfree(net->ipv6.ip6_prohibit_entry);
5345         kfree(net->ipv6.ip6_blk_hole_entry);
5346 #endif
5347         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5348 }
5349
5350 static int __net_init ip6_route_net_init_late(struct net *net)
5351 {
5352 #ifdef CONFIG_PROC_FS
5353         proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5354                         sizeof(struct ipv6_route_iter));
5355         proc_create_net_single("rt6_stats", 0444, net->proc_net,
5356                         rt6_stats_seq_show, NULL);
5357 #endif
5358         return 0;
5359 }
5360
5361 static void __net_exit ip6_route_net_exit_late(struct net *net)
5362 {
5363 #ifdef CONFIG_PROC_FS
5364         remove_proc_entry("ipv6_route", net->proc_net);
5365         remove_proc_entry("rt6_stats", net->proc_net);
5366 #endif
5367 }
5368
5369 static struct pernet_operations ip6_route_net_ops = {
5370         .init = ip6_route_net_init,
5371         .exit = ip6_route_net_exit,
5372 };
5373
5374 static int __net_init ipv6_inetpeer_init(struct net *net)
5375 {
5376         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5377
5378         if (!bp)
5379                 return -ENOMEM;
5380         inet_peer_base_init(bp);
5381         net->ipv6.peers = bp;
5382         return 0;
5383 }
5384
5385 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5386 {
5387         struct inet_peer_base *bp = net->ipv6.peers;
5388
5389         net->ipv6.peers = NULL;
5390         inetpeer_invalidate_tree(bp);
5391         kfree(bp);
5392 }
5393
5394 static struct pernet_operations ipv6_inetpeer_ops = {
5395         .init   =       ipv6_inetpeer_init,
5396         .exit   =       ipv6_inetpeer_exit,
5397 };
5398
5399 static struct pernet_operations ip6_route_net_late_ops = {
5400         .init = ip6_route_net_init_late,
5401         .exit = ip6_route_net_exit_late,
5402 };
5403
5404 static struct notifier_block ip6_route_dev_notifier = {
5405         .notifier_call = ip6_route_dev_notify,
5406         .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5407 };
5408
5409 void __init ip6_route_init_special_entries(void)
5410 {
5411         /* Registering of the loopback is done before this portion of code,
5412          * the loopback reference in rt6_info will not be taken, do it
5413          * manually for init_net */
5414         init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5415         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5416         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5417   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5418         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5419         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5420         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5421         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5422   #endif
5423 }
5424
5425 int __init ip6_route_init(void)
5426 {
5427         int ret;
5428         int cpu;
5429
5430         ret = -ENOMEM;
5431         ip6_dst_ops_template.kmem_cachep =
5432                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5433                                   SLAB_HWCACHE_ALIGN, NULL);
5434         if (!ip6_dst_ops_template.kmem_cachep)
5435                 goto out;
5436
5437         ret = dst_entries_init(&ip6_dst_blackhole_ops);
5438         if (ret)
5439                 goto out_kmem_cache;
5440
5441         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5442         if (ret)
5443                 goto out_dst_entries;
5444
5445         ret = register_pernet_subsys(&ip6_route_net_ops);
5446         if (ret)
5447                 goto out_register_inetpeer;
5448
5449         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5450
5451         ret = fib6_init();
5452         if (ret)
5453                 goto out_register_subsys;
5454
5455         ret = xfrm6_init();
5456         if (ret)
5457                 goto out_fib6_init;
5458
5459         ret = fib6_rules_init();
5460         if (ret)
5461                 goto xfrm6_init;
5462
5463         ret = register_pernet_subsys(&ip6_route_net_late_ops);
5464         if (ret)
5465                 goto fib6_rules_init;
5466
5467         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5468                                    inet6_rtm_newroute, NULL, 0);
5469         if (ret < 0)
5470                 goto out_register_late_subsys;
5471
5472         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5473                                    inet6_rtm_delroute, NULL, 0);
5474         if (ret < 0)
5475                 goto out_register_late_subsys;
5476
5477         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5478                                    inet6_rtm_getroute, NULL,
5479                                    RTNL_FLAG_DOIT_UNLOCKED);
5480         if (ret < 0)
5481                 goto out_register_late_subsys;
5482
5483         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5484         if (ret)
5485                 goto out_register_late_subsys;
5486
5487         for_each_possible_cpu(cpu) {
5488                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5489
5490                 INIT_LIST_HEAD(&ul->head);
5491                 spin_lock_init(&ul->lock);
5492         }
5493
5494 out:
5495         return ret;
5496
5497 out_register_late_subsys:
5498         rtnl_unregister_all(PF_INET6);
5499         unregister_pernet_subsys(&ip6_route_net_late_ops);
5500 fib6_rules_init:
5501         fib6_rules_cleanup();
5502 xfrm6_init:
5503         xfrm6_fini();
5504 out_fib6_init:
5505         fib6_gc_cleanup();
5506 out_register_subsys:
5507         unregister_pernet_subsys(&ip6_route_net_ops);
5508 out_register_inetpeer:
5509         unregister_pernet_subsys(&ipv6_inetpeer_ops);
5510 out_dst_entries:
5511         dst_entries_destroy(&ip6_dst_blackhole_ops);
5512 out_kmem_cache:
5513         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5514         goto out;
5515 }
5516
5517 void ip6_route_cleanup(void)
5518 {
5519         unregister_netdevice_notifier(&ip6_route_dev_notifier);
5520         unregister_pernet_subsys(&ip6_route_net_late_ops);
5521         fib6_rules_cleanup();
5522         xfrm6_fini();
5523         fib6_gc_cleanup();
5524         unregister_pernet_subsys(&ipv6_inetpeer_ops);
5525         unregister_pernet_subsys(&ip6_route_net_ops);
5526         dst_entries_destroy(&ip6_dst_blackhole_ops);
5527         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5528 }