/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Fixed routing subtrees.
 */
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
56 #include <linux/rtnetlink.h>
58 #include <net/dst_metadata.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
67 #include <linux/uaccess.h>
70 #include <linux/sysctl.h>
73 static int ip6_rt_type_to_error(u8 fib6_type);
75 #define CREATE_TRACE_POINTS
76 #include <trace/events/fib6.h>
77 EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78 #undef CREATE_TRACE_POINTS
81 RT6_NUD_FAIL_HARD = -3,
82 RT6_NUD_FAIL_PROBE = -2,
83 RT6_NUD_FAIL_DO_RR = -1,
87 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
88 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
89 static unsigned int ip6_mtu(const struct dst_entry *dst);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(struct dst_ops *ops);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
98 static int ip6_pkt_prohibit(struct sk_buff *skb);
99 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
100 static void ip6_link_failure(struct sk_buff *skb);
101 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
105 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106 static size_t rt6_nlmsg_size(struct fib6_info *rt);
107 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
108 struct fib6_info *rt, struct dst_entry *dst,
109 struct in6_addr *dest, struct in6_addr *src,
110 int iif, int type, u32 portid, u32 seq,
112 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
116 #ifdef CONFIG_IPV6_ROUTE_INFO
117 static struct fib6_info *rt6_add_route_info(struct net *net,
118 const struct in6_addr *prefix, int prefixlen,
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
122 static struct fib6_info *rt6_get_route_info(struct net *net,
123 const struct in6_addr *prefix, int prefixlen,
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
128 struct uncached_list {
130 struct list_head head;
133 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
135 void rt6_uncached_list_add(struct rt6_info *rt)
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
139 rt->rt6i_uncached_list = ul;
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
146 void rt6_uncached_list_del(struct rt6_info *rt)
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
150 struct net *net = dev_net(rt->dst.dev);
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
155 spin_unlock_bh(&ul->lock);
159 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
161 struct net_device *loopback_dev = net->loopback_dev;
164 if (dev == loopback_dev)
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
176 if (rt_idev->dev == dev) {
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
187 spin_unlock_bh(&ul->lock);
191 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
195 if (!ipv6_addr_any(p))
196 return (const void *) p;
198 return &ipv6_hdr(skb)->daddr;
202 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
214 n = neigh_create(&nd_tbl, daddr, dev);
215 return IS_ERR(n) ? NULL : n;
218 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
222 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
227 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
229 struct net_device *dev = dst->dev;
230 struct rt6_info *rt = (struct rt6_info *)dst;
232 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
235 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
237 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
239 __ipv6_confirm_neigh(dev, daddr);
242 static struct dst_ops ip6_dst_ops_template = {
246 .check = ip6_dst_check,
247 .default_advmss = ip6_default_advmss,
249 .cow_metrics = dst_cow_metrics_generic,
250 .destroy = ip6_dst_destroy,
251 .ifdown = ip6_dst_ifdown,
252 .negative_advice = ip6_negative_advice,
253 .link_failure = ip6_link_failure,
254 .update_pmtu = ip6_rt_update_pmtu,
255 .redirect = rt6_do_redirect,
256 .local_out = __ip6_local_out,
257 .neigh_lookup = ip6_dst_neigh_lookup,
258 .confirm_neigh = ip6_confirm_neigh,
261 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
263 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265 return mtu ? : dst->dev->mtu;
268 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
269 struct sk_buff *skb, u32 mtu)
273 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
278 static struct dst_ops ip6_dst_blackhole_ops = {
280 .destroy = ip6_dst_destroy,
281 .check = ip6_dst_check,
282 .mtu = ip6_blackhole_mtu,
283 .default_advmss = ip6_default_advmss,
284 .update_pmtu = ip6_rt_blackhole_update_pmtu,
285 .redirect = ip6_rt_blackhole_redirect,
286 .cow_metrics = dst_cow_metrics_generic,
287 .neigh_lookup = ip6_dst_neigh_lookup,
290 static const u32 ip6_template_metrics[RTAX_MAX] = {
291 [RTAX_HOPLIMIT - 1] = 0,
294 static const struct fib6_info fib6_null_entry_template = {
295 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
296 .fib6_protocol = RTPROT_KERNEL,
297 .fib6_metric = ~(u32)0,
298 .fib6_ref = ATOMIC_INIT(1),
299 .fib6_type = RTN_UNREACHABLE,
300 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
303 static const struct rt6_info ip6_null_entry_template = {
305 .__refcnt = ATOMIC_INIT(1),
307 .obsolete = DST_OBSOLETE_FORCE_CHK,
308 .error = -ENETUNREACH,
309 .input = ip6_pkt_discard,
310 .output = ip6_pkt_discard_out,
312 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
315 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
317 static const struct rt6_info ip6_prohibit_entry_template = {
319 .__refcnt = ATOMIC_INIT(1),
321 .obsolete = DST_OBSOLETE_FORCE_CHK,
323 .input = ip6_pkt_prohibit,
324 .output = ip6_pkt_prohibit_out,
326 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
329 static const struct rt6_info ip6_blk_hole_entry_template = {
331 .__refcnt = ATOMIC_INIT(1),
333 .obsolete = DST_OBSOLETE_FORCE_CHK,
335 .input = dst_discard,
336 .output = dst_discard_out,
338 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
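/* rt6_info_init(): zero every field that follows the embedded dst_entry and
 * reset the uncached-list linkage of a freshly allocated rt6_info.
 */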
343 static void rt6_info_init(struct rt6_info *rt)
345 struct dst_entry *dst = &rt->dst;
347 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
348 INIT_LIST_HEAD(&rt->rt6i_uncached);
351 /* allocate dst with ip6_dst_ops */
352 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356 1, DST_OBSOLETE_FORCE_CHK, flags);
360 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
365 EXPORT_SYMBOL(ip6_dst_alloc);
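/* dst_ops destroy hook: drop the metrics reference, unlink the route from
 * the per-cpu uncached list, put the inet6_dev, and release the fib6_info
 * this rt6_info was cloned from.
 */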
367 static void ip6_dst_destroy(struct dst_entry *dst)
369 struct rt6_info *rt = (struct rt6_info *)dst;
370 struct fib6_info *from;
371 struct inet6_dev *idev;
373 ip_dst_metrics_put(dst);
374 rt6_uncached_list_del(rt);
376 idev = rt->rt6i_idev;
378 rt->rt6i_idev = NULL;
383 from = rcu_dereference(rt->from);
384 rcu_assign_pointer(rt->from, NULL);
385 fib6_info_release(from);
389 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct inet6_dev *idev = rt->rt6i_idev;
394 struct net_device *loopback_dev =
395 dev_net(dev)->loopback_dev;
397 if (idev && idev->dev != loopback_dev) {
398 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
400 rt->rt6i_idev = loopback_idev;
406 static bool __rt6_check_expired(const struct rt6_info *rt)
408 if (rt->rt6i_flags & RTF_EXPIRES)
409 return time_after(jiffies, rt->dst.expires);
414 static bool rt6_check_expired(const struct rt6_info *rt)
416 struct fib6_info *from;
418 from = rcu_dereference(rt->from);
420 if (rt->rt6i_flags & RTF_EXPIRES) {
421 if (time_after(jiffies, rt->dst.expires))
424 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
425 fib6_check_expired(from);
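/* Pick one sibling of a multipath route based on the flow hash: every
 * nexthop owns a region of the hash space bounded by nh_upper_bound, and
 * the first sibling whose bound covers fl6->mp_hash and that still scores
 * as usable is chosen.
 */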
430 struct fib6_info *fib6_multipath_select(const struct net *net,
431 struct fib6_info *match,
432 struct flowi6 *fl6, int oif,
433 const struct sk_buff *skb,
436 struct fib6_info *sibling, *next_sibling;
/* We might have already computed the hash for ICMPv6 errors. In such
 * a case it will always be non-zero. Otherwise now is the time to do it.
 */
442 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
444 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
451 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
452 if (fl6->mp_hash > nh_upper_bound)
454 if (rt6_score_route(sibling, oif, strict) < 0)
/* Route lookup. rcu_read_lock() should be held. */
467 static inline struct fib6_info *rt6_device_match(struct net *net,
468 struct fib6_info *rt,
469 const struct in6_addr *saddr,
473 struct fib6_info *sprt;
475 if (!oif && ipv6_addr_any(saddr) &&
476 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
479 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
480 const struct net_device *dev = sprt->fib6_nh.nh_dev;
482 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
486 if (dev->ifindex == oif)
489 if (ipv6_chk_addr(net, saddr, dev,
490 flags & RT6_LOOKUP_F_IFACE))
495 if (oif && flags & RT6_LOOKUP_F_IFACE)
496 return net->ipv6.fib6_null_entry;
498 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
501 #ifdef CONFIG_IPV6_ROUTER_PREF
502 struct __rt6_probe_work {
503 struct work_struct work;
504 struct in6_addr target;
505 struct net_device *dev;
508 static void rt6_probe_deferred(struct work_struct *w)
510 struct in6_addr mcaddr;
511 struct __rt6_probe_work *work =
512 container_of(w, struct __rt6_probe_work, work);
514 addrconf_addr_solict_mult(&work->target, &mcaddr);
515 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
520 static void rt6_probe(struct fib6_info *rt)
522 struct __rt6_probe_work *work = NULL;
523 const struct in6_addr *nh_gw;
524 struct neighbour *neigh;
525 struct net_device *dev;
526 struct inet6_dev *idev;
/*
 * Okay, this does not seem to be appropriate
 * for now; however, we need to check if it
 * really is so, aka Router Reachability Probing.
 *
 * Router Reachability Probe MUST be rate-limited
 * to no more than one per minute.
 */
536 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
539 nh_gw = &rt->fib6_nh.nh_gw;
540 dev = rt->fib6_nh.nh_dev;
542 idev = __in6_dev_get(dev);
543 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
545 if (neigh->nud_state & NUD_VALID)
548 write_lock(&neigh->lock);
549 if (!(neigh->nud_state & NUD_VALID) &&
551 neigh->updated + idev->cnf.rtr_probe_interval)) {
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
554 __neigh_set_probe_once(neigh);
556 write_unlock(&neigh->lock);
557 } else if (time_after(jiffies, rt->last_probe +
558 idev->cnf.rtr_probe_interval)) {
559 work = kmalloc(sizeof(*work), GFP_ATOMIC);
563 rt->last_probe = jiffies;
564 INIT_WORK(&work->work, rt6_probe_deferred);
565 work->target = *nh_gw;
568 schedule_work(&work->work);
572 rcu_read_unlock_bh();
575 static inline void rt6_probe(struct fib6_info *rt)
/* Default Router Selection (RFC 2461 6.3.6) */
583 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
585 const struct net_device *dev = rt->fib6_nh.nh_dev;
587 if (!oif || dev->ifindex == oif)
592 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
594 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
595 struct neighbour *neigh;
597 if (rt->fib6_flags & RTF_NONEXTHOP ||
598 !(rt->fib6_flags & RTF_GATEWAY))
599 return RT6_NUD_SUCCEED;
602 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
605 read_lock(&neigh->lock);
606 if (neigh->nud_state & NUD_VALID)
607 ret = RT6_NUD_SUCCEED;
608 #ifdef CONFIG_IPV6_ROUTER_PREF
609 else if (!(neigh->nud_state & NUD_FAILED))
610 ret = RT6_NUD_SUCCEED;
612 ret = RT6_NUD_FAIL_PROBE;
614 read_unlock(&neigh->lock);
616 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
617 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
619 rcu_read_unlock_bh();
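/* Score a candidate route for rt6_select(): the device match forms the low
 * bits, the advertised router preference (CONFIG_IPV6_ROUTER_PREF) is OR'ed
 * in above them, and when reachability is required a negative RT6_NUD_*
 * value from rt6_check_neigh() is returned instead.
 */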
624 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
628 m = rt6_check_dev(rt, oif);
629 if (!m && (strict & RT6_LOOKUP_F_IFACE))
630 return RT6_NUD_FAIL_HARD;
631 #ifdef CONFIG_IPV6_ROUTER_PREF
632 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
634 if (strict & RT6_LOOKUP_F_REACHABLE) {
635 int n = rt6_check_neigh(rt);
/* called with rcu_read_lock held */
643 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
645 const struct net_device *dev = fib6_info_nh_dev(f6i);
649 const struct inet6_dev *idev = __in6_dev_get(dev);
651 rc = !!idev->cnf.ignore_routes_with_linkdown;
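/* Evaluate one route against the best match found so far: dead routes,
 * link-down routes (unless link state is ignored) and expired routes are
 * skipped; otherwise the route replaces @match when its rt6_score_route()
 * value beats *mpri.
 */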
657 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
658 int *mpri, struct fib6_info *match,
662 bool match_do_rr = false;
664 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
667 if (fib6_ignore_linkdown(rt) &&
668 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
669 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
672 if (fib6_check_expired(rt))
675 m = rt6_score_route(rt, oif, strict);
676 if (m == RT6_NUD_FAIL_DO_RR) {
678 m = 0; /* lowest valid score */
679 } else if (m == RT6_NUD_FAIL_HARD) {
683 if (strict & RT6_LOOKUP_F_REACHABLE)
686 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
688 *do_rr = match_do_rr;
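/* Scan the routes of one fib6_node that share @metric, starting at @rr_head
 * and wrapping around through the leaf, feeding each to find_match();
 * entries with a different metric are set aside and only consulted when
 * nothing else matched.
 */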
696 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
697 struct fib6_info *leaf,
698 struct fib6_info *rr_head,
699 u32 metric, int oif, int strict,
702 struct fib6_info *rt, *match, *cont;
707 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
708 if (rt->fib6_metric != metric) {
713 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 for (rt = leaf; rt && rt != rr_head;
717 rt = rcu_dereference(rt->fib6_next)) {
718 if (rt->fib6_metric != metric) {
723 match = find_match(rt, oif, strict, &mpri, match, do_rr);
729 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
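/* Round-robin default router selection: pick the best route under @fn via
 * find_rr_leaf() and, when round-robin is requested, advance fn->rr_ptr to
 * the next entry of the same metric (wrapping back to the leaf) so that
 * subsequent lookups start there.
 */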
735 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
738 struct fib6_info *leaf = rcu_dereference(fn->leaf);
739 struct fib6_info *match, *rt0;
743 if (!leaf || leaf == net->ipv6.fib6_null_entry)
744 return net->ipv6.fib6_null_entry;
746 rt0 = rcu_dereference(fn->rr_ptr);
750 /* Double check to make sure fn is not an intermediate node
 * and fn->leaf does not point to its child's leaf
752 * (This might happen if all routes under fn are deleted from
753 * the tree and fib6_repair_tree() is called on the node.)
755 key_plen = rt0->fib6_dst.plen;
756 #ifdef CONFIG_IPV6_SUBTREES
757 if (rt0->fib6_src.plen)
758 key_plen = rt0->fib6_src.plen;
760 if (fn->fn_bit != key_plen)
761 return net->ipv6.fib6_null_entry;
763 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
767 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
769 /* no entries matched; do round-robin */
770 if (!next || next->fib6_metric != rt0->fib6_metric)
774 spin_lock_bh(&leaf->fib6_table->tb6_lock);
775 /* make sure next is not being deleted from the tree */
777 rcu_assign_pointer(fn->rr_ptr, next);
778 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
782 return match ? match : net->ipv6.fib6_null_entry;
785 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
787 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
790 #ifdef CONFIG_IPV6_ROUTE_INFO
791 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
792 const struct in6_addr *gwaddr)
794 struct net *net = dev_net(dev);
795 struct route_info *rinfo = (struct route_info *) opt;
796 struct in6_addr prefix_buf, *prefix;
798 unsigned long lifetime;
799 struct fib6_info *rt;
801 if (len < sizeof(struct route_info)) {
805 /* Sanity check for prefix_len and length */
806 if (rinfo->length > 3) {
808 } else if (rinfo->prefix_len > 128) {
810 } else if (rinfo->prefix_len > 64) {
811 if (rinfo->length < 2) {
814 } else if (rinfo->prefix_len > 0) {
815 if (rinfo->length < 1) {
820 pref = rinfo->route_pref;
821 if (pref == ICMPV6_ROUTER_PREF_INVALID)
824 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
826 if (rinfo->length == 3)
827 prefix = (struct in6_addr *)rinfo->prefix;
829 /* this function is safe */
830 ipv6_addr_prefix(&prefix_buf,
831 (struct in6_addr *)rinfo->prefix,
833 prefix = &prefix_buf;
836 if (rinfo->prefix_len == 0)
837 rt = rt6_get_dflt_router(net, gwaddr, dev);
839 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
842 if (rt && !lifetime) {
848 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
851 rt->fib6_flags = RTF_ROUTEINFO |
852 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
855 if (!addrconf_finite_timeout(lifetime))
856 fib6_clean_expires(rt);
858 fib6_set_expires(rt, jiffies + HZ * lifetime);
860 fib6_info_release(rt);
/* Misc support functions */
/* called with rcu_read_lock held */
871 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
873 struct net_device *dev = rt->fib6_nh.nh_dev;
875 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
/* for copies of local routes, dst->dev needs to be the
 * device itself if it is a master device, the master device
 * if the device is enslaved, and the loopback device as the
 * default
 */
880 if (netif_is_l3_slave(dev) &&
881 !rt6_need_strict(&rt->fib6_dst.addr))
882 dev = l3mdev_master_dev_rcu(dev);
883 else if (!netif_is_l3_master(dev))
884 dev = dev_net(dev)->loopback_dev;
/* the last case is netif_is_l3_master(dev) being true, in
 * which case we want the dev itself to be returned
 */
893 static const int fib6_prop[RTN_MAX + 1] = {
900 [RTN_BLACKHOLE] = -EINVAL,
901 [RTN_UNREACHABLE] = -EHOSTUNREACH,
902 [RTN_PROHIBIT] = -EACCES,
903 [RTN_THROW] = -EAGAIN,
905 [RTN_XRESOLVE] = -EINVAL,
908 static int ip6_rt_type_to_error(u8 fib6_type)
910 return fib6_prop[fib6_type];
913 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
915 unsigned short flags = 0;
918 flags |= DST_NOCOUNT;
919 if (rt->dst_nopolicy)
920 flags |= DST_NOPOLICY;
927 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
929 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
931 switch (ort->fib6_type) {
933 rt->dst.output = dst_discard_out;
934 rt->dst.input = dst_discard;
937 rt->dst.output = ip6_pkt_prohibit_out;
938 rt->dst.input = ip6_pkt_prohibit;
941 case RTN_UNREACHABLE:
943 rt->dst.output = ip6_pkt_discard_out;
944 rt->dst.input = ip6_pkt_discard;
949 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
951 if (ort->fib6_flags & RTF_REJECT) {
952 ip6_rt_init_dst_reject(rt, ort);
957 rt->dst.output = ip6_output;
959 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
960 rt->dst.input = ip6_input;
961 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
962 rt->dst.input = ip6_mc_input;
964 rt->dst.input = ip6_forward;
967 if (ort->fib6_nh.nh_lwtstate) {
968 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
969 lwtunnel_set_redirect(&rt->dst);
972 rt->dst.lastuse = jiffies;
975 /* Caller must already hold reference to @from */
976 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
978 rt->rt6i_flags &= ~RTF_EXPIRES;
979 rcu_assign_pointer(rt->from, from);
980 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
983 /* Caller must already hold reference to @ort */
984 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
986 struct net_device *dev = fib6_info_nh_dev(ort);
988 ip6_rt_init_dst(rt, ort);
990 rt->rt6i_dst = ort->fib6_dst;
991 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
992 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
993 rt->rt6i_flags = ort->fib6_flags;
994 rt6_set_from(rt, ort);
995 #ifdef CONFIG_IPV6_SUBTREES
996 rt->rt6i_src = ort->fib6_src;
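/* Step back up the fib trie from @fn: move to the parent (or into the
 * parent's source subtree when CONFIG_IPV6_SUBTREES is enabled) until a
 * node that carries routes (RTN_RTINFO) is found; returns NULL once the
 * tree root is reached.
 */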
1000 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1001 struct in6_addr *saddr)
1003 struct fib6_node *pn, *sn;
1005 if (fn->fn_flags & RTN_TL_ROOT)
1007 pn = rcu_dereference(fn->parent);
1008 sn = FIB6_SUBTREE(pn);
1010 fn = fib6_node_lookup(sn, NULL, saddr);
1013 if (fn->fn_flags & RTN_RTINFO)
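/* Try to take a reference on *prt; if the dst is already being released,
 * substitute the per-netns ip6_null_entry (when @net is supplied) and
 * return false.
 */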
1018 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1020 struct rt6_info *rt = *prt;
1022 if (dst_hold_safe(&rt->dst))
1025 rt = net->ipv6.ip6_null_entry;
/* called with rcu_read_lock held */
1035 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1037 unsigned short flags = fib6_info_dst_flags(rt);
1038 struct net_device *dev = rt->fib6_nh.nh_dev;
1039 struct rt6_info *nrt;
1041 if (!fib6_info_hold_safe(rt))
1044 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1046 fib6_info_release(rt);
1050 ip6_rt_copy_init(nrt, rt);
1054 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1055 dst_hold(&nrt->dst);
1059 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1060 struct fib6_table *table,
1062 const struct sk_buff *skb,
1065 struct fib6_info *f6i;
1066 struct fib6_node *fn;
1067 struct rt6_info *rt;
1069 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1070 flags &= ~RT6_LOOKUP_F_IFACE;
1073 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1075 f6i = rcu_dereference(fn->leaf);
1077 f6i = net->ipv6.fib6_null_entry;
1079 f6i = rt6_device_match(net, f6i, &fl6->saddr,
1080 fl6->flowi6_oif, flags);
1081 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1082 f6i = fib6_multipath_select(net, f6i, fl6,
1083 fl6->flowi6_oif, skb,
1086 if (f6i == net->ipv6.fib6_null_entry) {
1087 fn = fib6_backtrack(fn, &fl6->saddr);
1092 trace_fib6_table_lookup(net, f6i, table, fl6);
1094 /* Search through exception table */
1095 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1097 if (ip6_hold_safe(net, &rt))
1098 dst_use_noref(&rt->dst, jiffies);
1099 } else if (f6i == net->ipv6.fib6_null_entry) {
1100 rt = net->ipv6.ip6_null_entry;
1103 rt = ip6_create_rt_rcu(f6i);
1111 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1112 const struct sk_buff *skb, int flags)
1114 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1116 EXPORT_SYMBOL_GPL(ip6_route_lookup);
1118 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1119 const struct in6_addr *saddr, int oif,
1120 const struct sk_buff *skb, int strict)
1122 struct flowi6 fl6 = {
1126 struct dst_entry *dst;
1127 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1130 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1131 flags |= RT6_LOOKUP_F_HAS_SADDR;
1134 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1135 if (dst->error == 0)
1136 return (struct rt6_info *) dst;
1142 EXPORT_SYMBOL(rt6_lookup);
/* ip6_ins_rt is called with the table->tb6_lock not held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */
1150 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1151 struct netlink_ext_ack *extack)
1154 struct fib6_table *table;
1156 table = rt->fib6_table;
1157 spin_lock_bh(&table->tb6_lock);
1158 err = fib6_add(&table->tb6_root, rt, info, extack);
1159 spin_unlock_bh(&table->tb6_lock);
1164 int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1166 struct nl_info info = { .nl_net = net, };
1168 return __ip6_ins_rt(rt, &info, NULL);
1171 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1172 const struct in6_addr *daddr,
1173 const struct in6_addr *saddr)
1175 struct net_device *dev;
1176 struct rt6_info *rt;
1182 if (!fib6_info_hold_safe(ort))
1185 dev = ip6_rt_get_dev_rcu(ort);
1186 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1188 fib6_info_release(ort);
1192 ip6_rt_copy_init(rt, ort);
1193 rt->rt6i_flags |= RTF_CACHE;
1194 rt->dst.flags |= DST_HOST;
1195 rt->rt6i_dst.addr = *daddr;
1196 rt->rt6i_dst.plen = 128;
1198 if (!rt6_is_gw_or_nonexthop(ort)) {
1199 if (ort->fib6_dst.plen != 128 &&
1200 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1201 rt->rt6i_flags |= RTF_ANYCAST;
1202 #ifdef CONFIG_IPV6_SUBTREES
1203 if (rt->rt6i_src.plen && saddr) {
1204 rt->rt6i_src.addr = *saddr;
1205 rt->rt6i_src.plen = 128;
1213 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1215 unsigned short flags = fib6_info_dst_flags(rt);
1216 struct net_device *dev;
1217 struct rt6_info *pcpu_rt;
1219 if (!fib6_info_hold_safe(rt))
1223 dev = ip6_rt_get_dev_rcu(rt);
1224 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1227 fib6_info_release(rt);
1230 ip6_rt_copy_init(pcpu_rt, rt);
1231 pcpu_rt->rt6i_flags |= RTF_PCPU;
1235 /* It should be called with rcu_read_lock() acquired */
1236 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1238 struct rt6_info *pcpu_rt, **p;
1240 p = this_cpu_ptr(rt->rt6i_pcpu);
1244 ip6_hold_safe(NULL, &pcpu_rt);
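/* Allocate a per-cpu copy of @rt and install it in the owner's rt6i_pcpu
 * slot for this CPU with cmpxchg(); if the allocation fails, the
 * always-valid null entry is returned with a reference held.
 */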
1249 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1250 struct fib6_info *rt)
1252 struct rt6_info *pcpu_rt, *prev, **p;
1254 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1256 dst_hold(&net->ipv6.ip6_null_entry->dst);
1257 return net->ipv6.ip6_null_entry;
1260 dst_hold(&pcpu_rt->dst);
1261 p = this_cpu_ptr(rt->rt6i_pcpu);
1262 prev = cmpxchg(p, NULL, pcpu_rt);
/* exception hash table implementation */
1270 static DEFINE_SPINLOCK(rt6_exception_lock);
1272 /* Remove rt6_ex from hash table and free the memory
1273 * Caller must hold rt6_exception_lock
1275 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1276 struct rt6_exception *rt6_ex)
1278 struct fib6_info *from;
1281 if (!bucket || !rt6_ex)
1284 net = dev_net(rt6_ex->rt6i->dst.dev);
1285 net->ipv6.rt6_stats->fib_rt_cache--;
/* completely purge the exception to allow releasing the held resources:
 * some [sk] cache may keep the dst around for an unlimited time
 */
1290 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1291 lockdep_is_held(&rt6_exception_lock));
1292 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1293 fib6_info_release(from);
1294 dst_dev_put(&rt6_ex->rt6i->dst);
1296 hlist_del_rcu(&rt6_ex->hlist);
1297 dst_release(&rt6_ex->rt6i->dst);
1298 kfree_rcu(rt6_ex, rcu);
1299 WARN_ON_ONCE(!bucket->depth);
1303 /* Remove oldest rt6_ex in bucket and free the memory
1304 * Caller must hold rt6_exception_lock
1306 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1308 struct rt6_exception *rt6_ex, *oldest = NULL;
1313 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1314 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1317 rt6_remove_exception(bucket, oldest);
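/* Hash the (dst, src) pair of an exception route into a bucket index:
 * jhash of the destination address, mixed with the source address when
 * CONFIG_IPV6_SUBTREES is enabled, seeded with a boot-time random value
 * and folded down to FIB6_EXCEPTION_BUCKET_SIZE_SHIFT bits via hash_32().
 */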
1320 static u32 rt6_exception_hash(const struct in6_addr *dst,
1321 const struct in6_addr *src)
1323 static u32 seed __read_mostly;
1326 net_get_random_once(&seed, sizeof(seed));
1327 val = jhash(dst, sizeof(*dst), seed);
1329 #ifdef CONFIG_IPV6_SUBTREES
1331 val = jhash(src, sizeof(*src), val);
1333 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1336 /* Helper function to find the cached rt in the hash table
1337 * and update bucket pointer to point to the bucket for this
1338 * (daddr, saddr) pair
1339 * Caller must hold rt6_exception_lock
1341 static struct rt6_exception *
1342 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1343 const struct in6_addr *daddr,
1344 const struct in6_addr *saddr)
1346 struct rt6_exception *rt6_ex;
1349 if (!(*bucket) || !daddr)
1352 hval = rt6_exception_hash(daddr, saddr);
1355 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1356 struct rt6_info *rt6 = rt6_ex->rt6i;
1357 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1359 #ifdef CONFIG_IPV6_SUBTREES
1360 if (matched && saddr)
1361 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1369 /* Helper function to find the cached rt in the hash table
1370 * and update bucket pointer to point to the bucket for this
1371 * (daddr, saddr) pair
1372 * Caller must hold rcu_read_lock()
1374 static struct rt6_exception *
1375 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1376 const struct in6_addr *daddr,
1377 const struct in6_addr *saddr)
1379 struct rt6_exception *rt6_ex;
1382 WARN_ON_ONCE(!rcu_read_lock_held());
1384 if (!(*bucket) || !daddr)
1387 hval = rt6_exception_hash(daddr, saddr);
1390 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1391 struct rt6_info *rt6 = rt6_ex->rt6i;
1392 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1394 #ifdef CONFIG_IPV6_SUBTREES
1395 if (matched && saddr)
1396 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
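/* MTU of a fib6_info: the stored PMTU when set, otherwise the egress
 * device MTU (idev->cnf.mtu6), clamped to IP6_MAX_MTU and reduced by any
 * lwtunnel encapsulation headroom.
 */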
1404 static unsigned int fib6_mtu(const struct fib6_info *rt)
1408 if (rt->fib6_pmtu) {
1409 mtu = rt->fib6_pmtu;
1411 struct net_device *dev = fib6_info_nh_dev(rt);
1412 struct inet6_dev *idev;
1415 idev = __in6_dev_get(dev);
1416 mtu = idev->cnf.mtu6;
1420 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1422 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1425 static int rt6_insert_exception(struct rt6_info *nrt,
1426 struct fib6_info *ort)
1428 struct net *net = dev_net(nrt->dst.dev);
1429 struct rt6_exception_bucket *bucket;
1430 struct in6_addr *src_key = NULL;
1431 struct rt6_exception *rt6_ex;
1434 spin_lock_bh(&rt6_exception_lock);
1436 if (ort->exception_bucket_flushed) {
1441 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1442 lockdep_is_held(&rt6_exception_lock));
1444 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1450 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1453 #ifdef CONFIG_IPV6_SUBTREES
1454 /* rt6i_src.plen != 0 indicates ort is in subtree
1455 * and exception table is indexed by a hash of
1456 * both rt6i_dst and rt6i_src.
1457 * Otherwise, the exception table is indexed by
1458 * a hash of only rt6i_dst.
1460 if (ort->fib6_src.plen)
1461 src_key = &nrt->rt6i_src.addr;
1463 /* rt6_mtu_change() might lower mtu on ort.
1464 * Only insert this exception route if its mtu
1465 * is less than ort's mtu value.
1467 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1472 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1475 rt6_remove_exception(bucket, rt6_ex);
1477 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1483 rt6_ex->stamp = jiffies;
1484 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1486 net->ipv6.rt6_stats->fib_rt_cache++;
1488 if (bucket->depth > FIB6_MAX_DEPTH)
1489 rt6_exception_remove_oldest(bucket);
1492 spin_unlock_bh(&rt6_exception_lock);
1494 /* Update fn->fn_sernum to invalidate all cached dst */
1496 spin_lock_bh(&ort->fib6_table->tb6_lock);
1497 fib6_update_sernum(net, ort);
1498 spin_unlock_bh(&ort->fib6_table->tb6_lock);
1499 fib6_force_start_gc(net);
1505 void rt6_flush_exceptions(struct fib6_info *rt)
1507 struct rt6_exception_bucket *bucket;
1508 struct rt6_exception *rt6_ex;
1509 struct hlist_node *tmp;
1512 spin_lock_bh(&rt6_exception_lock);
/* Prevent rt6_insert_exception() from recreating the bucket list */
1514 rt->exception_bucket_flushed = 1;
1516 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1517 lockdep_is_held(&rt6_exception_lock));
1521 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1522 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1523 rt6_remove_exception(bucket, rt6_ex);
1524 WARN_ON_ONCE(bucket->depth);
1529 spin_unlock_bh(&rt6_exception_lock);
1532 /* Find cached rt in the hash table inside passed in rt
1533 * Caller has to hold rcu_read_lock()
1535 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1536 struct in6_addr *daddr,
1537 struct in6_addr *saddr)
1539 struct rt6_exception_bucket *bucket;
1540 struct in6_addr *src_key = NULL;
1541 struct rt6_exception *rt6_ex;
1542 struct rt6_info *res = NULL;
1544 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1546 #ifdef CONFIG_IPV6_SUBTREES
1547 /* rt6i_src.plen != 0 indicates rt is in subtree
1548 * and exception table is indexed by a hash of
1549 * both rt6i_dst and rt6i_src.
1550 * Otherwise, the exception table is indexed by
1551 * a hash of only rt6i_dst.
1553 if (rt->fib6_src.plen)
1556 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1558 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1564 /* Remove the passed in cached rt from the hash table that contains it */
1565 static int rt6_remove_exception_rt(struct rt6_info *rt)
1567 struct rt6_exception_bucket *bucket;
1568 struct in6_addr *src_key = NULL;
1569 struct rt6_exception *rt6_ex;
1570 struct fib6_info *from;
1573 from = rcu_dereference(rt->from);
1575 !(rt->rt6i_flags & RTF_CACHE))
1578 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1581 spin_lock_bh(&rt6_exception_lock);
1582 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1583 lockdep_is_held(&rt6_exception_lock));
1584 #ifdef CONFIG_IPV6_SUBTREES
1585 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1586 * and exception table is indexed by a hash of
1587 * both rt6i_dst and rt6i_src.
1588 * Otherwise, the exception table is indexed by
1589 * a hash of only rt6i_dst.
1591 if (from->fib6_src.plen)
1592 src_key = &rt->rt6i_src.addr;
1594 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1598 rt6_remove_exception(bucket, rt6_ex);
1604 spin_unlock_bh(&rt6_exception_lock);
/* Find rt6_ex which contains the passed in rt cache and
 * refresh its stamp.
 */
1611 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1613 struct rt6_exception_bucket *bucket;
1614 struct in6_addr *src_key = NULL;
1615 struct rt6_exception *rt6_ex;
1616 struct fib6_info *from;
1619 from = rcu_dereference(rt->from);
1620 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1623 bucket = rcu_dereference(from->rt6i_exception_bucket);
1625 #ifdef CONFIG_IPV6_SUBTREES
1626 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1627 * and exception table is indexed by a hash of
1628 * both rt6i_dst and rt6i_src.
1629 * Otherwise, the exception table is indexed by
1630 * a hash of only rt6i_dst.
1632 if (from->fib6_src.plen)
1633 src_key = &rt->rt6i_src.addr;
1635 rt6_ex = __rt6_find_exception_rcu(&bucket,
1639 rt6_ex->stamp = jiffies;
1645 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1646 struct rt6_info *rt, int mtu)
1648 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1649 * lowest MTU in the path: always allow updating the route PMTU to
1650 * reflect PMTU decreases.
1652 * If the new MTU is higher, and the route PMTU is equal to the local
1653 * MTU, this means the old MTU is the lowest in the path, so allow
 * updating it: if other nodes now have lower MTUs, PMTU discovery will
 * handle this.
 */
1658 if (dst_mtu(&rt->dst) >= mtu)
1661 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1667 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1668 struct fib6_info *rt, int mtu)
1670 struct rt6_exception_bucket *bucket;
1671 struct rt6_exception *rt6_ex;
1674 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1675 lockdep_is_held(&rt6_exception_lock));
1680 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1681 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1682 struct rt6_info *entry = rt6_ex->rt6i;
1684 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
 * route), the metrics of its rt->from have already
 * been updated.
 */
1688 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1689 rt6_mtu_change_route_allowed(idev, entry, mtu))
1690 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1696 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1698 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1699 struct in6_addr *gateway)
1701 struct rt6_exception_bucket *bucket;
1702 struct rt6_exception *rt6_ex;
1703 struct hlist_node *tmp;
1706 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1709 spin_lock_bh(&rt6_exception_lock);
1710 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1711 lockdep_is_held(&rt6_exception_lock));
1714 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1715 hlist_for_each_entry_safe(rt6_ex, tmp,
1716 &bucket->chain, hlist) {
1717 struct rt6_info *entry = rt6_ex->rt6i;
1719 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1720 RTF_CACHE_GATEWAY &&
1721 ipv6_addr_equal(gateway,
1722 &entry->rt6i_gateway)) {
1723 rt6_remove_exception(bucket, rt6_ex);
1730 spin_unlock_bh(&rt6_exception_lock);
1733 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1734 struct rt6_exception *rt6_ex,
1735 struct fib6_gc_args *gc_args,
1738 struct rt6_info *rt = rt6_ex->rt6i;
/* we are pruning and obsoleting aged-out and non-gateway exceptions
 * even if others still hold references to them, so that on the next
 * dst_check() such references can be dropped.
 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
 * expired, independently of their aging, as per RFC 8201 section 4
 */
1746 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1747 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1748 RT6_TRACE("aging clone %p\n", rt);
1749 rt6_remove_exception(bucket, rt6_ex);
1752 } else if (time_after(jiffies, rt->dst.expires)) {
1753 RT6_TRACE("purging expired route %p\n", rt);
1754 rt6_remove_exception(bucket, rt6_ex);
1758 if (rt->rt6i_flags & RTF_GATEWAY) {
1759 struct neighbour *neigh;
1760 __u8 neigh_flags = 0;
1762 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1764 neigh_flags = neigh->flags;
1766 if (!(neigh_flags & NTF_ROUTER)) {
1767 RT6_TRACE("purging route %p via non-router but gateway\n",
1769 rt6_remove_exception(bucket, rt6_ex);
1777 void rt6_age_exceptions(struct fib6_info *rt,
1778 struct fib6_gc_args *gc_args,
1781 struct rt6_exception_bucket *bucket;
1782 struct rt6_exception *rt6_ex;
1783 struct hlist_node *tmp;
1786 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1790 spin_lock(&rt6_exception_lock);
1791 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1792 lockdep_is_held(&rt6_exception_lock));
1795 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1796 hlist_for_each_entry_safe(rt6_ex, tmp,
1797 &bucket->chain, hlist) {
1798 rt6_age_examine_exception(bucket, rt6_ex,
1804 spin_unlock(&rt6_exception_lock);
1805 rcu_read_unlock_bh();
1808 /* must be called with rcu lock held */
1809 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1810 int oif, struct flowi6 *fl6, int strict)
1812 struct fib6_node *fn, *saved_fn;
1813 struct fib6_info *f6i;
1815 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1818 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1822 f6i = rt6_select(net, fn, oif, strict);
1823 if (f6i == net->ipv6.fib6_null_entry) {
1824 fn = fib6_backtrack(fn, &fl6->saddr);
1826 goto redo_rt6_select;
1827 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1828 /* also consider unreachable route */
1829 strict &= ~RT6_LOOKUP_F_REACHABLE;
1831 goto redo_rt6_select;
1835 trace_fib6_table_lookup(net, f6i, table, fl6);
1840 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1841 int oif, struct flowi6 *fl6,
1842 const struct sk_buff *skb, int flags)
1844 struct fib6_info *f6i;
1845 struct rt6_info *rt;
1848 strict |= flags & RT6_LOOKUP_F_IFACE;
1849 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1850 if (net->ipv6.devconf_all->forwarding == 0)
1851 strict |= RT6_LOOKUP_F_REACHABLE;
1855 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1856 if (f6i->fib6_nsiblings)
1857 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1859 if (f6i == net->ipv6.fib6_null_entry) {
1860 rt = net->ipv6.ip6_null_entry;
/* Search through exception table */
1867 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1869 if (ip6_hold_safe(net, &rt))
1870 dst_use_noref(&rt->dst, jiffies);
1874 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1875 !(f6i->fib6_flags & RTF_GATEWAY))) {
1876 /* Create a RTF_CACHE clone which will not be
1877 * owned by the fib6 tree. It is for the special case where
1878 * the daddr in the skb during the neighbor look-up is different
1879 * from the fl6->daddr used to look-up route here.
1881 struct rt6_info *uncached_rt;
1883 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1888 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1889 * No need for another dst_hold()
1891 rt6_uncached_list_add(uncached_rt);
1892 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1894 uncached_rt = net->ipv6.ip6_null_entry;
1895 dst_hold(&uncached_rt->dst);
1900 /* Get a percpu copy */
1902 struct rt6_info *pcpu_rt;
1905 pcpu_rt = rt6_get_pcpu_route(f6i);
1908 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1916 EXPORT_SYMBOL_GPL(ip6_pol_route);
1918 static struct rt6_info *ip6_pol_route_input(struct net *net,
1919 struct fib6_table *table,
1921 const struct sk_buff *skb,
1924 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1927 struct dst_entry *ip6_route_input_lookup(struct net *net,
1928 struct net_device *dev,
1930 const struct sk_buff *skb,
1933 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1934 flags |= RT6_LOOKUP_F_IFACE;
1936 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1938 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
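/* Fill the L3 flow keys used for multipath hashing.  For ICMPv6 error
 * messages the addresses of the embedded (inner) packet are used, so the
 * error is hashed onto the same path as the flow that triggered it;
 * otherwise the outer header (or the pre-dissected flkeys) is used.
 */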
1940 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1941 struct flow_keys *keys,
1942 struct flow_keys *flkeys)
1944 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1945 const struct ipv6hdr *key_iph = outer_iph;
1946 struct flow_keys *_flkeys = flkeys;
1947 const struct ipv6hdr *inner_iph;
1948 const struct icmp6hdr *icmph;
1949 struct ipv6hdr _inner_iph;
1950 struct icmp6hdr _icmph;
1952 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1955 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1956 sizeof(_icmph), &_icmph);
1960 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1961 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1962 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1963 icmph->icmp6_type != ICMPV6_PARAMPROB)
1966 inner_iph = skb_header_pointer(skb,
1967 skb_transport_offset(skb) + sizeof(*icmph),
1968 sizeof(_inner_iph), &_inner_iph);
1972 key_iph = inner_iph;
1976 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1977 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1978 keys->tags.flow_label = _flkeys->tags.flow_label;
1979 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1981 keys->addrs.v6addrs.src = key_iph->saddr;
1982 keys->addrs.v6addrs.dst = key_iph->daddr;
1983 keys->tags.flow_label = ip6_flowlabel(key_iph);
1984 keys->basic.ip_proto = key_iph->nexthdr;
1988 /* if skb is set it will be used and fl6 can be NULL */
1989 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1990 const struct sk_buff *skb, struct flow_keys *flkeys)
1992 struct flow_keys hash_keys;
1995 switch (ip6_multipath_hash_policy(net)) {
1997 memset(&hash_keys, 0, sizeof(hash_keys));
1998 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2000 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2002 hash_keys.addrs.v6addrs.src = fl6->saddr;
2003 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2004 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2005 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2010 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2011 struct flow_keys keys;
2013 /* short-circuit if we already have L4 hash present */
2015 return skb_get_hash_raw(skb) >> 1;
2017 memset(&hash_keys, 0, sizeof(hash_keys));
2020 skb_flow_dissect_flow_keys(skb, &keys, flag);
2023 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2024 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2025 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2026 hash_keys.ports.src = flkeys->ports.src;
2027 hash_keys.ports.dst = flkeys->ports.dst;
2028 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2030 memset(&hash_keys, 0, sizeof(hash_keys));
2031 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2032 hash_keys.addrs.v6addrs.src = fl6->saddr;
2033 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2034 hash_keys.ports.src = fl6->fl6_sport;
2035 hash_keys.ports.dst = fl6->fl6_dport;
2036 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2040 mhash = flow_hash_from_keys(&hash_keys);
2045 void ip6_route_input(struct sk_buff *skb)
2047 const struct ipv6hdr *iph = ipv6_hdr(skb);
2048 struct net *net = dev_net(skb->dev);
2049 int flags = RT6_LOOKUP_F_HAS_SADDR;
2050 struct ip_tunnel_info *tun_info;
2051 struct flowi6 fl6 = {
2052 .flowi6_iif = skb->dev->ifindex,
2053 .daddr = iph->daddr,
2054 .saddr = iph->saddr,
2055 .flowlabel = ip6_flowinfo(iph),
2056 .flowi6_mark = skb->mark,
2057 .flowi6_proto = iph->nexthdr,
2059 struct flow_keys *flkeys = NULL, _flkeys;
2061 tun_info = skb_tunnel_info(skb);
2062 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2063 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2065 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2068 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2069 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2072 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2075 static struct rt6_info *ip6_pol_route_output(struct net *net,
2076 struct fib6_table *table,
2078 const struct sk_buff *skb,
2081 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2084 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2085 struct flowi6 *fl6, int flags)
2089 if (ipv6_addr_type(&fl6->daddr) &
2090 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2091 struct dst_entry *dst;
2093 dst = l3mdev_link_scope_lookup(net, fl6);
2098 fl6->flowi6_iif = LOOPBACK_IFINDEX;
2100 any_src = ipv6_addr_any(&fl6->saddr);
2101 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2102 (fl6->flowi6_oif && any_src))
2103 flags |= RT6_LOOKUP_F_IFACE;
2106 flags |= RT6_LOOKUP_F_HAS_SADDR;
2108 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2110 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2112 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2114 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2116 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2117 struct net_device *loopback_dev = net->loopback_dev;
2118 struct dst_entry *new = NULL;
2120 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2121 DST_OBSOLETE_DEAD, 0);
2124 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2128 new->input = dst_discard;
2129 new->output = dst_discard_out;
2131 dst_copy_metrics(new, &ort->dst);
2133 rt->rt6i_idev = in6_dev_get(loopback_dev);
2134 rt->rt6i_gateway = ort->rt6i_gateway;
2135 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2137 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2138 #ifdef CONFIG_IPV6_SUBTREES
2139 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2143 dst_release(dst_orig);
2144 return new ? new : ERR_PTR(-ENOMEM);
/* Destination cache support functions */
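/* A fib6_info is still valid for a cached dst if its table cookie has not
 * changed (no tree modification has invalidated it) and it has not expired.
 */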
2151 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2155 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2158 if (fib6_check_expired(f6i))
2164 static struct dst_entry *rt6_check(struct rt6_info *rt,
2165 struct fib6_info *from,
2170 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2171 rt_cookie != cookie)
2174 if (rt6_check_expired(rt))
2180 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2181 struct fib6_info *from,
2184 if (!__rt6_check_expired(rt) &&
2185 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2186 fib6_check(from, cookie))
2192 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2194 struct dst_entry *dst_ret;
2195 struct fib6_info *from;
2196 struct rt6_info *rt;
2198 rt = container_of(dst, struct rt6_info, dst);
2202 /* All IPV6 dsts are created with ->obsolete set to the value
2203 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2204 * into this function always.
2207 from = rcu_dereference(rt->from);
2209 if (from && (rt->rt6i_flags & RTF_PCPU ||
2210 unlikely(!list_empty(&rt->rt6i_uncached))))
2211 dst_ret = rt6_dst_from_check(rt, from, cookie);
2213 dst_ret = rt6_check(rt, from, cookie);
2220 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2222 struct rt6_info *rt = (struct rt6_info *) dst;
2225 if (rt->rt6i_flags & RTF_CACHE) {
2227 if (rt6_check_expired(rt)) {
2228 rt6_remove_exception_rt(rt);
2240 static void ip6_link_failure(struct sk_buff *skb)
2242 struct rt6_info *rt;
2244 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2246 rt = (struct rt6_info *) skb_dst(skb);
2249 if (rt->rt6i_flags & RTF_CACHE) {
2250 rt6_remove_exception_rt(rt);
2252 struct fib6_info *from;
2253 struct fib6_node *fn;
2255 from = rcu_dereference(rt->from);
2257 fn = rcu_dereference(from->fib6_node);
2258 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2266 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2268 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2269 struct fib6_info *from;
2272 from = rcu_dereference(rt0->from);
2274 rt0->dst.expires = from->expires;
2278 dst_set_expires(&rt0->dst, timeout);
2279 rt0->rt6i_flags |= RTF_EXPIRES;
2282 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2284 struct net *net = dev_net(rt->dst.dev);
2286 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2287 rt->rt6i_flags |= RTF_MODIFIED;
2288 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2291 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2293 return !(rt->rt6i_flags & RTF_CACHE) &&
2294 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
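/* Core of PMTU updates: pick the addresses from the packet header or the
 * socket, confirm the neighbour, clamp the new MTU to IPV6_MIN_MTU, and
 * then either lower the PMTU on the existing cached/pcpu route or create
 * an RTF_CACHE exception clone carrying the learned MTU.
 */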
2297 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2298 const struct ipv6hdr *iph, u32 mtu)
2300 const struct in6_addr *daddr, *saddr;
2301 struct rt6_info *rt6 = (struct rt6_info *)dst;
2303 if (dst_metric_locked(dst, RTAX_MTU))
2307 daddr = &iph->daddr;
2308 saddr = &iph->saddr;
2310 daddr = &sk->sk_v6_daddr;
2311 saddr = &inet6_sk(sk)->saddr;
2316 dst_confirm_neigh(dst, daddr);
2317 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2318 if (mtu >= dst_mtu(dst))
2321 if (!rt6_cache_allowed_for_pmtu(rt6)) {
2322 rt6_do_update_pmtu(rt6, mtu);
2323 /* update rt6_ex->stamp for cache */
2324 if (rt6->rt6i_flags & RTF_CACHE)
2325 rt6_update_exception_stamp_rt(rt6);
2327 struct fib6_info *from;
2328 struct rt6_info *nrt6;
2331 from = rcu_dereference(rt6->from);
2332 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2334 rt6_do_update_pmtu(nrt6, mtu);
2335 if (rt6_insert_exception(nrt6, from))
2336 dst_release_immediate(&nrt6->dst);
2342 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2343 struct sk_buff *skb, u32 mtu)
2345 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2348 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2349 int oif, u32 mark, kuid_t uid)
2351 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2352 struct dst_entry *dst;
2353 struct flowi6 fl6 = {
2355 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2356 .daddr = iph->daddr,
2357 .saddr = iph->saddr,
2358 .flowlabel = ip6_flowinfo(iph),
2362 dst = ip6_route_output(net, NULL, &fl6);
2364 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2367 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2369 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2371 int oif = sk->sk_bound_dev_if;
2372 struct dst_entry *dst;
2374 if (!oif && skb->dev)
2375 oif = l3mdev_master_ifindex(skb->dev);
2377 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2379 dst = __sk_dst_get(sk);
2380 if (!dst || !dst->obsolete ||
2381 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2385 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2386 ip6_datagram_dst_update(sk, false);
2389 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2391 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2392 const struct flowi6 *fl6)
2394 #ifdef CONFIG_IPV6_SUBTREES
2395 struct ipv6_pinfo *np = inet6_sk(sk);
2398 ip6_dst_store(sk, dst,
2399 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2400 &sk->sk_v6_daddr : NULL,
2401 #ifdef CONFIG_IPV6_SUBTREES
2402 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2408 /* Handle redirects */
2409 struct ip6rd_flowi {
2411 struct in6_addr gateway;
2414 static struct rt6_info *__ip6_route_redirect(struct net *net,
2415 struct fib6_table *table,
2417 const struct sk_buff *skb,
2420 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2421 struct rt6_info *ret = NULL, *rt_cache;
2422 struct fib6_info *rt;
2423 struct fib6_node *fn;
/* Get the "current" route for this destination and
 * check if the redirect has come from the appropriate router.
 *
 * RFC 4861 specifies that redirects should only be
 * accepted if they come from the nexthop to the target.
 * Due to the way the routes are chosen, this notion
 * is a bit fuzzy and one might need to check all possible
 * routes.
 */
2438 for_each_fib6_node_rt_rcu(fn) {
2439 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2441 if (fib6_check_expired(rt))
2443 if (rt->fib6_flags & RTF_REJECT)
2445 if (!(rt->fib6_flags & RTF_GATEWAY))
2447 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
/* rt_cache's gateway might be different from its 'parent'
 * in the case of an ip redirect.
 * So we keep searching in the exception table if the gateway
 * is different.
 */
2454 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2455 rt_cache = rt6_find_cached_rt(rt,
2459 ipv6_addr_equal(&rdfl->gateway,
2460 &rt_cache->rt6i_gateway)) {
2470 rt = net->ipv6.fib6_null_entry;
2471 else if (rt->fib6_flags & RTF_REJECT) {
2472 ret = net->ipv6.ip6_null_entry;
2476 if (rt == net->ipv6.fib6_null_entry) {
2477 fn = fib6_backtrack(fn, &fl6->saddr);
2484 ip6_hold_safe(net, &ret);
2486 ret = ip6_create_rt_rcu(rt);
2490 trace_fib6_table_lookup(net, rt, table, fl6);
2494 static struct dst_entry *ip6_route_redirect(struct net *net,
2495 const struct flowi6 *fl6,
2496 const struct sk_buff *skb,
2497 const struct in6_addr *gateway)
2499 int flags = RT6_LOOKUP_F_HAS_SADDR;
2500 struct ip6rd_flowi rdfl;
2503 rdfl.gateway = *gateway;
2505 return fib6_rule_lookup(net, &rdfl.fl6, skb,
2506 flags, __ip6_route_redirect);
2509 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2512 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2513 struct dst_entry *dst;
2514 struct flowi6 fl6 = {
2515 .flowi6_iif = LOOPBACK_IFINDEX,
2517 .flowi6_mark = mark,
2518 .daddr = iph->daddr,
2519 .saddr = iph->saddr,
2520 .flowlabel = ip6_flowinfo(iph),
2524 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2525 rt6_do_redirect(dst, NULL, skb);
2528 EXPORT_SYMBOL_GPL(ip6_redirect);
2530 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2532 const struct ipv6hdr *iph = ipv6_hdr(skb);
2533 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2534 struct dst_entry *dst;
2535 struct flowi6 fl6 = {
2536 .flowi6_iif = LOOPBACK_IFINDEX,
2539 .saddr = iph->daddr,
2540 .flowi6_uid = sock_net_uid(net, NULL),
2543 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2544 rt6_do_redirect(dst, NULL, skb);
2548 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2550 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2553 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2555 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2557 struct net_device *dev = dst->dev;
2558 unsigned int mtu = dst_mtu(dst);
2559 struct net *net = dev_net(dev);
2561 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2563 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2564 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2567 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2568 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2569 * IPV6_MAXPLEN is also valid and means: "any MSS,
2570 * rely only on pmtu discovery"
2572 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
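/* A minimal, self-contained sketch of the advmss arithmetic above, assuming a
 * 1500-byte link MTU and the default ip6_rt_min_advmss of 1220; the header
 * sizes are 40 (IPv6) + 20 (TCP).
 */
#if 0
static unsigned int example_ip6_advmss(unsigned int link_mtu,
				       unsigned int min_advmss)
{
	unsigned int mss = link_mtu - 40 - 20;	/* ipv6hdr + tcphdr */

	if (mss < min_advmss)
		mss = min_advmss;
	if (mss > 65535 - 20)			/* IPV6_MAXPLEN - tcphdr */
		mss = 65535 - 20;
	/* example_ip6_advmss(1500, 1220) == 1440 */
	return mss;
}
#endif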
2577 static unsigned int ip6_mtu(const struct dst_entry *dst)
2579 struct inet6_dev *idev;
2582 mtu = dst_metric_raw(dst, RTAX_MTU);
2589 idev = __in6_dev_get(dst->dev);
2591 mtu = idev->cnf.mtu6;
2595 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2597 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2601 * 1. mtu on route is locked - use it
2602 * 2. mtu from nexthop exception
2603 * 3. mtu from egress device
2605 * based on ip6_dst_mtu_forward and exception logic of
2606 * rt6_find_cached_rt; called with rcu_read_lock
2608 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2609 struct in6_addr *saddr)
2611 struct rt6_exception_bucket *bucket;
2612 struct rt6_exception *rt6_ex;
2613 struct in6_addr *src_key;
2614 struct inet6_dev *idev;
2617 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2618 mtu = f6i->fib6_pmtu;
2624 #ifdef CONFIG_IPV6_SUBTREES
2625 if (f6i->fib6_src.plen)
2629 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2630 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2631 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2632 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2635 struct net_device *dev = fib6_info_nh_dev(f6i);
2638 idev = __in6_dev_get(dev);
2639 if (idev && idev->cnf.mtu6 > mtu)
2640 mtu = idev->cnf.mtu6;
2643 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2645 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
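/* A simplified, self-contained sketch of the MTU selection order implemented
 * above: a locked route metric wins, then an unexpired nexthop exception,
 * then the egress device; hypothetical plain integers stand in for the
 * fib6/idev lookups.
 */
#if 0
static unsigned int example_mtu_from_fib6(unsigned int locked_route_mtu,
					  unsigned int exception_mtu,
					  unsigned int device_mtu,
					  unsigned int lwt_headroom)
{
	unsigned int mtu;

	if (locked_route_mtu)		/* 1. RTAX_MTU locked on the route */
		mtu = locked_route_mtu;
	else if (exception_mtu)		/* 2. cached PMTU exception for daddr */
		mtu = exception_mtu;
	else				/* 3. fall back to the egress device */
		mtu = device_mtu > 1280 ? device_mtu : 1280; /* IPV6_MIN_MTU floor */

	if (mtu > 65535 + 40)		/* IP6_MAX_MTU */
		mtu = 65535 + 40;
	return mtu - lwt_headroom;	/* leave room for lwtunnel encapsulation */
}
#endif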
2648 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2651 struct dst_entry *dst;
2652 struct rt6_info *rt;
2653 struct inet6_dev *idev = in6_dev_get(dev);
2654 struct net *net = dev_net(dev);
2656 if (unlikely(!idev))
2657 return ERR_PTR(-ENODEV);
2659 rt = ip6_dst_alloc(net, dev, 0);
2660 if (unlikely(!rt)) {
2662 dst = ERR_PTR(-ENOMEM);
2666 rt->dst.flags |= DST_HOST;
2667 rt->dst.input = ip6_input;
2668 rt->dst.output = ip6_output;
2669 rt->rt6i_gateway = fl6->daddr;
2670 rt->rt6i_dst.addr = fl6->daddr;
2671 rt->rt6i_dst.plen = 128;
2672 rt->rt6i_idev = idev;
2673 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2675 /* Add this dst into uncached_list so that rt6_disable_ip() can
2676 * do proper release of the net_device
2678 rt6_uncached_list_add(rt);
2679 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2681 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2687 static int ip6_dst_gc(struct dst_ops *ops)
2689 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2690 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2691 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2692 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2693 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2694 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2697 entries = dst_entries_get_fast(ops);
2698 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2699 entries <= rt_max_size)
2702 net->ipv6.ip6_rt_gc_expire++;
2703 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2704 entries = dst_entries_get_slow(ops);
2705 if (entries < ops->gc_thresh)
2706 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2708 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2709 return entries > rt_max_size;
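/* A self-contained sketch of how ip6_rt_gc_expire evolves per forced pass of
 * the collector above, assuming the default gc_timeout of 60*HZ and
 * gc_elasticity of 9.
 */
#if 0
static unsigned long example_gc_expire_step(unsigned long expire,
					    unsigned long gc_timeout,
					    int elasticity,
					    int dropped_below_thresh)
{
	expire++;				/* each forced pass gets more aggressive */
	if (dropped_below_thresh)		/* table shrank below gc_thresh */
		expire = gc_timeout >> 1;	/* reset to half the timeout */
	expire -= expire >> elasticity;		/* decay by ~1/512 with elasticity 9 */
	return expire;
}
#endif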
2712 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2713 struct fib6_config *cfg,
2714 const struct in6_addr *gw_addr,
2715 u32 tbid, int flags)
2717 struct flowi6 fl6 = {
2718 .flowi6_oif = cfg->fc_ifindex,
2720 .saddr = cfg->fc_prefsrc,
2722 struct fib6_table *table;
2723 struct rt6_info *rt;
2725 table = fib6_get_table(net, tbid);
2729 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2730 flags |= RT6_LOOKUP_F_HAS_SADDR;
2732 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2733 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2735 /* if table lookup failed, fall back to full lookup */
2736 if (rt == net->ipv6.ip6_null_entry) {
2744 static int ip6_route_check_nh_onlink(struct net *net,
2745 struct fib6_config *cfg,
2746 const struct net_device *dev,
2747 struct netlink_ext_ack *extack)
2749 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2750 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2751 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2752 struct fib6_info *from;
2753 struct rt6_info *grt;
2757 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2760 from = rcu_dereference(grt->from);
2761 if (!grt->dst.error &&
2762 /* ignore match if it is the default route */
2763 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
2764 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2765 NL_SET_ERR_MSG(extack,
2766 "Nexthop has invalid gateway or device mismatch");
2777 static int ip6_route_check_nh(struct net *net,
2778 struct fib6_config *cfg,
2779 struct net_device **_dev,
2780 struct inet6_dev **idev)
2782 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2783 struct net_device *dev = _dev ? *_dev : NULL;
2784 struct rt6_info *grt = NULL;
2785 int err = -EHOSTUNREACH;
2787 if (cfg->fc_table) {
2788 int flags = RT6_LOOKUP_F_IFACE;
2790 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2791 cfg->fc_table, flags);
2793 if (grt->rt6i_flags & RTF_GATEWAY ||
2794 (dev && dev != grt->dst.dev)) {
2802 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2808 if (dev != grt->dst.dev) {
2813 *_dev = dev = grt->dst.dev;
2814 *idev = grt->rt6i_idev;
2816 in6_dev_hold(grt->rt6i_idev);
2819 if (!(grt->rt6i_flags & RTF_GATEWAY))
2828 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2829 struct net_device **_dev, struct inet6_dev **idev,
2830 struct netlink_ext_ack *extack)
2832 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2833 int gwa_type = ipv6_addr_type(gw_addr);
2834 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2835 const struct net_device *dev = *_dev;
2836 bool need_addr_check = !dev;
2839 /* if gw_addr is local we will fail to detect this in case
2840 * the address is still TENTATIVE (DAD in progress). rt6_lookup()
2841 * will return the already-added prefix route via the interface that
2842 * the prefix route was assigned to, which might be non-loopback.
2845 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2846 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2850 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2851 /* IPv6 strictly inhibits using non-link-local
2852 * addresses as the nexthop address.
2853 * Otherwise, a router will not be able to send redirects.
2854 * That is a good thing, but in some (rare!) circumstances
2855 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2856 * some exceptions. --ANK
2857 * We allow IPv4-mapped nexthops to support RFC 4798-type addressing.
2860 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2861 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2865 if (cfg->fc_flags & RTNH_F_ONLINK)
2866 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2868 err = ip6_route_check_nh(net, cfg, _dev, idev);
2874 /* reload in case device was changed */
2879 NL_SET_ERR_MSG(extack, "Egress device not specified");
2881 } else if (dev->flags & IFF_LOOPBACK) {
2882 NL_SET_ERR_MSG(extack,
2883 "Egress device can not be loopback device for this route");
2887 /* if we did not check gw_addr above, do so now that the
2888 * egress device has been resolved.
2890 if (need_addr_check &&
2891 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2892 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2901 static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2903 if ((flags & RTF_REJECT) ||
2904 (dev && (dev->flags & IFF_LOOPBACK) &&
2905 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2906 !(flags & RTF_LOCAL)))
2912 int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2913 struct fib6_config *cfg, gfp_t gfp_flags,
2914 struct netlink_ext_ack *extack)
2916 struct net_device *dev = NULL;
2917 struct inet6_dev *idev = NULL;
2922 if (cfg->fc_ifindex) {
2923 dev = dev_get_by_index(net, cfg->fc_ifindex);
2926 idev = in6_dev_get(dev);
2931 if (cfg->fc_flags & RTNH_F_ONLINK) {
2933 NL_SET_ERR_MSG(extack,
2934 "Nexthop device required for onlink");
2938 if (!(dev->flags & IFF_UP)) {
2939 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2944 fib6_nh->nh_flags |= RTNH_F_ONLINK;
2947 if (cfg->fc_encap) {
2948 struct lwtunnel_state *lwtstate;
2950 err = lwtunnel_build_state(cfg->fc_encap_type,
2951 cfg->fc_encap, AF_INET6, cfg,
2956 fib6_nh->nh_lwtstate = lwtstate_get(lwtstate);
2959 fib6_nh->nh_weight = 1;
2961 /* We cannot add true routes via loopback here,
2962 * they would result in kernel looping; promote them to reject routes
2964 addr_type = ipv6_addr_type(&cfg->fc_dst);
2965 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
2966 /* hold loopback dev/idev if we haven't done so. */
2967 if (dev != net->loopback_dev) {
2972 dev = net->loopback_dev;
2974 idev = in6_dev_get(dev);
2983 if (cfg->fc_flags & RTF_GATEWAY) {
2984 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2988 fib6_nh->nh_gw = cfg->fc_gateway;
2995 if (idev->cnf.disable_ipv6) {
2996 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3001 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3002 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3007 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3008 !netif_carrier_ok(dev))
3009 fib6_nh->nh_flags |= RTNH_F_LINKDOWN;
3012 fib6_nh->nh_dev = dev;
3019 lwtstate_put(fib6_nh->nh_lwtstate);
3020 fib6_nh->nh_lwtstate = NULL;
3028 void fib6_nh_release(struct fib6_nh *fib6_nh)
3030 lwtstate_put(fib6_nh->nh_lwtstate);
3032 if (fib6_nh->nh_dev)
3033 dev_put(fib6_nh->nh_dev);
3036 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3038 struct netlink_ext_ack *extack)
3040 struct net *net = cfg->fc_nlinfo.nl_net;
3041 struct fib6_info *rt = NULL;
3042 struct fib6_table *table;
3046 /* RTF_PCPU is an internal flag; can not be set by userspace */
3047 if (cfg->fc_flags & RTF_PCPU) {
3048 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3052 /* RTF_CACHE is an internal flag; can not be set by userspace */
3053 if (cfg->fc_flags & RTF_CACHE) {
3054 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3058 if (cfg->fc_type > RTN_MAX) {
3059 NL_SET_ERR_MSG(extack, "Invalid route type");
3063 if (cfg->fc_dst_len > 128) {
3064 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3067 if (cfg->fc_src_len > 128) {
3068 NL_SET_ERR_MSG(extack, "Invalid source address length");
3071 #ifndef CONFIG_IPV6_SUBTREES
3072 if (cfg->fc_src_len) {
3073 NL_SET_ERR_MSG(extack,
3074 "Specifying source address requires IPV6_SUBTREES to be enabled");
3080 if (cfg->fc_nlinfo.nlh &&
3081 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3082 table = fib6_get_table(net, cfg->fc_table);
3084 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3085 table = fib6_new_table(net, cfg->fc_table);
3088 table = fib6_new_table(net, cfg->fc_table);
3095 rt = fib6_info_alloc(gfp_flags);
3099 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3101 if (IS_ERR(rt->fib6_metrics)) {
3102 err = PTR_ERR(rt->fib6_metrics);
3103 /* Do not leave garbage there. */
3104 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3108 if (cfg->fc_flags & RTF_ADDRCONF)
3109 rt->dst_nocount = true;
3111 if (cfg->fc_flags & RTF_EXPIRES)
3112 fib6_set_expires(rt, jiffies +
3113 clock_t_to_jiffies(cfg->fc_expires));
3115 fib6_clean_expires(rt);
3117 if (cfg->fc_protocol == RTPROT_UNSPEC)
3118 cfg->fc_protocol = RTPROT_BOOT;
3119 rt->fib6_protocol = cfg->fc_protocol;
3121 rt->fib6_table = table;
3122 rt->fib6_metric = cfg->fc_metric;
3123 rt->fib6_type = cfg->fc_type;
3124 rt->fib6_flags = cfg->fc_flags;
3126 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3127 rt->fib6_dst.plen = cfg->fc_dst_len;
3128 if (rt->fib6_dst.plen == 128)
3129 rt->dst_host = true;
3131 #ifdef CONFIG_IPV6_SUBTREES
3132 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3133 rt->fib6_src.plen = cfg->fc_src_len;
3135 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3139 /* We cannot add true routes via loopback here,
3140 * they would result in kernel looping; promote them to reject routes
3142 addr_type = ipv6_addr_type(&cfg->fc_dst);
3143 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.nh_dev, addr_type))
3144 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3146 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3147 struct net_device *dev = fib6_info_nh_dev(rt);
3149 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3150 NL_SET_ERR_MSG(extack, "Invalid source address");
3154 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3155 rt->fib6_prefsrc.plen = 128;
3157 rt->fib6_prefsrc.plen = 0;
3161 fib6_info_release(rt);
3162 return ERR_PTR(err);
3165 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3166 struct netlink_ext_ack *extack)
3168 struct fib6_info *rt;
3171 rt = ip6_route_info_create(cfg, gfp_flags, extack);
3175 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3176 fib6_info_release(rt);
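/* A minimal in-kernel sketch, assuming a valid net namespace and ifindex, of
 * how a caller could feed ip6_route_add() with a static gateway route; the
 * ioctl and netlink paths below build an equivalent fib6_config.
 */
#if 0
static int example_add_static_route(struct net *net, int ifindex,
				    const struct in6_addr *prefix, int plen,
				    const struct in6_addr *gw)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_MAIN,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst		= *prefix,
		.fc_dst_len	= plen,
		.fc_gateway	= *gw,
		.fc_flags	= RTF_UP | RTF_GATEWAY,
		.fc_protocol	= RTPROT_STATIC,
		.fc_type	= RTN_UNICAST,
		.fc_nlinfo.nl_net = net,
	};

	return ip6_route_add(&cfg, GFP_KERNEL, NULL);
}
#endif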
3181 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3183 struct net *net = info->nl_net;
3184 struct fib6_table *table;
3187 if (rt == net->ipv6.fib6_null_entry) {
3192 table = rt->fib6_table;
3193 spin_lock_bh(&table->tb6_lock);
3194 err = fib6_del(rt, info);
3195 spin_unlock_bh(&table->tb6_lock);
3198 fib6_info_release(rt);
3202 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3204 struct nl_info info = { .nl_net = net };
3206 return __ip6_del_rt(rt, &info);
3209 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3211 struct nl_info *info = &cfg->fc_nlinfo;
3212 struct net *net = info->nl_net;
3213 struct sk_buff *skb = NULL;
3214 struct fib6_table *table;
3217 if (rt == net->ipv6.fib6_null_entry)
3219 table = rt->fib6_table;
3220 spin_lock_bh(&table->tb6_lock);
3222 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3223 struct fib6_info *sibling, *next_sibling;
3225 /* prefer to send a single notification with all hops */
3226 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3228 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3230 if (rt6_fill_node(net, skb, rt, NULL,
3231 NULL, NULL, 0, RTM_DELROUTE,
3232 info->portid, seq, 0) < 0) {
3236 info->skip_notify = 1;
3239 list_for_each_entry_safe(sibling, next_sibling,
3242 err = fib6_del(sibling, info);
3248 err = fib6_del(rt, info);
3250 spin_unlock_bh(&table->tb6_lock);
3252 fib6_info_release(rt);
3255 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3256 info->nlh, gfp_any());
3261 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3265 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3268 if (cfg->fc_flags & RTF_GATEWAY &&
3269 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3272 rc = rt6_remove_exception_rt(rt);
3277 static int ip6_route_del(struct fib6_config *cfg,
3278 struct netlink_ext_ack *extack)
3280 struct rt6_info *rt_cache;
3281 struct fib6_table *table;
3282 struct fib6_info *rt;
3283 struct fib6_node *fn;
3286 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3288 NL_SET_ERR_MSG(extack, "FIB table does not exist");
3294 fn = fib6_locate(&table->tb6_root,
3295 &cfg->fc_dst, cfg->fc_dst_len,
3296 &cfg->fc_src, cfg->fc_src_len,
3297 !(cfg->fc_flags & RTF_CACHE));
3300 for_each_fib6_node_rt_rcu(fn) {
3301 if (cfg->fc_flags & RTF_CACHE) {
3304 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3307 rc = ip6_del_cached_rt(rt_cache, cfg);
3315 if (cfg->fc_ifindex &&
3316 (!rt->fib6_nh.nh_dev ||
3317 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3319 if (cfg->fc_flags & RTF_GATEWAY &&
3320 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3322 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3324 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3326 if (!fib6_info_hold_safe(rt))
3330 /* if a gateway was specified, only delete the one hop */
3331 if (cfg->fc_flags & RTF_GATEWAY)
3332 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3334 return __ip6_del_rt_siblings(rt, cfg);
3342 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3344 struct netevent_redirect netevent;
3345 struct rt6_info *rt, *nrt = NULL;
3346 struct ndisc_options ndopts;
3347 struct inet6_dev *in6_dev;
3348 struct neighbour *neigh;
3349 struct fib6_info *from;
3351 int optlen, on_link;
3354 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3355 optlen -= sizeof(*msg);
3358 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3362 msg = (struct rd_msg *)icmp6_hdr(skb);
3364 if (ipv6_addr_is_multicast(&msg->dest)) {
3365 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3370 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3372 } else if (ipv6_addr_type(&msg->target) !=
3373 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3374 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3378 in6_dev = __in6_dev_get(skb->dev);
3381 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3385 * The IP source address of the Redirect MUST be the same as the current
3386 * first-hop router for the specified ICMP Destination Address.
3389 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3390 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3395 if (ndopts.nd_opts_tgt_lladdr) {
3396 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3399 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3404 rt = (struct rt6_info *) dst;
3405 if (rt->rt6i_flags & RTF_REJECT) {
3406 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3410 /* Redirect received -> path was valid.
3411 * Look, redirects are sent only in response to data packets,
3412 * so that this nexthop apparently is reachable. --ANK
3414 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3416 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3421 * We have finally decided to accept it.
3424 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3425 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3426 NEIGH_UPDATE_F_OVERRIDE|
3427 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3428 NEIGH_UPDATE_F_ISROUTER)),
3429 NDISC_REDIRECT, &ndopts);
3432 from = rcu_dereference(rt->from);
3433 /* This fib6_info_hold() is safe here because we hold a reference to rt
3434 * and rt already holds a reference to the fib6_info.
3436 fib6_info_hold(from);
3439 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3443 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3445 nrt->rt6i_flags &= ~RTF_GATEWAY;
3447 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3449 /* No need to remove rt from the exception table if rt is
3450 * a cached route because rt6_insert_exception() will take care of duplicated exceptions.
3453 if (rt6_insert_exception(nrt, from)) {
3454 dst_release_immediate(&nrt->dst);
3458 netevent.old = &rt->dst;
3459 netevent.new = &nrt->dst;
3460 netevent.daddr = &msg->dest;
3461 netevent.neigh = neigh;
3462 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3465 fib6_info_release(from);
3466 neigh_release(neigh);
3469 #ifdef CONFIG_IPV6_ROUTE_INFO
3470 static struct fib6_info *rt6_get_route_info(struct net *net,
3471 const struct in6_addr *prefix, int prefixlen,
3472 const struct in6_addr *gwaddr,
3473 struct net_device *dev)
3475 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3476 int ifindex = dev->ifindex;
3477 struct fib6_node *fn;
3478 struct fib6_info *rt = NULL;
3479 struct fib6_table *table;
3481 table = fib6_get_table(net, tb_id);
3486 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3490 for_each_fib6_node_rt_rcu(fn) {
3491 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3493 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3495 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3497 if (!fib6_info_hold_safe(rt))
3506 static struct fib6_info *rt6_add_route_info(struct net *net,
3507 const struct in6_addr *prefix, int prefixlen,
3508 const struct in6_addr *gwaddr,
3509 struct net_device *dev,
3512 struct fib6_config cfg = {
3513 .fc_metric = IP6_RT_PRIO_USER,
3514 .fc_ifindex = dev->ifindex,
3515 .fc_dst_len = prefixlen,
3516 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3517 RTF_UP | RTF_PREF(pref),
3518 .fc_protocol = RTPROT_RA,
3519 .fc_type = RTN_UNICAST,
3520 .fc_nlinfo.portid = 0,
3521 .fc_nlinfo.nlh = NULL,
3522 .fc_nlinfo.nl_net = net,
3525 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3526 cfg.fc_dst = *prefix;
3527 cfg.fc_gateway = *gwaddr;
3529 /* We should treat it as a default route if prefix length is 0. */
3531 cfg.fc_flags |= RTF_DEFAULT;
3533 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3535 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3539 struct fib6_info *rt6_get_dflt_router(struct net *net,
3540 const struct in6_addr *addr,
3541 struct net_device *dev)
3543 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3544 struct fib6_info *rt;
3545 struct fib6_table *table;
3547 table = fib6_get_table(net, tb_id);
3552 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3553 if (dev == rt->fib6_nh.nh_dev &&
3554 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3555 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3558 if (rt && !fib6_info_hold_safe(rt))
3564 struct fib6_info *rt6_add_dflt_router(struct net *net,
3565 const struct in6_addr *gwaddr,
3566 struct net_device *dev,
3569 struct fib6_config cfg = {
3570 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3571 .fc_metric = IP6_RT_PRIO_USER,
3572 .fc_ifindex = dev->ifindex,
3573 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3574 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3575 .fc_protocol = RTPROT_RA,
3576 .fc_type = RTN_UNICAST,
3577 .fc_nlinfo.portid = 0,
3578 .fc_nlinfo.nlh = NULL,
3579 .fc_nlinfo.nl_net = net,
3582 cfg.fc_gateway = *gwaddr;
3584 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3585 struct fib6_table *table;
3587 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3589 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3592 return rt6_get_dflt_router(net, gwaddr, dev);
3595 static void __rt6_purge_dflt_routers(struct net *net,
3596 struct fib6_table *table)
3598 struct fib6_info *rt;
3602 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3603 struct net_device *dev = fib6_info_nh_dev(rt);
3604 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3606 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3607 (!idev || idev->cnf.accept_ra != 2) &&
3608 fib6_info_hold_safe(rt)) {
3610 ip6_del_rt(net, rt);
3616 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3619 void rt6_purge_dflt_routers(struct net *net)
3621 struct fib6_table *table;
3622 struct hlist_head *head;
3627 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3628 head = &net->ipv6.fib_table_hash[h];
3629 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3630 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3631 __rt6_purge_dflt_routers(net, table);
3638 static void rtmsg_to_fib6_config(struct net *net,
3639 struct in6_rtmsg *rtmsg,
3640 struct fib6_config *cfg)
3642 *cfg = (struct fib6_config){
3643 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3645 .fc_ifindex = rtmsg->rtmsg_ifindex,
3646 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
3647 .fc_expires = rtmsg->rtmsg_info,
3648 .fc_dst_len = rtmsg->rtmsg_dst_len,
3649 .fc_src_len = rtmsg->rtmsg_src_len,
3650 .fc_flags = rtmsg->rtmsg_flags,
3651 .fc_type = rtmsg->rtmsg_type,
3653 .fc_nlinfo.nl_net = net,
3655 .fc_dst = rtmsg->rtmsg_dst,
3656 .fc_src = rtmsg->rtmsg_src,
3657 .fc_gateway = rtmsg->rtmsg_gateway,
3661 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3663 struct fib6_config cfg;
3664 struct in6_rtmsg rtmsg;
3668 case SIOCADDRT: /* Add a route */
3669 case SIOCDELRT: /* Delete a route */
3670 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3672 err = copy_from_user(&rtmsg, arg,
3673 sizeof(struct in6_rtmsg));
3677 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3682 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3685 err = ip6_route_del(&cfg, NULL);
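/* A minimal user-space sketch of the legacy ioctl path handled above, assuming
 * an AF_INET6 socket fd and illustrative prefix/ifindex values; glibc's
 * <net/route.h> provides struct in6_rtmsg and RTF_UP.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <net/route.h>

static int example_add_route_ioctl(int fd, const struct in6_addr *dst,
				   int plen, int ifindex)
{
	struct in6_rtmsg rt;

	memset(&rt, 0, sizeof(rt));
	rt.rtmsg_dst = *dst;
	rt.rtmsg_dst_len = plen;
	rt.rtmsg_ifindex = ifindex;
	rt.rtmsg_flags = RTF_UP;
	rt.rtmsg_metric = 1;

	return ioctl(fd, SIOCADDRT, &rt);	/* reaches ipv6_route_ioctl() */
}
#endif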
3699 * Drop the packet on the floor
3702 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3705 struct dst_entry *dst = skb_dst(skb);
3706 switch (ipstats_mib_noroutes) {
3707 case IPSTATS_MIB_INNOROUTES:
3708 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3709 if (type == IPV6_ADDR_ANY) {
3710 IP6_INC_STATS(dev_net(dst->dev),
3711 __in6_dev_get_safely(skb->dev),
3712 IPSTATS_MIB_INADDRERRORS);
3716 case IPSTATS_MIB_OUTNOROUTES:
3717 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3718 ipstats_mib_noroutes);
3721 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3726 static int ip6_pkt_discard(struct sk_buff *skb)
3728 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3731 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3733 skb->dev = skb_dst(skb)->dev;
3734 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3737 static int ip6_pkt_prohibit(struct sk_buff *skb)
3739 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3742 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3744 skb->dev = skb_dst(skb)->dev;
3745 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3749 * Allocate a dst for local (unicast / anycast) address.
3752 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3753 struct inet6_dev *idev,
3754 const struct in6_addr *addr,
3755 bool anycast, gfp_t gfp_flags)
3757 struct fib6_config cfg = {
3758 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3759 .fc_ifindex = idev->dev->ifindex,
3760 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3763 .fc_protocol = RTPROT_KERNEL,
3764 .fc_nlinfo.nl_net = net,
3765 .fc_ignore_dev_down = true,
3769 cfg.fc_type = RTN_ANYCAST;
3770 cfg.fc_flags |= RTF_ANYCAST;
3772 cfg.fc_type = RTN_LOCAL;
3773 cfg.fc_flags |= RTF_LOCAL;
3776 return ip6_route_info_create(&cfg, gfp_flags, NULL);
3779 /* remove a deleted IP from prefsrc entries */
3780 struct arg_dev_net_ip {
3781 struct net_device *dev;
3783 struct in6_addr *addr;
3786 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3788 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3789 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3790 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3792 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3793 rt != net->ipv6.fib6_null_entry &&
3794 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3795 spin_lock_bh(&rt6_exception_lock);
3796 /* remove prefsrc entry */
3797 rt->fib6_prefsrc.plen = 0;
3798 spin_unlock_bh(&rt6_exception_lock);
3803 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3805 struct net *net = dev_net(ifp->idev->dev);
3806 struct arg_dev_net_ip adni = {
3807 .dev = ifp->idev->dev,
3811 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3814 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3816 /* Remove routers and update dst entries when a gateway turns into a host. */
3817 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3819 struct in6_addr *gateway = (struct in6_addr *)arg;
3821 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3822 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3826 /* Further clean up cached routes in the exception table.
3827 * This is needed because a cached route may have a different
3828 * gateway than its 'parent' in the case of an IP redirect.
3830 rt6_exceptions_clean_tohost(rt, gateway);
3835 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3837 fib6_clean_all(net, fib6_clean_tohost, gateway);
3840 struct arg_netdev_event {
3841 const struct net_device *dev;
3843 unsigned int nh_flags;
3844 unsigned long event;
3848 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3850 struct fib6_info *iter;
3851 struct fib6_node *fn;
3853 fn = rcu_dereference_protected(rt->fib6_node,
3854 lockdep_is_held(&rt->fib6_table->tb6_lock));
3855 iter = rcu_dereference_protected(fn->leaf,
3856 lockdep_is_held(&rt->fib6_table->tb6_lock));
3858 if (iter->fib6_metric == rt->fib6_metric &&
3859 rt6_qualify_for_ecmp(iter))
3861 iter = rcu_dereference_protected(iter->fib6_next,
3862 lockdep_is_held(&rt->fib6_table->tb6_lock));
3868 static bool rt6_is_dead(const struct fib6_info *rt)
3870 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3871 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3872 fib6_ignore_linkdown(rt)))
3878 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3880 struct fib6_info *iter;
3883 if (!rt6_is_dead(rt))
3884 total += rt->fib6_nh.nh_weight;
3886 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3887 if (!rt6_is_dead(iter))
3888 total += iter->fib6_nh.nh_weight;
3894 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3896 int upper_bound = -1;
3898 if (!rt6_is_dead(rt)) {
3899 *weight += rt->fib6_nh.nh_weight;
3900 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3903 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3906 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3908 struct fib6_info *iter;
3911 rt6_upper_bound_set(rt, &weight, total);
3913 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3914 rt6_upper_bound_set(iter, &weight, total);
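/* A worked example of the cumulative upper bounds computed above, assuming two
 * live nexthops with weights 1 and 2 (total 3) and the
 * DIV_ROUND_CLOSEST_ULL((u64)weight << 31, total) - 1 form used by
 * rt6_upper_bound_set(); the multipath flow hash is compared against these
 * bounds during nexthop selection.
 */
#if 0
static long long example_upper_bound(long long cumulative_weight, long long total)
{
	return ((cumulative_weight << 31) + total / 2) / total - 1;
}
/*
 * example_upper_bound(1, 3) == 715827882   -> first nexthop
 * example_upper_bound(3, 3) == 2147483647  -> second nexthop (INT_MAX)
 * A flow hash <= 715827882 selects the first hop, anything larger the second,
 * giving roughly a 1:2 traffic split.
 */
#endif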
3917 void rt6_multipath_rebalance(struct fib6_info *rt)
3919 struct fib6_info *first;
3922 /* In case the entire multipath route was marked for flushing,
4923 * then there is no need to rebalance upon the removal of every sibling route.
3926 if (!rt->fib6_nsiblings || rt->should_flush)
3929 /* During lookup routes are evaluated in order, so we need to
4930 * make sure upper bounds are assigned from the first sibling onwards.
3933 first = rt6_multipath_first_sibling(rt);
3934 if (WARN_ON_ONCE(!first))
3937 total = rt6_multipath_total_weight(first);
3938 rt6_multipath_upper_bound_set(first, total);
3941 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3943 const struct arg_netdev_event *arg = p_arg;
3944 struct net *net = dev_net(arg->dev);
3946 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3947 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3948 fib6_update_sernum_upto_root(net, rt);
3949 rt6_multipath_rebalance(rt);
3955 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3957 struct arg_netdev_event arg = {
3960 .nh_flags = nh_flags,
3964 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3965 arg.nh_flags |= RTNH_F_LINKDOWN;
3967 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3970 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3971 const struct net_device *dev)
3973 struct fib6_info *iter;
3975 if (rt->fib6_nh.nh_dev == dev)
3977 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3978 if (iter->fib6_nh.nh_dev == dev)
3984 static void rt6_multipath_flush(struct fib6_info *rt)
3986 struct fib6_info *iter;
3988 rt->should_flush = 1;
3989 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3990 iter->should_flush = 1;
3993 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3994 const struct net_device *down_dev)
3996 struct fib6_info *iter;
3997 unsigned int dead = 0;
3999 if (rt->fib6_nh.nh_dev == down_dev ||
4000 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4002 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4003 if (iter->fib6_nh.nh_dev == down_dev ||
4004 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
4010 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4011 const struct net_device *dev,
4012 unsigned int nh_flags)
4014 struct fib6_info *iter;
4016 if (rt->fib6_nh.nh_dev == dev)
4017 rt->fib6_nh.nh_flags |= nh_flags;
4018 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4019 if (iter->fib6_nh.nh_dev == dev)
4020 iter->fib6_nh.nh_flags |= nh_flags;
4023 /* called with write lock held for table with rt */
4024 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4026 const struct arg_netdev_event *arg = p_arg;
4027 const struct net_device *dev = arg->dev;
4028 struct net *net = dev_net(dev);
4030 if (rt == net->ipv6.fib6_null_entry)
4033 switch (arg->event) {
4034 case NETDEV_UNREGISTER:
4035 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4037 if (rt->should_flush)
4039 if (!rt->fib6_nsiblings)
4040 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4041 if (rt6_multipath_uses_dev(rt, dev)) {
4044 count = rt6_multipath_dead_count(rt, dev);
4045 if (rt->fib6_nsiblings + 1 == count) {
4046 rt6_multipath_flush(rt);
4049 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4051 fib6_update_sernum(net, rt);
4052 rt6_multipath_rebalance(rt);
4056 if (rt->fib6_nh.nh_dev != dev ||
4057 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4059 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
4060 rt6_multipath_rebalance(rt);
4067 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4069 struct arg_netdev_event arg = {
4075 struct net *net = dev_net(dev);
4077 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4078 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4080 fib6_clean_all(net, fib6_ifdown, &arg);
4083 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4085 rt6_sync_down_dev(dev, event);
4086 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4087 neigh_ifdown(&nd_tbl, dev);
4090 struct rt6_mtu_change_arg {
4091 struct net_device *dev;
4095 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4097 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4098 struct inet6_dev *idev;
4100 /* In IPv6, PMTU discovery is not optional,
4101 so the RTAX_MTU lock cannot disable it.
4102 We still use this lock to block changes
4103 caused by addrconf/ndisc.
4106 idev = __in6_dev_get(arg->dev);
4110 /* For an administrative MTU increase, there is no way to discover
4111 an IPv6 PMTU increase, so the PMTU has to be updated here.
4112 Since RFC 1981 doesn't cover administrative MTU increases,
4113 updating the PMTU on increase is a MUST (e.g. for jumbo frames).
4115 if (rt->fib6_nh.nh_dev == arg->dev &&
4116 !fib6_metric_locked(rt, RTAX_MTU)) {
4117 u32 mtu = rt->fib6_pmtu;
4119 if (mtu >= arg->mtu ||
4120 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4121 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4123 spin_lock_bh(&rt6_exception_lock);
4124 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4125 spin_unlock_bh(&rt6_exception_lock);
4130 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4132 struct rt6_mtu_change_arg arg = {
4137 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
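/* A self-contained sketch of the update rule applied above when a device MTU
 * changes, assuming the route uses the device and RTAX_MTU is not locked; in
 * the CHANGEMTU path, idev->cnf.mtu6 still holds the old device MTU when the
 * walk above runs.
 */
#if 0
static int example_should_update_route_mtu(unsigned int route_mtu,
					   unsigned int new_dev_mtu,
					   unsigned int old_dev_mtu)
{
	/* administrative decrease: always shrink the route MTU */
	if (route_mtu >= new_dev_mtu)
		return 1;
	/* administrative increase: only grow if the route was merely tracking
	 * the device MTU rather than carrying a learned PMTU value
	 */
	return route_mtu == old_dev_mtu;
}
#endif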
4140 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4141 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
4142 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
4143 [RTA_OIF] = { .type = NLA_U32 },
4144 [RTA_IIF] = { .type = NLA_U32 },
4145 [RTA_PRIORITY] = { .type = NLA_U32 },
4146 [RTA_METRICS] = { .type = NLA_NESTED },
4147 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4148 [RTA_PREF] = { .type = NLA_U8 },
4149 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4150 [RTA_ENCAP] = { .type = NLA_NESTED },
4151 [RTA_EXPIRES] = { .type = NLA_U32 },
4152 [RTA_UID] = { .type = NLA_U32 },
4153 [RTA_MARK] = { .type = NLA_U32 },
4154 [RTA_TABLE] = { .type = NLA_U32 },
4155 [RTA_IP_PROTO] = { .type = NLA_U8 },
4156 [RTA_SPORT] = { .type = NLA_U16 },
4157 [RTA_DPORT] = { .type = NLA_U16 },
4160 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4161 struct fib6_config *cfg,
4162 struct netlink_ext_ack *extack)
4165 struct nlattr *tb[RTA_MAX+1];
4169 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4175 rtm = nlmsg_data(nlh);
4177 *cfg = (struct fib6_config){
4178 .fc_table = rtm->rtm_table,
4179 .fc_dst_len = rtm->rtm_dst_len,
4180 .fc_src_len = rtm->rtm_src_len,
4182 .fc_protocol = rtm->rtm_protocol,
4183 .fc_type = rtm->rtm_type,
4185 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4186 .fc_nlinfo.nlh = nlh,
4187 .fc_nlinfo.nl_net = sock_net(skb->sk),
4190 if (rtm->rtm_type == RTN_UNREACHABLE ||
4191 rtm->rtm_type == RTN_BLACKHOLE ||
4192 rtm->rtm_type == RTN_PROHIBIT ||
4193 rtm->rtm_type == RTN_THROW)
4194 cfg->fc_flags |= RTF_REJECT;
4196 if (rtm->rtm_type == RTN_LOCAL)
4197 cfg->fc_flags |= RTF_LOCAL;
4199 if (rtm->rtm_flags & RTM_F_CLONED)
4200 cfg->fc_flags |= RTF_CACHE;
4202 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4204 if (tb[RTA_GATEWAY]) {
4205 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4206 cfg->fc_flags |= RTF_GATEWAY;
4209 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4214 int plen = (rtm->rtm_dst_len + 7) >> 3;
4216 if (nla_len(tb[RTA_DST]) < plen)
4219 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4223 int plen = (rtm->rtm_src_len + 7) >> 3;
4225 if (nla_len(tb[RTA_SRC]) < plen)
4228 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4231 if (tb[RTA_PREFSRC])
4232 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4235 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4237 if (tb[RTA_PRIORITY])
4238 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4240 if (tb[RTA_METRICS]) {
4241 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4242 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4246 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4248 if (tb[RTA_MULTIPATH]) {
4249 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4250 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4252 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4253 cfg->fc_mp_len, extack);
4259 pref = nla_get_u8(tb[RTA_PREF]);
4260 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4261 pref != ICMPV6_ROUTER_PREF_HIGH)
4262 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4263 cfg->fc_flags |= RTF_PREF(pref);
4267 cfg->fc_encap = tb[RTA_ENCAP];
4269 if (tb[RTA_ENCAP_TYPE]) {
4270 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4272 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4277 if (tb[RTA_EXPIRES]) {
4278 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4280 if (addrconf_finite_timeout(timeout)) {
4281 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4282 cfg->fc_flags |= RTF_EXPIRES;
4292 struct fib6_info *fib6_info;
4293 struct fib6_config r_cfg;
4294 struct list_head next;
4297 static int ip6_route_info_append(struct net *net,
4298 struct list_head *rt6_nh_list,
4299 struct fib6_info *rt,
4300 struct fib6_config *r_cfg)
4305 list_for_each_entry(nh, rt6_nh_list, next) {
4306 /* check if fib6_info already exists */
4307 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4311 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4315 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4316 list_add_tail(&nh->next, rt6_nh_list);
4321 static void ip6_route_mpath_notify(struct fib6_info *rt,
4322 struct fib6_info *rt_last,
4323 struct nl_info *info,
4326 /* if this is an APPEND route, then rt points to the first route
4327 * inserted and rt_last points to the last route inserted. Userspace
4328 * wants a consistent dump of the route which starts at the first
4329 * nexthop. Since sibling routes are always added at the end of
4330 * the list, find the first sibling of the last route appended
4332 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4333 rt = list_first_entry(&rt_last->fib6_siblings,
4339 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4342 static int ip6_route_multipath_add(struct fib6_config *cfg,
4343 struct netlink_ext_ack *extack)
4345 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4346 struct nl_info *info = &cfg->fc_nlinfo;
4347 struct fib6_config r_cfg;
4348 struct rtnexthop *rtnh;
4349 struct fib6_info *rt;
4350 struct rt6_nh *err_nh;
4351 struct rt6_nh *nh, *nh_safe;
4357 int replace = (cfg->fc_nlinfo.nlh &&
4358 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4359 LIST_HEAD(rt6_nh_list);
4361 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4362 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4363 nlflags |= NLM_F_APPEND;
4365 remaining = cfg->fc_mp_len;
4366 rtnh = (struct rtnexthop *)cfg->fc_mp;
4368 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4369 * fib6_info structs per nexthop
4371 while (rtnh_ok(rtnh, remaining)) {
4372 memcpy(&r_cfg, cfg, sizeof(*cfg));
4373 if (rtnh->rtnh_ifindex)
4374 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4376 attrlen = rtnh_attrlen(rtnh);
4378 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4380 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4382 r_cfg.fc_gateway = nla_get_in6_addr(nla);
4383 r_cfg.fc_flags |= RTF_GATEWAY;
4385 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4386 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4388 r_cfg.fc_encap_type = nla_get_u16(nla);
4391 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4392 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4398 if (!rt6_qualify_for_ecmp(rt)) {
4400 NL_SET_ERR_MSG(extack,
4401 "Device only routes can not be added for IPv6 using the multipath API.");
4402 fib6_info_release(rt);
4406 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4408 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4411 fib6_info_release(rt);
4415 rtnh = rtnh_next(rtnh, &remaining);
4418 /* for add and replace send one notification with all nexthops.
4419 * Skip the notification in fib6_add_rt2node and send one with
4420 * the full route when done
4422 info->skip_notify = 1;
4425 list_for_each_entry(nh, &rt6_nh_list, next) {
4426 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4427 fib6_info_release(nh->fib6_info);
4430 /* save reference to last route successfully inserted */
4431 rt_last = nh->fib6_info;
4433 /* save reference to first route for notification */
4435 rt_notif = nh->fib6_info;
4438 /* nh->fib6_info is used or freed at this point, reset to NULL */
4439 nh->fib6_info = NULL;
4442 NL_SET_ERR_MSG_MOD(extack,
4443 "multipath route replace failed (check consistency of installed routes)");
4448 /* Because each route is added like a single route we remove
4449 * these flags after the first nexthop: if there is a collision,
4450 * we have already failed to add the first nexthop:
4451 * fib6_add_rt2node() has rejected it; when replacing, old
4452 * nexthops have been replaced by the first new one, and the rest should be added to it.
4455 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4460 /* success ... tell user about new route */
4461 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4465 /* send notification for routes that were added so that
4466 * the delete notifications sent by ip6_route_del are coherent.
4470 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4472 /* Delete routes that were already added */
4473 list_for_each_entry(nh, &rt6_nh_list, next) {
4476 ip6_route_del(&nh->r_cfg, extack);
4480 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4482 fib6_info_release(nh->fib6_info);
4483 list_del(&nh->next);
4490 static int ip6_route_multipath_del(struct fib6_config *cfg,
4491 struct netlink_ext_ack *extack)
4493 struct fib6_config r_cfg;
4494 struct rtnexthop *rtnh;
4497 int err = 1, last_err = 0;
4499 remaining = cfg->fc_mp_len;
4500 rtnh = (struct rtnexthop *)cfg->fc_mp;
4502 /* Parse a Multipath Entry */
4503 while (rtnh_ok(rtnh, remaining)) {
4504 memcpy(&r_cfg, cfg, sizeof(*cfg));
4505 if (rtnh->rtnh_ifindex)
4506 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4508 attrlen = rtnh_attrlen(rtnh);
4510 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4512 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4514 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4515 r_cfg.fc_flags |= RTF_GATEWAY;
4518 err = ip6_route_del(&r_cfg, extack);
4522 rtnh = rtnh_next(rtnh, &remaining);
4528 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4529 struct netlink_ext_ack *extack)
4531 struct fib6_config cfg;
4534 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4539 return ip6_route_multipath_del(&cfg, extack);
4541 cfg.fc_delete_all_nh = 1;
4542 return ip6_route_del(&cfg, extack);
4546 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4547 struct netlink_ext_ack *extack)
4549 struct fib6_config cfg;
4552 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4556 if (cfg.fc_metric == 0)
4557 cfg.fc_metric = IP6_RT_PRIO_USER;
4560 return ip6_route_multipath_add(&cfg, extack);
4562 return ip6_route_add(&cfg, GFP_KERNEL, extack);
4565 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4567 int nexthop_len = 0;
4569 if (rt->fib6_nsiblings) {
4570 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4571 + NLA_ALIGN(sizeof(struct rtnexthop))
4572 + nla_total_size(16) /* RTA_GATEWAY */
4573 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4575 nexthop_len *= rt->fib6_nsiblings;
4578 return NLMSG_ALIGN(sizeof(struct rtmsg))
4579 + nla_total_size(16) /* RTA_SRC */
4580 + nla_total_size(16) /* RTA_DST */
4581 + nla_total_size(16) /* RTA_GATEWAY */
4582 + nla_total_size(16) /* RTA_PREFSRC */
4583 + nla_total_size(4) /* RTA_TABLE */
4584 + nla_total_size(4) /* RTA_IIF */
4585 + nla_total_size(4) /* RTA_OIF */
4586 + nla_total_size(4) /* RTA_PRIORITY */
4587 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4588 + nla_total_size(sizeof(struct rta_cacheinfo))
4589 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4590 + nla_total_size(1) /* RTA_PREF */
4591 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4595 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4596 unsigned int *flags, bool skip_oif)
4598 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4599 *flags |= RTNH_F_DEAD;
4601 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4602 *flags |= RTNH_F_LINKDOWN;
4605 if (fib6_ignore_linkdown(rt))
4606 *flags |= RTNH_F_DEAD;
4610 if (rt->fib6_flags & RTF_GATEWAY) {
4611 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4612 goto nla_put_failure;
4615 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4616 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4617 *flags |= RTNH_F_OFFLOAD;
4619 /* not needed for multipath encoding because it has a rtnexthop struct */
4620 if (!skip_oif && rt->fib6_nh.nh_dev &&
4621 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4622 goto nla_put_failure;
4624 if (rt->fib6_nh.nh_lwtstate &&
4625 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4626 goto nla_put_failure;
4634 /* add multipath next hop */
4635 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4637 const struct net_device *dev = rt->fib6_nh.nh_dev;
4638 struct rtnexthop *rtnh;
4639 unsigned int flags = 0;
4641 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4643 goto nla_put_failure;
4645 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4646 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4648 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4649 goto nla_put_failure;
4651 rtnh->rtnh_flags = flags;
4653 /* length of rtnetlink header + attributes */
4654 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4662 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4663 struct fib6_info *rt, struct dst_entry *dst,
4664 struct in6_addr *dest, struct in6_addr *src,
4665 int iif, int type, u32 portid, u32 seq,
4668 struct rt6_info *rt6 = (struct rt6_info *)dst;
4669 struct rt6key *rt6_dst, *rt6_src;
4670 u32 *pmetrics, table, rt6_flags;
4671 struct nlmsghdr *nlh;
4675 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4680 rt6_dst = &rt6->rt6i_dst;
4681 rt6_src = &rt6->rt6i_src;
4682 rt6_flags = rt6->rt6i_flags;
4684 rt6_dst = &rt->fib6_dst;
4685 rt6_src = &rt->fib6_src;
4686 rt6_flags = rt->fib6_flags;
4689 rtm = nlmsg_data(nlh);
4690 rtm->rtm_family = AF_INET6;
4691 rtm->rtm_dst_len = rt6_dst->plen;
4692 rtm->rtm_src_len = rt6_src->plen;
4695 table = rt->fib6_table->tb6_id;
4697 table = RT6_TABLE_UNSPEC;
4698 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4699 if (nla_put_u32(skb, RTA_TABLE, table))
4700 goto nla_put_failure;
4702 rtm->rtm_type = rt->fib6_type;
4704 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4705 rtm->rtm_protocol = rt->fib6_protocol;
4707 if (rt6_flags & RTF_CACHE)
4708 rtm->rtm_flags |= RTM_F_CLONED;
4711 if (nla_put_in6_addr(skb, RTA_DST, dest))
4712 goto nla_put_failure;
4713 rtm->rtm_dst_len = 128;
4714 } else if (rtm->rtm_dst_len)
4715 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4716 goto nla_put_failure;
4717 #ifdef CONFIG_IPV6_SUBTREES
4719 if (nla_put_in6_addr(skb, RTA_SRC, src))
4720 goto nla_put_failure;
4721 rtm->rtm_src_len = 128;
4722 } else if (rtm->rtm_src_len &&
4723 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4724 goto nla_put_failure;
4727 #ifdef CONFIG_IPV6_MROUTE
4728 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4729 int err = ip6mr_get_route(net, skb, rtm, portid);
4734 goto nla_put_failure;
4737 if (nla_put_u32(skb, RTA_IIF, iif))
4738 goto nla_put_failure;
4740 struct in6_addr saddr_buf;
4741 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4742 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4743 goto nla_put_failure;
4746 if (rt->fib6_prefsrc.plen) {
4747 struct in6_addr saddr_buf;
4748 saddr_buf = rt->fib6_prefsrc.addr;
4749 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4750 goto nla_put_failure;
4753 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4754 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4755 goto nla_put_failure;
4757 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4758 goto nla_put_failure;
4760 /* For multipath routes, walk the siblings list and add
4761 * each as a nexthop within RTA_MULTIPATH.
4764 if (rt6_flags & RTF_GATEWAY &&
4765 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4766 goto nla_put_failure;
4768 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4769 goto nla_put_failure;
4770 } else if (rt->fib6_nsiblings) {
4771 struct fib6_info *sibling, *next_sibling;
4774 mp = nla_nest_start(skb, RTA_MULTIPATH);
4776 goto nla_put_failure;
4778 if (rt6_add_nexthop(skb, rt) < 0)
4779 goto nla_put_failure;
4781 list_for_each_entry_safe(sibling, next_sibling,
4782 &rt->fib6_siblings, fib6_siblings) {
4783 if (rt6_add_nexthop(skb, sibling) < 0)
4784 goto nla_put_failure;
4787 nla_nest_end(skb, mp);
4789 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4790 goto nla_put_failure;
4793 if (rt6_flags & RTF_EXPIRES) {
4794 expires = dst ? dst->expires : rt->expires;
4798 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4799 goto nla_put_failure;
4801 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4802 goto nla_put_failure;
4805 nlmsg_end(skb, nlh);
4809 nlmsg_cancel(skb, nlh);
4813 static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4814 const struct net_device *dev)
4816 if (f6i->fib6_nh.nh_dev == dev)
4819 if (f6i->fib6_nsiblings) {
4820 struct fib6_info *sibling, *next_sibling;
4822 list_for_each_entry_safe(sibling, next_sibling,
4823 &f6i->fib6_siblings, fib6_siblings) {
4824 if (sibling->fib6_nh.nh_dev == dev)
4832 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4834 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4835 struct fib_dump_filter *filter = &arg->filter;
4836 unsigned int flags = NLM_F_MULTI;
4837 struct net *net = arg->net;
4839 if (rt == net->ipv6.fib6_null_entry)
4842 if ((filter->flags & RTM_F_PREFIX) &&
4843 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4844 /* success since this is not a prefix route */
4847 if (filter->filter_set) {
4848 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4849 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4850 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
4853 flags |= NLM_F_DUMP_FILTERED;
4856 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4857 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4858 arg->cb->nlh->nlmsg_seq, flags);
4861 static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4862 const struct nlmsghdr *nlh,
4864 struct netlink_ext_ack *extack)
4869 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4870 NL_SET_ERR_MSG_MOD(extack,
4871 "Invalid header for get route request");
4875 if (!netlink_strict_get_check(skb))
4876 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4877 rtm_ipv6_policy, extack);
4879 rtm = nlmsg_data(nlh);
4880 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4881 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4882 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4884 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4887 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4888 NL_SET_ERR_MSG_MOD(extack,
4889 "Invalid flags for get route request");
4893 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4894 rtm_ipv6_policy, extack);
4898 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4899 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4900 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4904 for (i = 0; i <= RTA_MAX; i++) {
4920 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4928 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4929 struct netlink_ext_ack *extack)
4931 struct net *net = sock_net(in_skb->sk);
4932 struct nlattr *tb[RTA_MAX+1];
4933 int err, iif = 0, oif = 0;
4934 struct fib6_info *from;
4935 struct dst_entry *dst;
4936 struct rt6_info *rt;
4937 struct sk_buff *skb;
4939 struct flowi6 fl6 = {};
4942 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4947 rtm = nlmsg_data(nlh);
4948 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4949 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4952 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4955 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4959 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4962 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4966 iif = nla_get_u32(tb[RTA_IIF]);
4969 oif = nla_get_u32(tb[RTA_OIF]);
4972 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4975 fl6.flowi6_uid = make_kuid(current_user_ns(),
4976 nla_get_u32(tb[RTA_UID]));
4978 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4981 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4984 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4986 if (tb[RTA_IP_PROTO]) {
4987 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4988 &fl6.flowi6_proto, AF_INET6,
4995 struct net_device *dev;
5000 dev = dev_get_by_index_rcu(net, iif);
5007 fl6.flowi6_iif = iif;
5009 if (!ipv6_addr_any(&fl6.saddr))
5010 flags |= RT6_LOOKUP_F_HAS_SADDR;
5012 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5016 fl6.flowi6_oif = oif;
5018 dst = ip6_route_output(net, NULL, &fl6);
5022 rt = container_of(dst, struct rt6_info, dst);
5023 if (rt->dst.error) {
5024 err = rt->dst.error;
5029 if (rt == net->ipv6.ip6_null_entry) {
5030 err = rt->dst.error;
5035 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5042 skb_dst_set(skb, &rt->dst);
5045 from = rcu_dereference(rt->from);
5048 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
5049 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5052 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5053 &fl6.saddr, iif, RTM_NEWROUTE,
5054 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5063 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5068 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5069 unsigned int nlm_flags)
5071 struct sk_buff *skb;
5072 struct net *net = info->nl_net;
5077 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5079 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5083 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5084 event, info->portid, seq, nlm_flags);
5086 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5087 WARN_ON(err == -EMSGSIZE);
5091 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5092 info->nlh, gfp_any());
5096 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);

static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER can be fired multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}

#ifdef CONFIG_PROC_FS
/* Seq handler for the per-netns stats file: seven hexadecimal fields, in
 * order fib nodes, route nodes, allocated routes, route entries, cached
 * routes, in-use dst entries and discarded routes.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;

	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
#endif	/* CONFIG_PROC_FS */
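
/* Illustration only, not part of the original source: the seq handler above
 * is wired to /proc/net/rt6_stats by ip6_route_net_init_late() further down,
 * so the counters can be inspected with a plain read:
 *
 *   $ cat /proc/net/rt6_stats
 */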

#ifdef CONFIG_SYSCTL

static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
{
	struct net *net;
	int delay;
	int ret;

	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}
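
/* Illustration only, not part of the original source: this handler backs the
 * write-only "flush" sysctl.  Writing an integer to it forces a routing
 * garbage-collection pass, using the flush_delay value that was in effect
 * before the write, e.g.:
 *
 *   # sysctl -w net.ipv6.route.flush=1
 */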

static struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	= "flush",
		.data		= &init_net.ipv6.sysctl.flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv6_sysctl_rtcache_flush
	},
	{
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops_template.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "skip_notify_on_dev_down",
		.data		= &init_net.ipv6.sysctl.skip_notify_on_dev_down,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
#endif	/* CONFIG_SYSCTL */
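
/* Illustration only, not part of the original source: the per-namespace copy
 * returned above is registered by the IPv6 sysctl setup code (outside this
 * file) under "net.ipv6.route", so the template entries appear as
 * /proc/sys/net/ipv6/route/<name>, e.g.:
 *
 *   $ sysctl net.ipv6.route.gc_thresh
 *   $ cat /proc/sys/net/ipv6/route/max_size
 *
 * The "flush" entry (table[0]) is hidden from namespaces owned by an
 * unprivileged user, as done just above.
 */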

static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
					    sizeof(*net->ipv6.fib6_null_entry),
					    GFP_KERNEL);
	if (!net->ipv6.fib6_null_entry)
		goto out_ip6_dst_entries;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
	net->ipv6.sysctl.skip_notify_on_dev_down = 0;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
			sizeof(struct ipv6_route_iter));
	proc_create_net_single("rt6_stats", 0444, net->proc_net,
			       rt6_stats_seq_show, NULL);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};

void __init ip6_route_init_special_entries(void)
{
	/* The loopback device is registered before this portion of code runs,
	 * so the loopback reference in rt6_info has not been taken yet;
	 * take it manually for init_net.
	 */
	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}

int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}