2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <linux/rhashtable.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
39 #include <linux/notifier.h>
40 #include <linux/if_arp.h>
41 #include <net/checksum.h>
42 #include <net/netlink.h>
43 #include <net/fib_rules.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 #include <linux/export.h>
52 #include <net/ip6_checksum.h>
53 #include <linux/netconf.h>
55 #include <linux/nospec.h>
58 struct fib_rule common;
65 /* Big lock, protecting the vif table, the mrt cache and the mroute socket state.
66 Note that changes are serialized via rtnl_lock.
69 static DEFINE_RWLOCK(mrt_lock);
71 /* Multicast router control variables */
73 /* Special spinlock for queue of unresolved entries */
74 static DEFINE_SPINLOCK(mfc_unres_lock);
76 /* We revert to Alan's original scheme. The hash table of resolved
77 entries is changed only in process context and is protected
78 by the weak rwlock mrt_lock. The queue of unresolved entries is protected
79 by the strong spinlock mfc_unres_lock.
81 With this split, the data path is entirely free of exclusive locks.
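/* A minimal sketch of the pattern this split implies, based on the lookup
 * and update paths below (illustrative only):
 *
 *	read_lock(&mrt_lock);                   forwarding-path lookups
 *	... resolve (S,G) and forward ...
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);          unresolved-queue handling
 *	... queue skb on an unresolved entry ...
 *	spin_unlock_bh(&mfc_unres_lock);
 *
 *	write_lock_bh(&mrt_lock);               control path, under RTNL
 *	... add/delete a VIF or rewrite an MFC entry ...
 *	write_unlock_bh(&mrt_lock);
 */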
84 static struct kmem_cache *mrt_cachep __read_mostly;
86 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
87 static void ip6mr_free_table(struct mr_table *mrt);
89 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
90 struct net_device *dev, struct sk_buff *skb,
91 struct mfc6_cache *cache);
92 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
93 mifi_t mifi, int assert);
94 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
97 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
98 struct netlink_callback *cb);
99 static void mroute_clean_tables(struct mr_table *mrt, bool all);
100 static void ipmr_expire_process(struct timer_list *t);
102 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
103 #define ip6mr_for_each_table(mrt, net) \
104 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
106 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
107 struct mr_table *mrt)
109 struct mr_table *ret;
112 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
113 struct mr_table, list);
115 ret = list_entry_rcu(mrt->list.next,
116 struct mr_table, list);
118 if (&ret->list == &net->ipv6.mr6_tables)
123 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
125 struct mr_table *mrt;
127 ip6mr_for_each_table(mrt, net) {
134 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
135 struct mr_table **mrt)
138 struct ip6mr_result res;
139 struct fib_lookup_arg arg = {
141 .flags = FIB_LOOKUP_NOREF,
144 /* update flow if oif or iif point to device enslaved to l3mdev */
145 l3mdev_update_flow(net, flowi6_to_flowi(flp6));
147 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
148 flowi6_to_flowi(flp6), 0, &arg);
155 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
156 int flags, struct fib_lookup_arg *arg)
158 struct ip6mr_result *res = arg->result;
159 struct mr_table *mrt;
161 switch (rule->action) {
164 case FR_ACT_UNREACHABLE:
166 case FR_ACT_PROHIBIT:
168 case FR_ACT_BLACKHOLE:
173 arg->table = fib_rule_get_table(rule, arg);
175 mrt = ip6mr_get_table(rule->fr_net, arg->table);
182 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
187 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
191 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
192 struct fib_rule_hdr *frh, struct nlattr **tb,
193 struct netlink_ext_ack *extack)
198 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
204 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
205 struct fib_rule_hdr *frh)
213 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
214 .family = RTNL_FAMILY_IP6MR,
215 .rule_size = sizeof(struct ip6mr_rule),
216 .addr_size = sizeof(struct in6_addr),
217 .action = ip6mr_rule_action,
218 .match = ip6mr_rule_match,
219 .configure = ip6mr_rule_configure,
220 .compare = ip6mr_rule_compare,
221 .fill = ip6mr_rule_fill,
222 .nlgroup = RTNLGRP_IPV6_RULE,
223 .policy = ip6mr_rule_policy,
224 .owner = THIS_MODULE,
227 static int __net_init ip6mr_rules_init(struct net *net)
229 struct fib_rules_ops *ops;
230 struct mr_table *mrt;
233 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
237 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
239 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
245 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
249 net->ipv6.mr6_rules_ops = ops;
253 ip6mr_free_table(mrt);
255 fib_rules_unregister(ops);
259 static void __net_exit ip6mr_rules_exit(struct net *net)
261 struct mr_table *mrt, *next;
264 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
265 list_del(&mrt->list);
266 ip6mr_free_table(mrt);
268 fib_rules_unregister(net->ipv6.mr6_rules_ops);
272 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
274 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
277 static unsigned int ip6mr_rules_seq_read(struct net *net)
279 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
282 bool ip6mr_rule_default(const struct fib_rule *rule)
284 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
285 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
287 EXPORT_SYMBOL(ip6mr_rule_default);
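/* With multiple tables, every forwarding-path lookup goes through
 * ip6mr_fib_lookup(): fib_rules_lookup() walks the IP6MR rule set, and for
 * a to-table rule ip6mr_rule_action() resolves the rule's table id to an
 * mr_table via ip6mr_get_table(). The default rule added in
 * ip6mr_rules_init() points at RT6_TABLE_DFLT.
 */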
289 #define ip6mr_for_each_table(mrt, net) \
290 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
292 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
293 struct mr_table *mrt)
296 return net->ipv6.mrt6;
300 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
302 return net->ipv6.mrt6;
305 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
306 struct mr_table **mrt)
308 *mrt = net->ipv6.mrt6;
312 static int __net_init ip6mr_rules_init(struct net *net)
314 struct mr_table *mrt;
316 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
319 net->ipv6.mrt6 = mrt;
323 static void __net_exit ip6mr_rules_exit(struct net *net)
326 ip6mr_free_table(net->ipv6.mrt6);
327 net->ipv6.mrt6 = NULL;
331 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
336 static unsigned int ip6mr_rules_seq_read(struct net *net)
342 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
345 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
346 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
348 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
349 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
352 static const struct rhashtable_params ip6mr_rht_params = {
353 .head_offset = offsetof(struct mr_mfc, mnode),
354 .key_offset = offsetof(struct mfc6_cache, cmparg),
355 .key_len = sizeof(struct mfc6_cache_cmp_arg),
358 .obj_cmpfn = ip6mr_hash_cmp,
359 .automatic_shrinking = true,
362 static void ip6mr_new_table_set(struct mr_table *mrt,
365 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
366 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
370 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
371 .mf6c_origin = IN6ADDR_ANY_INIT,
372 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
375 static struct mr_table_ops ip6mr_mr_table_ops = {
376 .rht_params = &ip6mr_rht_params,
377 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
380 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
382 struct mr_table *mrt;
384 mrt = ip6mr_get_table(net, id);
388 return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
389 ipmr_expire_process, ip6mr_new_table_set);
392 static void ip6mr_free_table(struct mr_table *mrt)
394 del_timer_sync(&mrt->ipmr_expire_timer);
395 mroute_clean_tables(mrt, true);
396 rhltable_destroy(&mrt->mfc_hash);
400 #ifdef CONFIG_PROC_FS
401 /* The /proc interfaces to multicast routing:
402 * /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
405 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
408 struct mr_vif_iter *iter = seq->private;
409 struct net *net = seq_file_net(seq);
410 struct mr_table *mrt;
412 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
414 return ERR_PTR(-ENOENT);
418 read_lock(&mrt_lock);
419 return mr_vif_seq_start(seq, pos);
422 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
425 read_unlock(&mrt_lock);
428 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
430 struct mr_vif_iter *iter = seq->private;
431 struct mr_table *mrt = iter->mrt;
433 if (v == SEQ_START_TOKEN) {
435 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
437 const struct vif_device *vif = v;
438 const char *name = vif->dev ? vif->dev->name : "none";
441 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
442 vif - mrt->vif_table,
443 name, vif->bytes_in, vif->pkt_in,
444 vif->bytes_out, vif->pkt_out,
450 static const struct seq_operations ip6mr_vif_seq_ops = {
451 .start = ip6mr_vif_seq_start,
452 .next = mr_vif_seq_next,
453 .stop = ip6mr_vif_seq_stop,
454 .show = ip6mr_vif_seq_show,
457 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
459 struct net *net = seq_file_net(seq);
460 struct mr_table *mrt;
462 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
464 return ERR_PTR(-ENOENT);
466 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
469 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
473 if (v == SEQ_START_TOKEN) {
477 "Iif Pkts Bytes Wrong Oifs\n");
479 const struct mfc6_cache *mfc = v;
480 const struct mr_mfc_iter *it = seq->private;
481 struct mr_table *mrt = it->mrt;
483 seq_printf(seq, "%pI6 %pI6 %-3hd",
484 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
487 if (it->cache != &mrt->mfc_unres_queue) {
488 seq_printf(seq, " %8lu %8lu %8lu",
489 mfc->_c.mfc_un.res.pkt,
490 mfc->_c.mfc_un.res.bytes,
491 mfc->_c.mfc_un.res.wrong_if);
492 for (n = mfc->_c.mfc_un.res.minvif;
493 n < mfc->_c.mfc_un.res.maxvif; n++) {
494 if (VIF_EXISTS(mrt, n) &&
495 mfc->_c.mfc_un.res.ttls[n] < 255)
498 mfc->_c.mfc_un.res.ttls[n]);
501 /* unresolved mfc_caches don't contain
502 * pkt, bytes and wrong_if values
504 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
511 static const struct seq_operations ipmr_mfc_seq_ops = {
512 .start = ipmr_mfc_seq_start,
513 .next = mr_mfc_seq_next,
514 .stop = mr_mfc_seq_stop,
515 .show = ipmr_mfc_seq_show,
519 #ifdef CONFIG_IPV6_PIMSM_V2
521 static int pim6_rcv(struct sk_buff *skb)
523 struct pimreghdr *pim;
524 struct ipv6hdr *encap;
525 struct net_device *reg_dev = NULL;
526 struct net *net = dev_net(skb->dev);
527 struct mr_table *mrt;
528 struct flowi6 fl6 = {
529 .flowi6_iif = skb->dev->ifindex,
530 .flowi6_mark = skb->mark,
534 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
537 pim = (struct pimreghdr *)skb_transport_header(skb);
538 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
539 (pim->flags & PIM_NULL_REGISTER) ||
540 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
541 sizeof(*pim), IPPROTO_PIM,
542 csum_partial((void *)pim, sizeof(*pim), 0)) &&
543 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
546 /* check if the inner packet is destined to mcast group */
547 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
550 if (!ipv6_addr_is_multicast(&encap->daddr) ||
551 encap->payload_len == 0 ||
552 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
555 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
557 reg_vif_num = mrt->mroute_reg_vif_num;
559 read_lock(&mrt_lock);
560 if (reg_vif_num >= 0)
561 reg_dev = mrt->vif_table[reg_vif_num].dev;
564 read_unlock(&mrt_lock);
569 skb->mac_header = skb->network_header;
570 skb_pull(skb, (u8 *)encap - skb->data);
571 skb_reset_network_header(skb);
572 skb->protocol = htons(ETH_P_IPV6);
573 skb->ip_summed = CHECKSUM_NONE;
575 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
586 static const struct inet6_protocol pim6_protocol = {
590 /* Service routines creating virtual interfaces: PIMREG */
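/* The pim6reg device backs the MIFF_REGISTER VIF. Packets routed out of
 * that VIF are never put on a wire; reg_vif_xmit() below hands them whole
 * to the daemon as MRT6MSG_WHOLEPKT reports. In the other direction,
 * pim6_rcv() re-injects decapsulated PIM Register payloads into the stack
 * via skb_tunnel_rx() as if they had arrived on this device.
 */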
592 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
593 struct net_device *dev)
595 struct net *net = dev_net(dev);
596 struct mr_table *mrt;
597 struct flowi6 fl6 = {
598 .flowi6_oif = dev->ifindex,
599 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
600 .flowi6_mark = skb->mark,
604 err = ip6mr_fib_lookup(net, &fl6, &mrt);
610 read_lock(&mrt_lock);
611 dev->stats.tx_bytes += skb->len;
612 dev->stats.tx_packets++;
613 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
614 read_unlock(&mrt_lock);
619 static int reg_vif_get_iflink(const struct net_device *dev)
624 static const struct net_device_ops reg_vif_netdev_ops = {
625 .ndo_start_xmit = reg_vif_xmit,
626 .ndo_get_iflink = reg_vif_get_iflink,
629 static void reg_vif_setup(struct net_device *dev)
631 dev->type = ARPHRD_PIMREG;
632 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; /* leave room for the outer IPv6 header and the 8-byte PIM register header */
633 dev->flags = IFF_NOARP;
634 dev->netdev_ops = &reg_vif_netdev_ops;
635 dev->needs_free_netdev = true;
636 dev->features |= NETIF_F_NETNS_LOCAL;
639 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
641 struct net_device *dev;
644 if (mrt->id == RT6_TABLE_DFLT)
645 sprintf(name, "pim6reg");
647 sprintf(name, "pim6reg%u", mrt->id);
649 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
653 dev_net_set(dev, net);
655 if (register_netdevice(dev)) {
667 unregister_netdevice(dev);
672 static int call_ip6mr_vif_entry_notifiers(struct net *net,
673 enum fib_event_type event_type,
674 struct vif_device *vif,
675 mifi_t vif_index, u32 tb_id)
677 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
678 vif, vif_index, tb_id,
679 &net->ipv6.ipmr_seq);
682 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
683 enum fib_event_type event_type,
684 struct mfc6_cache *mfc, u32 tb_id)
686 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
687 &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
690 /* Delete a VIF entry */
691 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
692 struct list_head *head)
694 struct vif_device *v;
695 struct net_device *dev;
696 struct inet6_dev *in6_dev;
698 if (vifi < 0 || vifi >= mrt->maxvif)
699 return -EADDRNOTAVAIL;
701 v = &mrt->vif_table[vifi];
703 if (VIF_EXISTS(mrt, vifi))
704 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
705 FIB_EVENT_VIF_DEL, v, vifi,
708 write_lock_bh(&mrt_lock);
713 write_unlock_bh(&mrt_lock);
714 return -EADDRNOTAVAIL;
717 #ifdef CONFIG_IPV6_PIMSM_V2
718 if (vifi == mrt->mroute_reg_vif_num)
719 mrt->mroute_reg_vif_num = -1;
722 if (vifi + 1 == mrt->maxvif) {
724 for (tmp = vifi - 1; tmp >= 0; tmp--) {
725 if (VIF_EXISTS(mrt, tmp))
728 mrt->maxvif = tmp + 1;
731 write_unlock_bh(&mrt_lock);
733 dev_set_allmulti(dev, -1);
735 in6_dev = __in6_dev_get(dev);
737 in6_dev->cnf.mc_forwarding--;
738 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
739 NETCONFA_MC_FORWARDING,
740 dev->ifindex, &in6_dev->cnf);
743 if ((v->flags & MIFF_REGISTER) && !notify)
744 unregister_netdevice_queue(dev, head);
750 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
752 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
754 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
757 static inline void ip6mr_cache_free(struct mfc6_cache *c)
759 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
762 /* Destroy an unresolved cache entry, killing queued skbs
763 and reporting error to netlink readers.
766 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
768 struct net *net = read_pnet(&mrt->net);
771 atomic_dec(&mrt->cache_resolve_queue_len);
773 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
774 if (ipv6_hdr(skb)->version == 0) {
775 struct nlmsghdr *nlh = skb_pull(skb,
776 sizeof(struct ipv6hdr));
777 nlh->nlmsg_type = NLMSG_ERROR;
778 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
779 skb_trim(skb, nlh->nlmsg_len);
780 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
781 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
790 /* Timer process for all the unresolved queue. */
792 static void ipmr_do_expire_process(struct mr_table *mrt)
794 unsigned long now = jiffies;
795 unsigned long expires = 10 * HZ;
796 struct mr_mfc *c, *next;
798 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
799 if (time_after(c->mfc_un.unres.expires, now)) {
801 unsigned long interval = c->mfc_un.unres.expires - now;
802 if (interval < expires)
808 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
809 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
812 if (!list_empty(&mrt->mfc_unres_queue))
813 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
816 static void ipmr_expire_process(struct timer_list *t)
818 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
820 if (!spin_trylock(&mfc_unres_lock)) {
821 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
825 if (!list_empty(&mrt->mfc_unres_queue))
826 ipmr_do_expire_process(mrt);
828 spin_unlock(&mfc_unres_lock);
831 /* Fill the oifs list. Called with mrt_lock held for writing. */
833 static void ip6mr_update_thresholds(struct mr_table *mrt,
834 struct mr_mfc *cache,
839 cache->mfc_un.res.minvif = MAXMIFS;
840 cache->mfc_un.res.maxvif = 0;
841 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
843 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
844 if (VIF_EXISTS(mrt, vifi) &&
845 ttls[vifi] && ttls[vifi] < 255) {
846 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
847 if (cache->mfc_un.res.minvif > vifi)
848 cache->mfc_un.res.minvif = vifi;
849 if (cache->mfc_un.res.maxvif <= vifi)
850 cache->mfc_un.res.maxvif = vifi + 1;
853 cache->mfc_un.res.lastuse = jiffies;
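/* Example: with oifs 1 and 3 configured (ttls[1] and ttls[3] < 255),
 * minvif becomes 1 and maxvif becomes 4, so the forwarding loop in
 * ip6_mr_forward() only scans vifs in the range [minvif, maxvif).
 */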
856 static int mif6_add(struct net *net, struct mr_table *mrt,
857 struct mif6ctl *vifc, int mrtsock)
859 int vifi = vifc->mif6c_mifi;
860 struct vif_device *v = &mrt->vif_table[vifi];
861 struct net_device *dev;
862 struct inet6_dev *in6_dev;
866 if (VIF_EXISTS(mrt, vifi))
869 switch (vifc->mif6c_flags) {
870 #ifdef CONFIG_IPV6_PIMSM_V2
873 * Special Purpose VIF in PIM
874 * All the packets will be sent to the daemon
876 if (mrt->mroute_reg_vif_num >= 0)
878 dev = ip6mr_reg_vif(net, mrt);
881 err = dev_set_allmulti(dev, 1);
883 unregister_netdevice(dev);
890 dev = dev_get_by_index(net, vifc->mif6c_pifi);
892 return -EADDRNOTAVAIL;
893 err = dev_set_allmulti(dev, 1);
903 in6_dev = __in6_dev_get(dev);
905 in6_dev->cnf.mc_forwarding++;
906 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
907 NETCONFA_MC_FORWARDING,
908 dev->ifindex, &in6_dev->cnf);
911 /* Fill in the VIF structures */
912 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
913 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
916 /* And finish update writing critical data */
917 write_lock_bh(&mrt_lock);
919 #ifdef CONFIG_IPV6_PIMSM_V2
920 if (v->flags & MIFF_REGISTER)
921 mrt->mroute_reg_vif_num = vifi;
923 if (vifi + 1 > mrt->maxvif)
924 mrt->maxvif = vifi + 1;
925 write_unlock_bh(&mrt_lock);
926 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
931 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
932 const struct in6_addr *origin,
933 const struct in6_addr *mcastgrp)
935 struct mfc6_cache_cmp_arg arg = {
936 .mf6c_origin = *origin,
937 .mf6c_mcastgrp = *mcastgrp,
940 return mr_mfc_find(mrt, &arg);
943 /* Look for a (*,G) entry */
944 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
945 struct in6_addr *mcastgrp,
948 struct mfc6_cache_cmp_arg arg = {
949 .mf6c_origin = in6addr_any,
950 .mf6c_mcastgrp = *mcastgrp,
953 if (ipv6_addr_any(mcastgrp))
954 return mr_mfc_find_any_parent(mrt, mifi);
955 return mr_mfc_find_any(mrt, mifi, &arg);
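/* Note on lookup order in the fast path (ip6_mr_input() below): an exact
 * (S,G) entry from ip6mr_cache_find() is preferred; the (*,G) and (*,*)
 * wildcard lookups above are only consulted when no exact entry exists.
 */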
958 /* Look for a (S,G,iif) entry if parent != -1 */
959 static struct mfc6_cache *
960 ip6mr_cache_find_parent(struct mr_table *mrt,
961 const struct in6_addr *origin,
962 const struct in6_addr *mcastgrp,
965 struct mfc6_cache_cmp_arg arg = {
966 .mf6c_origin = *origin,
967 .mf6c_mcastgrp = *mcastgrp,
970 return mr_mfc_find_parent(mrt, &arg, parent);
973 /* Allocate a multicast cache entry */
974 static struct mfc6_cache *ip6mr_cache_alloc(void)
976 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
979 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
980 c->_c.mfc_un.res.minvif = MAXMIFS;
981 c->_c.free = ip6mr_cache_free_rcu;
982 refcount_set(&c->_c.mfc_un.res.refcount, 1);
986 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
988 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
991 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
992 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
997 * A cache entry has gone into a resolved state from queued
1000 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1001 struct mfc6_cache *uc, struct mfc6_cache *c)
1003 struct sk_buff *skb;
1006 * Play the pending entries through our router
1009 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1010 if (ipv6_hdr(skb)->version == 0) {
1011 struct nlmsghdr *nlh = skb_pull(skb,
1012 sizeof(struct ipv6hdr));
1014 if (mr_fill_mroute(mrt, skb, &c->_c,
1015 nlmsg_data(nlh)) > 0) {
1016 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1018 nlh->nlmsg_type = NLMSG_ERROR;
1019 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1020 skb_trim(skb, nlh->nlmsg_len);
1021 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1023 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1025 ip6_mr_forward(net, mrt, skb->dev, skb, c);
1030 * Bounce a cache query up to pim6sd and netlink.
1032 * Called under mrt_lock.
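/* The upcall queued on the mroute socket begins with a struct mrt6msg
 * header (declared in <linux/mroute6.h>); roughly, as a reminder of what
 * is being filled in below:
 *
 *	struct mrt6msg {
 *		__u8		im6_mbz;	- must be zero
 *		__u8		im6_msgtype;	- MRT6MSG_NOCACHE / WRONGMIF / WHOLEPKT
 *		__u16		im6_mif;	- mif the packet arrived on
 *		__u32		im6_pad;
 *		struct in6_addr	im6_src, im6_dst;
 *	};
 */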
1035 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1036 mifi_t mifi, int assert)
1038 struct sock *mroute6_sk;
1039 struct sk_buff *skb;
1040 struct mrt6msg *msg;
1043 #ifdef CONFIG_IPV6_PIMSM_V2
1044 if (assert == MRT6MSG_WHOLEPKT)
1045 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1049 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1054 /* I suppose that internal messages
1055 * do not require checksums */
1057 skb->ip_summed = CHECKSUM_UNNECESSARY;
1059 #ifdef CONFIG_IPV6_PIMSM_V2
1060 if (assert == MRT6MSG_WHOLEPKT) {
1061 /* Ugly, but we have no choice with this interface.
1062 Duplicate old header, fix length etc.
1063 And all this only to mangle msg->im6_msgtype and
1064 to set msg->im6_mbz to "mbz" :-)
1066 skb_push(skb, -skb_network_offset(pkt));
1068 skb_push(skb, sizeof(*msg));
1069 skb_reset_transport_header(skb);
1070 msg = (struct mrt6msg *)skb_transport_header(skb);
1072 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1073 msg->im6_mif = mrt->mroute_reg_vif_num;
1075 msg->im6_src = ipv6_hdr(pkt)->saddr;
1076 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1078 skb->ip_summed = CHECKSUM_UNNECESSARY;
1083 * Copy the IP header
1086 skb_put(skb, sizeof(struct ipv6hdr));
1087 skb_reset_network_header(skb);
1088 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1093 skb_put(skb, sizeof(*msg));
1094 skb_reset_transport_header(skb);
1095 msg = (struct mrt6msg *)skb_transport_header(skb);
1098 msg->im6_msgtype = assert;
1099 msg->im6_mif = mifi;
1101 msg->im6_src = ipv6_hdr(pkt)->saddr;
1102 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1104 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1105 skb->ip_summed = CHECKSUM_UNNECESSARY;
1109 mroute6_sk = rcu_dereference(mrt->mroute_sk);
1116 mrt6msg_netlink_event(mrt, skb);
1118 /* Deliver to user space multicast routing algorithms */
1119 ret = sock_queue_rcv_skb(mroute6_sk, skb);
1122 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1129 /* Queue a packet for resolution; the packet is attached to a (possibly new) unresolved cache entry under mfc_unres_lock. */
1130 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1131 struct sk_buff *skb, struct net_device *dev)
1133 struct mfc6_cache *c;
1137 spin_lock_bh(&mfc_unres_lock);
1138 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1139 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1140 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1148 * Create a new entry if allowable
1151 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1152 (c = ip6mr_cache_alloc_unres()) == NULL) {
1153 spin_unlock_bh(&mfc_unres_lock);
1159 /* Fill in the new cache entry */
1160 c->_c.mfc_parent = -1;
1161 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1162 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1165 * Reflect first query at pim6sd
1167 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1169 /* If the report failed throw the cache entry
1172 spin_unlock_bh(&mfc_unres_lock);
1174 ip6mr_cache_free(c);
1179 atomic_inc(&mrt->cache_resolve_queue_len);
1180 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1181 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1183 ipmr_do_expire_process(mrt);
1186 /* See if we can append the packet */
1187 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1193 skb->skb_iif = dev->ifindex;
1195 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1199 spin_unlock_bh(&mfc_unres_lock);
1204 * MFC6 cache manipulation by user space
1207 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1210 struct mfc6_cache *c;
1212 /* The entries are added/deleted only under RTNL */
1214 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1215 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1219 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1220 list_del_rcu(&c->_c.list);
1222 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1223 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1224 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1225 mr_cache_put(&c->_c);
1229 static int ip6mr_device_event(struct notifier_block *this,
1230 unsigned long event, void *ptr)
1232 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1233 struct net *net = dev_net(dev);
1234 struct mr_table *mrt;
1235 struct vif_device *v;
1238 if (event != NETDEV_UNREGISTER)
1241 ip6mr_for_each_table(mrt, net) {
1242 v = &mrt->vif_table[0];
1243 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1245 mif6_delete(mrt, ct, 1, NULL);
1252 static unsigned int ip6mr_seq_read(struct net *net)
1256 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1259 static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1261 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1262 ip6mr_mr_table_iter, &mrt_lock);
1265 static struct notifier_block ip6_mr_notifier = {
1266 .notifier_call = ip6mr_device_event
1269 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1270 .family = RTNL_FAMILY_IP6MR,
1271 .fib_seq_read = ip6mr_seq_read,
1272 .fib_dump = ip6mr_dump,
1273 .owner = THIS_MODULE,
1276 static int __net_init ip6mr_notifier_init(struct net *net)
1278 struct fib_notifier_ops *ops;
1280 net->ipv6.ipmr_seq = 0;
1282 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1284 return PTR_ERR(ops);
1286 net->ipv6.ip6mr_notifier_ops = ops;
1291 static void __net_exit ip6mr_notifier_exit(struct net *net)
1293 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1294 net->ipv6.ip6mr_notifier_ops = NULL;
1297 /* Setup for IP multicast routing */
1298 static int __net_init ip6mr_net_init(struct net *net)
1302 err = ip6mr_notifier_init(net);
1306 err = ip6mr_rules_init(net);
1308 goto ip6mr_rules_fail;
1310 #ifdef CONFIG_PROC_FS
1312 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1313 sizeof(struct mr_vif_iter)))
1315 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1316 sizeof(struct mr_mfc_iter)))
1317 goto proc_cache_fail;
1322 #ifdef CONFIG_PROC_FS
1324 remove_proc_entry("ip6_mr_vif", net->proc_net);
1326 ip6mr_rules_exit(net);
1329 ip6mr_notifier_exit(net);
1333 static void __net_exit ip6mr_net_exit(struct net *net)
1335 #ifdef CONFIG_PROC_FS
1336 remove_proc_entry("ip6_mr_cache", net->proc_net);
1337 remove_proc_entry("ip6_mr_vif", net->proc_net);
1339 ip6mr_rules_exit(net);
1340 ip6mr_notifier_exit(net);
1343 static struct pernet_operations ip6mr_net_ops = {
1344 .init = ip6mr_net_init,
1345 .exit = ip6mr_net_exit,
1348 int __init ip6_mr_init(void)
1352 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1353 sizeof(struct mfc6_cache),
1354 0, SLAB_HWCACHE_ALIGN,
1359 err = register_pernet_subsys(&ip6mr_net_ops);
1361 goto reg_pernet_fail;
1363 err = register_netdevice_notifier(&ip6_mr_notifier);
1365 goto reg_notif_fail;
1366 #ifdef CONFIG_IPV6_PIMSM_V2
1367 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1368 pr_err("%s: can't add PIM protocol\n", __func__);
1370 goto add_proto_fail;
1373 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1374 NULL, ip6mr_rtm_dumproute, 0);
1378 #ifdef CONFIG_IPV6_PIMSM_V2
1379 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1381 unregister_netdevice_notifier(&ip6_mr_notifier);
1384 unregister_pernet_subsys(&ip6mr_net_ops);
1386 kmem_cache_destroy(mrt_cachep);
1390 void ip6_mr_cleanup(void)
1392 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1393 #ifdef CONFIG_IPV6_PIMSM_V2
1394 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1396 unregister_netdevice_notifier(&ip6_mr_notifier);
1397 unregister_pernet_subsys(&ip6mr_net_ops);
1398 kmem_cache_destroy(mrt_cachep);
1401 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1402 struct mf6cctl *mfc, int mrtsock, int parent)
1404 unsigned char ttls[MAXMIFS];
1405 struct mfc6_cache *uc, *c;
1410 if (mfc->mf6cc_parent >= MAXMIFS)
1413 memset(ttls, 255, MAXMIFS);
1414 for (i = 0; i < MAXMIFS; i++) {
1415 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1419 /* The entries are added/deleted only under RTNL */
1421 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1422 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1425 write_lock_bh(&mrt_lock);
1426 c->_c.mfc_parent = mfc->mf6cc_parent;
1427 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1429 c->_c.mfc_flags |= MFC_STATIC;
1430 write_unlock_bh(&mrt_lock);
1431 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1433 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1437 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1438 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1441 c = ip6mr_cache_alloc();
1445 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1446 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1447 c->_c.mfc_parent = mfc->mf6cc_parent;
1448 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1450 c->_c.mfc_flags |= MFC_STATIC;
1452 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1455 pr_err("ip6mr: rhtable insert error %d\n", err);
1456 ip6mr_cache_free(c);
1459 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1461 /* Check whether this new entry resolves a queued unresolved entry. If so, we
1462 * need to send on the queued frames and tidy up.
1465 spin_lock_bh(&mfc_unres_lock);
1466 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1467 uc = (struct mfc6_cache *)_uc;
1468 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1469 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1470 list_del(&_uc->list);
1471 atomic_dec(&mrt->cache_resolve_queue_len);
1476 if (list_empty(&mrt->mfc_unres_queue))
1477 del_timer(&mrt->ipmr_expire_timer);
1478 spin_unlock_bh(&mfc_unres_lock);
1481 ip6mr_cache_resolve(net, mrt, uc, c);
1482 ip6mr_cache_free(uc);
1484 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1486 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1491 * Close the multicast socket, and clear the vif tables etc
1494 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1496 struct mr_mfc *c, *tmp;
1500 /* Shut down all active vif entries */
1501 for (i = 0; i < mrt->maxvif; i++) {
1502 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1504 mif6_delete(mrt, i, 0, &list);
1506 unregister_netdevice_many(&list);
1508 /* Wipe the cache */
1509 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1510 if (!all && (c->mfc_flags & MFC_STATIC))
1512 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1513 list_del_rcu(&c->list);
1514 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1518 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1519 spin_lock_bh(&mfc_unres_lock);
1520 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1522 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1523 FIB_EVENT_ENTRY_DEL,
1524 (struct mfc6_cache *)c,
1526 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1528 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1530 spin_unlock_bh(&mfc_unres_lock);
1534 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1537 struct net *net = sock_net(sk);
1540 write_lock_bh(&mrt_lock);
1541 if (rtnl_dereference(mrt->mroute_sk)) {
1544 rcu_assign_pointer(mrt->mroute_sk, sk);
1545 sock_set_flag(sk, SOCK_RCU_FREE);
1546 net->ipv6.devconf_all->mc_forwarding++;
1548 write_unlock_bh(&mrt_lock);
1551 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1552 NETCONFA_MC_FORWARDING,
1553 NETCONFA_IFINDEX_ALL,
1554 net->ipv6.devconf_all);
1560 int ip6mr_sk_done(struct sock *sk)
1563 struct net *net = sock_net(sk);
1564 struct mr_table *mrt;
1566 if (sk->sk_type != SOCK_RAW ||
1567 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1571 ip6mr_for_each_table(mrt, net) {
1572 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1573 write_lock_bh(&mrt_lock);
1574 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1575 /* Note that mroute_sk had SOCK_RCU_FREE set,
1576 * so the RCU grace period before sk freeing
1577 * is guaranteed by sk_destruct()
1579 net->ipv6.devconf_all->mc_forwarding--;
1580 write_unlock_bh(&mrt_lock);
1581 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1582 NETCONFA_MC_FORWARDING,
1583 NETCONFA_IFINDEX_ALL,
1584 net->ipv6.devconf_all);
1586 mroute_clean_tables(mrt, false);
1596 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1598 struct mr_table *mrt;
1599 struct flowi6 fl6 = {
1600 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1601 .flowi6_oif = skb->dev->ifindex,
1602 .flowi6_mark = skb->mark,
1605 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1608 return rcu_access_pointer(mrt->mroute_sk);
1610 EXPORT_SYMBOL(mroute6_is_socket);
1613 * Socket options and virtual interface manipulation. The whole
1614 * virtual interface system is a complete heap, but unfortunately
1615 * that's how BSD mrouted happens to think. Maybe one day with a proper
1616 * MOSPF/PIM router set up we can clean this up.
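/* For orientation, a routing daemon such as pim6sd drives this interface
 * roughly as sketched below (error handling omitted, values illustrative;
 * the MRT6_* options and struct mif6ctl/mf6cctl come from <linux/mroute6.h>):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 *	struct mf6cctl mfc = { .mf6cc_parent = 0, /. origin, group, oif set ./ };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 *
 *	... then read MRT6MSG_* upcalls from s, and MRT6_DONE on shutdown ...
 */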
1619 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1621 int ret, parent = 0;
1625 struct net *net = sock_net(sk);
1626 struct mr_table *mrt;
1628 if (sk->sk_type != SOCK_RAW ||
1629 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1632 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1636 if (optname != MRT6_INIT) {
1637 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1638 !ns_capable(net->user_ns, CAP_NET_ADMIN))
1644 if (optlen < sizeof(int))
1647 return ip6mr_sk_init(mrt, sk);
1650 return ip6mr_sk_done(sk);
1653 if (optlen < sizeof(vif))
1655 if (copy_from_user(&vif, optval, sizeof(vif)))
1657 if (vif.mif6c_mifi >= MAXMIFS)
1660 ret = mif6_add(net, mrt, &vif,
1661 sk == rtnl_dereference(mrt->mroute_sk));
1666 if (optlen < sizeof(mifi_t))
1668 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1671 ret = mif6_delete(mrt, mifi, 0, NULL);
1676 * Manipulate the forwarding caches. These live
1677 * in a sort of kernel/user symbiosis.
1683 case MRT6_ADD_MFC_PROXY:
1684 case MRT6_DEL_MFC_PROXY:
1685 if (optlen < sizeof(mfc))
1687 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1690 parent = mfc.mf6cc_parent;
1692 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1693 ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1695 ret = ip6mr_mfc_add(net, mrt, &mfc,
1697 rtnl_dereference(mrt->mroute_sk),
1703 * Control PIM assert (enabling PIM also enables assert)
1709 if (optlen != sizeof(v))
1711 if (get_user(v, (int __user *)optval))
1713 mrt->mroute_do_assert = v;
1717 #ifdef CONFIG_IPV6_PIMSM_V2
1722 if (optlen != sizeof(v))
1724 if (get_user(v, (int __user *)optval))
1729 if (v != mrt->mroute_do_pim) {
1730 mrt->mroute_do_pim = v;
1731 mrt->mroute_do_assert = v;
1738 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1743 if (optlen != sizeof(u32))
1745 if (get_user(v, (u32 __user *)optval))
1747 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1748 if (v != RT_TABLE_DEFAULT && v >= 100000000)
1750 if (sk == rcu_access_pointer(mrt->mroute_sk))
1755 mrt = ip6mr_new_table(net, v);
1759 raw6_sk(sk)->ip6mr_table = v;
1765 * Spurious command, or MRT6_VERSION which you cannot
1769 return -ENOPROTOOPT;
1774 * Getsockopt support for the multicast routing system.
1777 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1782 struct net *net = sock_net(sk);
1783 struct mr_table *mrt;
1785 if (sk->sk_type != SOCK_RAW ||
1786 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1789 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1797 #ifdef CONFIG_IPV6_PIMSM_V2
1799 val = mrt->mroute_do_pim;
1803 val = mrt->mroute_do_assert;
1806 return -ENOPROTOOPT;
1809 if (get_user(olr, optlen))
1812 olr = min_t(int, olr, sizeof(int));
1816 if (put_user(olr, optlen))
1818 if (copy_to_user(optval, &val, olr))
1824 * The IPv6 multicast ioctl support routines.
1827 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1829 struct sioc_sg_req6 sr;
1830 struct sioc_mif_req6 vr;
1831 struct vif_device *vif;
1832 struct mfc6_cache *c;
1833 struct net *net = sock_net(sk);
1834 struct mr_table *mrt;
1836 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1841 case SIOCGETMIFCNT_IN6:
1842 if (copy_from_user(&vr, arg, sizeof(vr)))
1844 if (vr.mifi >= mrt->maxvif)
1846 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1847 read_lock(&mrt_lock);
1848 vif = &mrt->vif_table[vr.mifi];
1849 if (VIF_EXISTS(mrt, vr.mifi)) {
1850 vr.icount = vif->pkt_in;
1851 vr.ocount = vif->pkt_out;
1852 vr.ibytes = vif->bytes_in;
1853 vr.obytes = vif->bytes_out;
1854 read_unlock(&mrt_lock);
1856 if (copy_to_user(arg, &vr, sizeof(vr)))
1860 read_unlock(&mrt_lock);
1861 return -EADDRNOTAVAIL;
1862 case SIOCGETSGCNT_IN6:
1863 if (copy_from_user(&sr, arg, sizeof(sr)))
1867 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1869 sr.pktcnt = c->_c.mfc_un.res.pkt;
1870 sr.bytecnt = c->_c.mfc_un.res.bytes;
1871 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1874 if (copy_to_user(arg, &sr, sizeof(sr)))
1879 return -EADDRNOTAVAIL;
1881 return -ENOIOCTLCMD;
1885 #ifdef CONFIG_COMPAT
1886 struct compat_sioc_sg_req6 {
1887 struct sockaddr_in6 src;
1888 struct sockaddr_in6 grp;
1889 compat_ulong_t pktcnt;
1890 compat_ulong_t bytecnt;
1891 compat_ulong_t wrong_if;
1894 struct compat_sioc_mif_req6 {
1896 compat_ulong_t icount;
1897 compat_ulong_t ocount;
1898 compat_ulong_t ibytes;
1899 compat_ulong_t obytes;
1902 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1904 struct compat_sioc_sg_req6 sr;
1905 struct compat_sioc_mif_req6 vr;
1906 struct vif_device *vif;
1907 struct mfc6_cache *c;
1908 struct net *net = sock_net(sk);
1909 struct mr_table *mrt;
1911 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1916 case SIOCGETMIFCNT_IN6:
1917 if (copy_from_user(&vr, arg, sizeof(vr)))
1919 if (vr.mifi >= mrt->maxvif)
1921 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1922 read_lock(&mrt_lock);
1923 vif = &mrt->vif_table[vr.mifi];
1924 if (VIF_EXISTS(mrt, vr.mifi)) {
1925 vr.icount = vif->pkt_in;
1926 vr.ocount = vif->pkt_out;
1927 vr.ibytes = vif->bytes_in;
1928 vr.obytes = vif->bytes_out;
1929 read_unlock(&mrt_lock);
1931 if (copy_to_user(arg, &vr, sizeof(vr)))
1935 read_unlock(&mrt_lock);
1936 return -EADDRNOTAVAIL;
1937 case SIOCGETSGCNT_IN6:
1938 if (copy_from_user(&sr, arg, sizeof(sr)))
1942 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1944 sr.pktcnt = c->_c.mfc_un.res.pkt;
1945 sr.bytecnt = c->_c.mfc_un.res.bytes;
1946 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1949 if (copy_to_user(arg, &sr, sizeof(sr)))
1954 return -EADDRNOTAVAIL;
1956 return -ENOIOCTLCMD;
1961 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1963 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1964 IPSTATS_MIB_OUTFORWDATAGRAMS);
1965 __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1966 IPSTATS_MIB_OUTOCTETS, skb->len);
1967 return dst_output(net, sk, skb);
1971 * Processing handlers for ip6mr_forward
1974 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1975 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1977 struct ipv6hdr *ipv6h;
1978 struct vif_device *vif = &mrt->vif_table[vifi];
1979 struct net_device *dev;
1980 struct dst_entry *dst;
1986 #ifdef CONFIG_IPV6_PIMSM_V2
1987 if (vif->flags & MIFF_REGISTER) {
1989 vif->bytes_out += skb->len;
1990 vif->dev->stats.tx_bytes += skb->len;
1991 vif->dev->stats.tx_packets++;
1992 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1997 ipv6h = ipv6_hdr(skb);
1999 fl6 = (struct flowi6) {
2000 .flowi6_oif = vif->link,
2001 .daddr = ipv6h->daddr,
2004 dst = ip6_route_output(net, NULL, &fl6);
2011 skb_dst_set(skb, dst);
2014 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
2015 * not only before forwarding, but also after forwarding on all output
2016 * interfaces. Clearly, if the mrouter host runs a multicast
2017 * program, that program should receive the packets regardless of which
2018 * interface it joined on.
2019 * If we did not do this, the program would have to join on all
2020 * interfaces. On the other hand, a multihomed host (or router, but
2021 * not mrouter) cannot join on more than one interface - it would
2022 * result in receiving duplicate packets.
2027 vif->bytes_out += skb->len;
2029 /* We are about to write */
2030 /* XXX: extension headers? */
2031 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2034 ipv6h = ipv6_hdr(skb);
2037 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2039 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2040 net, NULL, skb, skb->dev, dev,
2041 ip6mr_forward2_finish);
2048 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2052 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2053 if (mrt->vif_table[ct].dev == dev)
2059 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2060 struct net_device *dev, struct sk_buff *skb,
2061 struct mfc6_cache *c)
2065 int true_vifi = ip6mr_find_vif(mrt, dev);
2067 vif = c->_c.mfc_parent;
2068 c->_c.mfc_un.res.pkt++;
2069 c->_c.mfc_un.res.bytes += skb->len;
2070 c->_c.mfc_un.res.lastuse = jiffies;
2072 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2073 struct mfc6_cache *cache_proxy;
2075 /* For an (*,G) entry, we only check that the incoming
2076 * interface is part of the static tree.
2079 cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2081 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2089 * Wrong interface: drop packet and (maybe) send PIM assert.
2091 if (mrt->vif_table[vif].dev != dev) {
2092 c->_c.mfc_un.res.wrong_if++;
2094 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2095 /* pimsm uses asserts, when switching from RPT to SPT,
2096 so that we cannot check that packet arrived on an oif.
2097 It is bad, but otherwise we would need to move pretty
2098 large chunk of pimd to kernel. Ough... --ANK
2100 (mrt->mroute_do_pim ||
2101 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2103 c->_c.mfc_un.res.last_assert +
2104 MFC_ASSERT_THRESH)) {
2105 c->_c.mfc_un.res.last_assert = jiffies;
2106 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2112 mrt->vif_table[vif].pkt_in++;
2113 mrt->vif_table[vif].bytes_in += skb->len;
2118 if (ipv6_addr_any(&c->mf6c_origin) &&
2119 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2120 if (true_vifi >= 0 &&
2121 true_vifi != c->_c.mfc_parent &&
2122 ipv6_hdr(skb)->hop_limit >
2123 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2124 /* It's an (*,*) entry and the packet is not coming from
2125 * the upstream: forward the packet to the upstream
2128 psend = c->_c.mfc_parent;
2133 for (ct = c->_c.mfc_un.res.maxvif - 1;
2134 ct >= c->_c.mfc_un.res.minvif; ct--) {
2135 /* For (*,G) entry, don't forward to the incoming interface */
2136 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2137 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2139 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2141 ip6mr_forward2(net, mrt, skb2,
2149 ip6mr_forward2(net, mrt, skb, c, psend);
2159 * Multicast packets for forwarding arrive here
2162 int ip6_mr_input(struct sk_buff *skb)
2164 struct mfc6_cache *cache;
2165 struct net *net = dev_net(skb->dev);
2166 struct mr_table *mrt;
2167 struct flowi6 fl6 = {
2168 .flowi6_iif = skb->dev->ifindex,
2169 .flowi6_mark = skb->mark,
2172 struct net_device *dev;
2174 /* skb->dev passed in is the master dev for vrfs.
2175 * Get the proper interface that does have a vif associated with it.
2178 if (netif_is_l3_master(skb->dev)) {
2179 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2186 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2192 read_lock(&mrt_lock);
2193 cache = ip6mr_cache_find(mrt,
2194 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2196 int vif = ip6mr_find_vif(mrt, dev);
2199 cache = ip6mr_cache_find_any(mrt,
2200 &ipv6_hdr(skb)->daddr,
2205 * No usable cache entry
2210 vif = ip6mr_find_vif(mrt, dev);
2212 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2213 read_unlock(&mrt_lock);
2217 read_unlock(&mrt_lock);
2222 ip6_mr_forward(net, mrt, dev, skb, cache);
2224 read_unlock(&mrt_lock);
2229 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2233 struct mr_table *mrt;
2234 struct mfc6_cache *cache;
2235 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2237 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2241 read_lock(&mrt_lock);
2242 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2243 if (!cache && skb->dev) {
2244 int vif = ip6mr_find_vif(mrt, skb->dev);
2247 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2252 struct sk_buff *skb2;
2253 struct ipv6hdr *iph;
2254 struct net_device *dev;
2258 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2259 read_unlock(&mrt_lock);
2263 /* really correct? */
2264 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2266 read_unlock(&mrt_lock);
2270 NETLINK_CB(skb2).portid = portid;
2271 skb_reset_transport_header(skb2);
2273 skb_put(skb2, sizeof(struct ipv6hdr));
2274 skb_reset_network_header(skb2);
2276 iph = ipv6_hdr(skb2);
2279 iph->flow_lbl[0] = 0;
2280 iph->flow_lbl[1] = 0;
2281 iph->flow_lbl[2] = 0;
2282 iph->payload_len = 0;
2283 iph->nexthdr = IPPROTO_NONE;
2285 iph->saddr = rt->rt6i_src.addr;
2286 iph->daddr = rt->rt6i_dst.addr;
2288 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2289 read_unlock(&mrt_lock);
2294 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2295 read_unlock(&mrt_lock);
2299 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2300 u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2303 struct nlmsghdr *nlh;
2307 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2311 rtm = nlmsg_data(nlh);
2312 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2313 rtm->rtm_dst_len = 128;
2314 rtm->rtm_src_len = 128;
2316 rtm->rtm_table = mrt->id;
2317 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2318 goto nla_put_failure;
2319 rtm->rtm_type = RTN_MULTICAST;
2320 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2321 if (c->_c.mfc_flags & MFC_STATIC)
2322 rtm->rtm_protocol = RTPROT_STATIC;
2324 rtm->rtm_protocol = RTPROT_MROUTED;
2327 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2328 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2329 goto nla_put_failure;
2330 err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2331 /* do not break the dump if cache is unresolved */
2332 if (err < 0 && err != -ENOENT)
2333 goto nla_put_failure;
2335 nlmsg_end(skb, nlh);
2339 nlmsg_cancel(skb, nlh);
2343 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2344 u32 portid, u32 seq, struct mr_mfc *c,
2347 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2351 static int mr6_msgsize(bool unresolved, int maxvif)
2354 NLMSG_ALIGN(sizeof(struct rtmsg))
2355 + nla_total_size(4) /* RTA_TABLE */
2356 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2357 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2362 + nla_total_size(4) /* RTA_IIF */
2363 + nla_total_size(0) /* RTA_MULTIPATH */
2364 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2366 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2372 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2375 struct net *net = read_pnet(&mrt->net);
2376 struct sk_buff *skb;
2379 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2384 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2388 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2394 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2397 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2400 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2401 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */
2402 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */
2403 /* IP6MRA_CREPORT_SRC_ADDR */
2404 + nla_total_size(sizeof(struct in6_addr))
2405 /* IP6MRA_CREPORT_DST_ADDR */
2406 + nla_total_size(sizeof(struct in6_addr))
2407 /* IP6MRA_CREPORT_PKT */
2408 + nla_total_size(payloadlen)
2414 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2416 struct net *net = read_pnet(&mrt->net);
2417 struct nlmsghdr *nlh;
2418 struct rtgenmsg *rtgenm;
2419 struct mrt6msg *msg;
2420 struct sk_buff *skb;
2424 payloadlen = pkt->len - sizeof(struct mrt6msg);
2425 msg = (struct mrt6msg *)skb_transport_header(pkt);
2427 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2431 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2432 sizeof(struct rtgenmsg), 0);
2435 rtgenm = nlmsg_data(nlh);
2436 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2437 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2438 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2439 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2441 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2443 goto nla_put_failure;
2445 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2446 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2447 nla_data(nla), payloadlen))
2448 goto nla_put_failure;
2450 nlmsg_end(skb, nlh);
2452 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2456 nlmsg_cancel(skb, nlh);
2459 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2462 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2464 const struct nlmsghdr *nlh = cb->nlh;
2465 struct fib_dump_filter filter = {};
2468 if (cb->strict_check) {
2469 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2475 if (filter.table_id) {
2476 struct mr_table *mrt;
2478 mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2480 if (filter.dump_all_families)
2483 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2486 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2487 &mfc_unres_lock, &filter);
2488 return skb->len ? : err;
2491 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2492 _ip6mr_fill_mroute, &mfc_unres_lock, &filter);