2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
29 #include <linux/sysctl.h>
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
36 #include <net/netevent.h>
37 #include <net/netlink.h>
38 #include <linux/rtnetlink.h>
39 #include <linux/random.h>
40 #include <linux/string.h>
41 #include <linux/log2.h>
42 #include <linux/inetdevice.h>
43 #include <net/addrconf.h>
45 #include <trace/events/neigh.h>
/* Debug print gated on compile-time NEIGH_DEBUG level.  Wrapped in
 * do { } while (0) so it expands safely as a single statement inside
 * unbraced if/else bodies (the wrapper lines were lost in this region).
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
55 #define PNEIGH_HASHMASK 0xF
57 static void neigh_timer_handler(struct timer_list *t);
58 static void __neigh_notify(struct neighbour *n, int type, int flags,
60 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
61 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
62 struct net_device *dev);
65 static const struct seq_operations neigh_stat_seq_ops;
69 Neighbour hash table buckets are protected with rwlock tbl->lock.
71 - All the scans/updates to hash buckets MUST be made under this lock.
72 - NOTHING clever should be made under this lock: no callbacks
73 to protocol backends, no attempts to send something to network.
74 It will result in deadlocks, if backend/driver wants to use neighbour
76 - If the entry requires some non-trivial actions, increase
77 its reference count and release table lock.
79 Neighbour entries are protected:
80 - with reference count.
81 - with rwlock neigh->lock
83 Reference count prevents destruction.
85 neigh->lock mainly serializes ll address data and its validity state.
86 However, the same lock is used to protect another entry fields:
90 Again, nothing clever shall be made under neigh->lock,
91 the most complicated procedure, which we allow is dev->hard_header.
92 It is supposed, that dev->hard_header is simplistic and does
93 not make callbacks to neighbour tables.
96 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 static void neigh_cleanup_and_release(struct neighbour *neigh)
104 if (neigh->parms->neigh_cleanup)
105 neigh->parms->neigh_cleanup(neigh);
107 trace_neigh_cleanup_and_release(neigh, 0);
108 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
109 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
110 neigh_release(neigh);
/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* base == 0 disables randomization; otherwise pick uniformly in
	 * [base/2, 3*base/2).
	 */
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
125 static void neigh_mark_dead(struct neighbour *n)
128 if (!list_empty(&n->gc_list)) {
129 list_del_init(&n->gc_list);
130 atomic_dec(&n->tbl->gc_entries);
134 static void neigh_update_gc_list(struct neighbour *n)
136 bool on_gc_list, exempt_from_gc;
138 write_lock_bh(&n->tbl->lock);
139 write_lock(&n->lock);
141 /* remove from the gc list if new state is permanent or if neighbor
142 * is externally learned; otherwise entry should be on the gc list
144 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
145 n->flags & NTF_EXT_LEARNED;
146 on_gc_list = !list_empty(&n->gc_list);
148 if (exempt_from_gc && on_gc_list) {
149 list_del_init(&n->gc_list);
150 atomic_dec(&n->tbl->gc_entries);
151 } else if (!exempt_from_gc && !on_gc_list) {
152 /* add entries to the tail; cleaning removes from the front */
153 list_add_tail(&n->gc_list, &n->tbl->gc_list);
154 atomic_inc(&n->tbl->gc_entries);
157 write_unlock(&n->lock);
158 write_unlock_bh(&n->tbl->lock);
161 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
167 if (!(flags & NEIGH_UPDATE_F_ADMIN))
170 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
171 if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
172 if (ndm_flags & NTF_EXT_LEARNED)
173 neigh->flags |= NTF_EXT_LEARNED;
175 neigh->flags &= ~NTF_EXT_LEARNED;
183 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
184 struct neigh_table *tbl)
188 write_lock(&n->lock);
189 if (refcount_read(&n->refcnt) == 1) {
190 struct neighbour *neigh;
192 neigh = rcu_dereference_protected(n->next,
193 lockdep_is_held(&tbl->lock));
194 rcu_assign_pointer(*np, neigh);
198 write_unlock(&n->lock);
200 neigh_cleanup_and_release(n);
204 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
206 struct neigh_hash_table *nht;
207 void *pkey = ndel->primary_key;
210 struct neighbour __rcu **np;
212 nht = rcu_dereference_protected(tbl->nht,
213 lockdep_is_held(&tbl->lock));
214 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
215 hash_val = hash_val >> (32 - nht->hash_shift);
217 np = &nht->hash_buckets[hash_val];
218 while ((n = rcu_dereference_protected(*np,
219 lockdep_is_held(&tbl->lock)))) {
221 return neigh_del(n, np, tbl);
227 static int neigh_forced_gc(struct neigh_table *tbl)
229 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
230 unsigned long tref = jiffies - 5 * HZ;
231 struct neighbour *n, *tmp;
234 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
236 write_lock_bh(&tbl->lock);
238 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
239 if (refcount_read(&n->refcnt) == 1) {
242 write_lock(&n->lock);
243 if ((n->nud_state == NUD_FAILED) ||
244 time_after(tref, n->updated))
246 write_unlock(&n->lock);
248 if (remove && neigh_remove_one(n, tbl))
250 if (shrunk >= max_clean)
255 tbl->last_flush = jiffies;
257 write_unlock_bh(&tbl->lock);
262 static void neigh_add_timer(struct neighbour *n, unsigned long when)
265 if (unlikely(mod_timer(&n->timer, when))) {
266 printk("NEIGH: BUG, double timer add, state is %x\n",
272 static int neigh_del_timer(struct neighbour *n)
274 if ((n->nud_state & NUD_IN_TIMER) &&
275 del_timer(&n->timer)) {
282 static void pneigh_queue_purge(struct sk_buff_head *list)
286 while ((skb = skb_dequeue(list)) != NULL) {
292 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
296 struct neigh_hash_table *nht;
298 nht = rcu_dereference_protected(tbl->nht,
299 lockdep_is_held(&tbl->lock));
301 for (i = 0; i < (1 << nht->hash_shift); i++) {
303 struct neighbour __rcu **np = &nht->hash_buckets[i];
305 while ((n = rcu_dereference_protected(*np,
306 lockdep_is_held(&tbl->lock))) != NULL) {
307 if (dev && n->dev != dev) {
311 if (skip_perm && n->nud_state & NUD_PERMANENT) {
315 rcu_assign_pointer(*np,
316 rcu_dereference_protected(n->next,
317 lockdep_is_held(&tbl->lock)));
318 write_lock(&n->lock);
321 if (refcount_read(&n->refcnt) != 1) {
322 /* The most unpleasant situation.
323 We must destroy neighbour entry,
324 but someone still uses it.
326 The destroy will be delayed until
327 the last user releases us, but
328 we must kill timers etc. and move
331 __skb_queue_purge(&n->arp_queue);
332 n->arp_queue_len_bytes = 0;
333 n->output = neigh_blackhole;
334 if (n->nud_state & NUD_VALID)
335 n->nud_state = NUD_NOARP;
337 n->nud_state = NUD_NONE;
338 neigh_dbg(2, "neigh %p is stray\n", n);
340 write_unlock(&n->lock);
341 neigh_cleanup_and_release(n);
346 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
348 write_lock_bh(&tbl->lock);
349 neigh_flush_dev(tbl, dev, false);
350 write_unlock_bh(&tbl->lock);
352 EXPORT_SYMBOL(neigh_changeaddr);
354 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
357 write_lock_bh(&tbl->lock);
358 neigh_flush_dev(tbl, dev, skip_perm);
359 pneigh_ifdown_and_unlock(tbl, dev);
361 del_timer_sync(&tbl->proxy_timer);
362 pneigh_queue_purge(&tbl->proxy_queue);
366 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
368 __neigh_ifdown(tbl, dev, true);
371 EXPORT_SYMBOL(neigh_carrier_down);
373 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
375 __neigh_ifdown(tbl, dev, false);
378 EXPORT_SYMBOL(neigh_ifdown);
380 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
381 struct net_device *dev,
384 struct neighbour *n = NULL;
385 unsigned long now = jiffies;
391 entries = atomic_inc_return(&tbl->gc_entries) - 1;
392 if (entries >= tbl->gc_thresh3 ||
393 (entries >= tbl->gc_thresh2 &&
394 time_after(now, tbl->last_flush + 5 * HZ))) {
395 if (!neigh_forced_gc(tbl) &&
396 entries >= tbl->gc_thresh3) {
397 net_info_ratelimited("%s: neighbor table overflow!\n",
399 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
405 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
409 __skb_queue_head_init(&n->arp_queue);
410 rwlock_init(&n->lock);
411 seqlock_init(&n->ha_lock);
412 n->updated = n->used = now;
413 n->nud_state = NUD_NONE;
414 n->output = neigh_blackhole;
415 seqlock_init(&n->hh.hh_lock);
416 n->parms = neigh_parms_clone(&tbl->parms);
417 timer_setup(&n->timer, neigh_timer_handler, 0);
419 NEIGH_CACHE_STAT_INC(tbl, allocs);
421 refcount_set(&n->refcnt, 1);
423 INIT_LIST_HEAD(&n->gc_list);
425 atomic_inc(&tbl->entries);
431 atomic_dec(&tbl->gc_entries);
435 static void neigh_get_hash_rnd(u32 *x)
437 *x = get_random_u32() | 1;
440 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
442 size_t size = (1 << shift) * sizeof(struct neighbour *);
443 struct neigh_hash_table *ret;
444 struct neighbour __rcu **buckets;
447 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
450 if (size <= PAGE_SIZE) {
451 buckets = kzalloc(size, GFP_ATOMIC);
453 buckets = (struct neighbour __rcu **)
454 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
456 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
462 ret->hash_buckets = buckets;
463 ret->hash_shift = shift;
464 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
465 neigh_get_hash_rnd(&ret->hash_rnd[i]);
469 static void neigh_hash_free_rcu(struct rcu_head *head)
471 struct neigh_hash_table *nht = container_of(head,
472 struct neigh_hash_table,
474 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
475 struct neighbour __rcu **buckets = nht->hash_buckets;
477 if (size <= PAGE_SIZE) {
480 kmemleak_free(buckets);
481 free_pages((unsigned long)buckets, get_order(size));
486 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
487 unsigned long new_shift)
489 unsigned int i, hash;
490 struct neigh_hash_table *new_nht, *old_nht;
492 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
494 old_nht = rcu_dereference_protected(tbl->nht,
495 lockdep_is_held(&tbl->lock));
496 new_nht = neigh_hash_alloc(new_shift);
500 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
501 struct neighbour *n, *next;
503 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
504 lockdep_is_held(&tbl->lock));
507 hash = tbl->hash(n->primary_key, n->dev,
510 hash >>= (32 - new_nht->hash_shift);
511 next = rcu_dereference_protected(n->next,
512 lockdep_is_held(&tbl->lock));
514 rcu_assign_pointer(n->next,
515 rcu_dereference_protected(
516 new_nht->hash_buckets[hash],
517 lockdep_is_held(&tbl->lock)));
518 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
522 rcu_assign_pointer(tbl->nht, new_nht);
523 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
527 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
528 struct net_device *dev)
532 NEIGH_CACHE_STAT_INC(tbl, lookups);
535 n = __neigh_lookup_noref(tbl, pkey, dev);
537 if (!refcount_inc_not_zero(&n->refcnt))
539 NEIGH_CACHE_STAT_INC(tbl, hits);
542 rcu_read_unlock_bh();
545 EXPORT_SYMBOL(neigh_lookup);
547 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
551 unsigned int key_len = tbl->key_len;
553 struct neigh_hash_table *nht;
555 NEIGH_CACHE_STAT_INC(tbl, lookups);
558 nht = rcu_dereference_bh(tbl->nht);
559 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
561 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
563 n = rcu_dereference_bh(n->next)) {
564 if (!memcmp(n->primary_key, pkey, key_len) &&
565 net_eq(dev_net(n->dev), net)) {
566 if (!refcount_inc_not_zero(&n->refcnt))
568 NEIGH_CACHE_STAT_INC(tbl, hits);
573 rcu_read_unlock_bh();
576 EXPORT_SYMBOL(neigh_lookup_nodev);
578 static struct neighbour *___neigh_create(struct neigh_table *tbl,
580 struct net_device *dev,
581 bool exempt_from_gc, bool want_ref)
583 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
585 unsigned int key_len = tbl->key_len;
587 struct neigh_hash_table *nht;
590 rc = ERR_PTR(-ENOBUFS);
594 memcpy(n->primary_key, pkey, key_len);
598 /* Protocol specific setup. */
599 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
601 goto out_neigh_release;
604 if (dev->netdev_ops->ndo_neigh_construct) {
605 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
608 goto out_neigh_release;
612 /* Device specific setup. */
613 if (n->parms->neigh_setup &&
614 (error = n->parms->neigh_setup(n)) < 0) {
616 goto out_neigh_release;
619 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
621 write_lock_bh(&tbl->lock);
622 nht = rcu_dereference_protected(tbl->nht,
623 lockdep_is_held(&tbl->lock));
625 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
626 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
628 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
630 if (n->parms->dead) {
631 rc = ERR_PTR(-EINVAL);
635 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
636 lockdep_is_held(&tbl->lock));
638 n1 = rcu_dereference_protected(n1->next,
639 lockdep_is_held(&tbl->lock))) {
640 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
650 list_add_tail(&n->gc_list, &n->tbl->gc_list);
654 rcu_assign_pointer(n->next,
655 rcu_dereference_protected(nht->hash_buckets[hash_val],
656 lockdep_is_held(&tbl->lock)));
657 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
658 write_unlock_bh(&tbl->lock);
659 neigh_dbg(2, "neigh %p is created\n", n);
664 write_unlock_bh(&tbl->lock);
670 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
671 struct net_device *dev, bool want_ref)
673 return ___neigh_create(tbl, pkey, dev, false, want_ref);
675 EXPORT_SYMBOL(__neigh_create);
677 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
679 u32 hash_val = *(u32 *)(pkey + key_len - 4);
680 hash_val ^= (hash_val >> 16);
681 hash_val ^= hash_val >> 8;
682 hash_val ^= hash_val >> 4;
683 hash_val &= PNEIGH_HASHMASK;
687 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
690 unsigned int key_len,
691 struct net_device *dev)
694 if (!memcmp(n->key, pkey, key_len) &&
695 net_eq(pneigh_net(n), net) &&
696 (n->dev == dev || !n->dev))
703 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
704 struct net *net, const void *pkey, struct net_device *dev)
706 unsigned int key_len = tbl->key_len;
707 u32 hash_val = pneigh_hash(pkey, key_len);
709 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
710 net, pkey, key_len, dev);
712 EXPORT_SYMBOL_GPL(__pneigh_lookup);
714 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
715 struct net *net, const void *pkey,
716 struct net_device *dev, int creat)
718 struct pneigh_entry *n;
719 unsigned int key_len = tbl->key_len;
720 u32 hash_val = pneigh_hash(pkey, key_len);
722 read_lock_bh(&tbl->lock);
723 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
724 net, pkey, key_len, dev);
725 read_unlock_bh(&tbl->lock);
732 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
737 write_pnet(&n->net, net);
738 memcpy(n->key, pkey, key_len);
743 if (tbl->pconstructor && tbl->pconstructor(n)) {
751 write_lock_bh(&tbl->lock);
752 n->next = tbl->phash_buckets[hash_val];
753 tbl->phash_buckets[hash_val] = n;
754 write_unlock_bh(&tbl->lock);
758 EXPORT_SYMBOL(pneigh_lookup);
761 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
762 struct net_device *dev)
764 struct pneigh_entry *n, **np;
765 unsigned int key_len = tbl->key_len;
766 u32 hash_val = pneigh_hash(pkey, key_len);
768 write_lock_bh(&tbl->lock);
769 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
771 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
772 net_eq(pneigh_net(n), net)) {
774 write_unlock_bh(&tbl->lock);
775 if (tbl->pdestructor)
783 write_unlock_bh(&tbl->lock);
787 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
788 struct net_device *dev)
790 struct pneigh_entry *n, **np, *freelist = NULL;
793 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
794 np = &tbl->phash_buckets[h];
795 while ((n = *np) != NULL) {
796 if (!dev || n->dev == dev) {
805 write_unlock_bh(&tbl->lock);
806 while ((n = freelist)) {
809 if (tbl->pdestructor)
818 static void neigh_parms_destroy(struct neigh_parms *parms);
820 static inline void neigh_parms_put(struct neigh_parms *parms)
822 if (refcount_dec_and_test(&parms->refcnt))
823 neigh_parms_destroy(parms);
827 * neighbour must already be out of the table;
830 void neigh_destroy(struct neighbour *neigh)
832 struct net_device *dev = neigh->dev;
834 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
837 pr_warn("Destroying alive neighbour %p\n", neigh);
842 if (neigh_del_timer(neigh))
843 pr_warn("Impossible event\n");
845 write_lock_bh(&neigh->lock);
846 __skb_queue_purge(&neigh->arp_queue);
847 write_unlock_bh(&neigh->lock);
848 neigh->arp_queue_len_bytes = 0;
850 if (dev->netdev_ops->ndo_neigh_destroy)
851 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
854 neigh_parms_put(neigh->parms);
856 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
858 atomic_dec(&neigh->tbl->entries);
859 kfree_rcu(neigh, rcu);
861 EXPORT_SYMBOL(neigh_destroy);
863 /* Neighbour state is suspicious;
866 Called with write_locked neigh.
868 static void neigh_suspect(struct neighbour *neigh)
870 neigh_dbg(2, "neigh %p is suspected\n", neigh);
872 neigh->output = neigh->ops->output;
875 /* Neighbour state is OK;
878 Called with write_locked neigh.
880 static void neigh_connect(struct neighbour *neigh)
882 neigh_dbg(2, "neigh %p is connected\n", neigh);
884 neigh->output = neigh->ops->connected_output;
887 static void neigh_periodic_work(struct work_struct *work)
889 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
891 struct neighbour __rcu **np;
893 struct neigh_hash_table *nht;
895 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
897 write_lock_bh(&tbl->lock);
898 nht = rcu_dereference_protected(tbl->nht,
899 lockdep_is_held(&tbl->lock));
902 * periodically recompute ReachableTime from random function
905 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
906 struct neigh_parms *p;
907 tbl->last_rand = jiffies;
908 list_for_each_entry(p, &tbl->parms_list, list)
910 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
913 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
916 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
917 np = &nht->hash_buckets[i];
919 while ((n = rcu_dereference_protected(*np,
920 lockdep_is_held(&tbl->lock))) != NULL) {
923 write_lock(&n->lock);
925 state = n->nud_state;
926 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
927 (n->flags & NTF_EXT_LEARNED)) {
928 write_unlock(&n->lock);
932 if (time_before(n->used, n->confirmed))
933 n->used = n->confirmed;
935 if (refcount_read(&n->refcnt) == 1 &&
936 (state == NUD_FAILED ||
937 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
940 write_unlock(&n->lock);
941 neigh_cleanup_and_release(n);
944 write_unlock(&n->lock);
950 * It's fine to release lock here, even if hash table
951 * grows while we are preempted.
953 write_unlock_bh(&tbl->lock);
955 write_lock_bh(&tbl->lock);
956 nht = rcu_dereference_protected(tbl->nht,
957 lockdep_is_held(&tbl->lock));
960 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
961 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
962 * BASE_REACHABLE_TIME.
964 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
965 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
966 write_unlock_bh(&tbl->lock);
969 static __inline__ int neigh_max_probes(struct neighbour *n)
971 struct neigh_parms *p = n->parms;
972 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
973 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
974 NEIGH_VAR(p, MCAST_PROBES));
977 static void neigh_invalidate(struct neighbour *neigh)
978 __releases(neigh->lock)
979 __acquires(neigh->lock)
983 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
984 neigh_dbg(2, "neigh %p is failed\n", neigh);
985 neigh->updated = jiffies;
987 /* It is very thin place. report_unreachable is very complicated
988 routine. Particularly, it can hit the same neighbour entry!
990 So that, we try to be accurate and avoid dead loop. --ANK
992 while (neigh->nud_state == NUD_FAILED &&
993 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
994 write_unlock(&neigh->lock);
995 neigh->ops->error_report(neigh, skb);
996 write_lock(&neigh->lock);
998 __skb_queue_purge(&neigh->arp_queue);
999 neigh->arp_queue_len_bytes = 0;
1002 static void neigh_probe(struct neighbour *neigh)
1003 __releases(neigh->lock)
1005 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1006 /* keep skb alive even if arp_queue overflows */
1008 skb = skb_clone(skb, GFP_ATOMIC);
1009 write_unlock(&neigh->lock);
1010 if (neigh->ops->solicit)
1011 neigh->ops->solicit(neigh, skb);
1012 atomic_inc(&neigh->probes);
1016 /* Called when a timer expires for a neighbour entry. */
1018 static void neigh_timer_handler(struct timer_list *t)
1020 unsigned long now, next;
1021 struct neighbour *neigh = from_timer(neigh, t, timer);
1025 write_lock(&neigh->lock);
1027 state = neigh->nud_state;
1031 if (!(state & NUD_IN_TIMER))
1034 if (state & NUD_REACHABLE) {
1035 if (time_before_eq(now,
1036 neigh->confirmed + neigh->parms->reachable_time)) {
1037 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1038 next = neigh->confirmed + neigh->parms->reachable_time;
1039 } else if (time_before_eq(now,
1041 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1042 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1043 neigh->nud_state = NUD_DELAY;
1044 neigh->updated = jiffies;
1045 neigh_suspect(neigh);
1046 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1048 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1049 neigh->nud_state = NUD_STALE;
1050 neigh->updated = jiffies;
1051 neigh_suspect(neigh);
1054 } else if (state & NUD_DELAY) {
1055 if (time_before_eq(now,
1057 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1058 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1059 neigh->nud_state = NUD_REACHABLE;
1060 neigh->updated = jiffies;
1061 neigh_connect(neigh);
1063 next = neigh->confirmed + neigh->parms->reachable_time;
1065 neigh_dbg(2, "neigh %p is probed\n", neigh);
1066 neigh->nud_state = NUD_PROBE;
1067 neigh->updated = jiffies;
1068 atomic_set(&neigh->probes, 0);
1070 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1073 /* NUD_PROBE|NUD_INCOMPLETE */
1074 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1077 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1078 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1079 neigh->nud_state = NUD_FAILED;
1081 neigh_invalidate(neigh);
1085 if (neigh->nud_state & NUD_IN_TIMER) {
1086 if (time_before(next, jiffies + HZ/2))
1087 next = jiffies + HZ/2;
1088 if (!mod_timer(&neigh->timer, next))
1091 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1095 write_unlock(&neigh->lock);
1099 neigh_update_notify(neigh, 0);
1101 trace_neigh_timer_handler(neigh, 0);
1103 neigh_release(neigh);
1106 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1109 bool immediate_probe = false;
1111 write_lock_bh(&neigh->lock);
1114 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1119 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1120 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1121 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1122 unsigned long next, now = jiffies;
1124 atomic_set(&neigh->probes,
1125 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1126 neigh->nud_state = NUD_INCOMPLETE;
1127 neigh->updated = now;
1128 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1130 neigh_add_timer(neigh, next);
1131 immediate_probe = true;
1133 neigh->nud_state = NUD_FAILED;
1134 neigh->updated = jiffies;
1135 write_unlock_bh(&neigh->lock);
1140 } else if (neigh->nud_state & NUD_STALE) {
1141 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1142 neigh->nud_state = NUD_DELAY;
1143 neigh->updated = jiffies;
1144 neigh_add_timer(neigh, jiffies +
1145 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1148 if (neigh->nud_state == NUD_INCOMPLETE) {
1150 while (neigh->arp_queue_len_bytes + skb->truesize >
1151 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1152 struct sk_buff *buff;
1154 buff = __skb_dequeue(&neigh->arp_queue);
1157 neigh->arp_queue_len_bytes -= buff->truesize;
1159 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1162 __skb_queue_tail(&neigh->arp_queue, skb);
1163 neigh->arp_queue_len_bytes += skb->truesize;
1168 if (immediate_probe)
1171 write_unlock(&neigh->lock);
1173 trace_neigh_event_send_done(neigh, rc);
1177 if (neigh->nud_state & NUD_STALE)
1179 write_unlock_bh(&neigh->lock);
1181 trace_neigh_event_send_dead(neigh, 1);
1184 EXPORT_SYMBOL(__neigh_event_send);
1186 static void neigh_update_hhs(struct neighbour *neigh)
1188 struct hh_cache *hh;
1189 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1192 if (neigh->dev->header_ops)
1193 update = neigh->dev->header_ops->cache_update;
1198 write_seqlock_bh(&hh->hh_lock);
1199 update(hh, neigh->dev, neigh->ha);
1200 write_sequnlock_bh(&hh->hh_lock);
1207 /* Generic update routine.
1208 -- lladdr is new lladdr or NULL, if it is not supplied.
1209 -- new is new state.
1211 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1213 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1214 lladdr instead of overriding it
1216 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1218 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1220 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1223 Caller MUST hold reference count on the entry.
1226 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1227 u8 new, u32 flags, u32 nlmsg_pid,
1228 struct netlink_ext_ack *extack)
1230 bool ext_learn_change = false;
1234 struct net_device *dev;
1235 int update_isrouter = 0;
1237 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1239 write_lock_bh(&neigh->lock);
1242 old = neigh->nud_state;
1245 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1246 (old & (NUD_NOARP | NUD_PERMANENT)))
1249 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1253 ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify);
1255 if (!(new & NUD_VALID)) {
1256 neigh_del_timer(neigh);
1257 if (old & NUD_CONNECTED)
1258 neigh_suspect(neigh);
1259 neigh->nud_state = new;
1261 notify = old & NUD_VALID;
1262 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1263 (new & NUD_FAILED)) {
1264 neigh_invalidate(neigh);
1270 /* Compare new lladdr with cached one */
1271 if (!dev->addr_len) {
1272 /* First case: device needs no address. */
1274 } else if (lladdr) {
1275 /* The second case: if something is already cached
1276 and a new address is proposed:
1278 - if they are different, check override flag
1280 if ((old & NUD_VALID) &&
1281 !memcmp(lladdr, neigh->ha, dev->addr_len))
1284 /* No address is supplied; if we know something,
1285 use it, otherwise discard the request.
1288 if (!(old & NUD_VALID)) {
1289 NL_SET_ERR_MSG(extack, "No link layer address given");
1295 /* Update confirmed timestamp for neighbour entry after we
1296 * received ARP packet even if it doesn't change IP to MAC binding.
1298 if (new & NUD_CONNECTED)
1299 neigh->confirmed = jiffies;
1301 /* If entry was valid and address is not changed,
1302 do not change entry state, if new one is STALE.
1305 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1306 if (old & NUD_VALID) {
1307 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1308 update_isrouter = 0;
1309 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1310 (old & NUD_CONNECTED)) {
1316 if (lladdr == neigh->ha && new == NUD_STALE &&
1317 !(flags & NEIGH_UPDATE_F_ADMIN))
1322 /* Update timestamp only once we know we will make a change to the
1323 * neighbour entry. Otherwise we risk to move the locktime window with
1324 * noop updates and ignore relevant ARP updates.
1326 if (new != old || lladdr != neigh->ha)
1327 neigh->updated = jiffies;
1330 neigh_del_timer(neigh);
1331 if (new & NUD_PROBE)
1332 atomic_set(&neigh->probes, 0);
1333 if (new & NUD_IN_TIMER)
1334 neigh_add_timer(neigh, (jiffies +
1335 ((new & NUD_REACHABLE) ?
1336 neigh->parms->reachable_time :
1338 neigh->nud_state = new;
1342 if (lladdr != neigh->ha) {
1343 write_seqlock(&neigh->ha_lock);
1344 memcpy(&neigh->ha, lladdr, dev->addr_len);
1345 write_sequnlock(&neigh->ha_lock);
1346 neigh_update_hhs(neigh);
1347 if (!(new & NUD_CONNECTED))
1348 neigh->confirmed = jiffies -
1349 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1354 if (new & NUD_CONNECTED)
1355 neigh_connect(neigh);
1357 neigh_suspect(neigh);
1358 if (!(old & NUD_VALID)) {
1359 struct sk_buff *skb;
1361 /* Again: avoid dead loop if something went wrong */
1363 while (neigh->nud_state & NUD_VALID &&
1364 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1365 struct dst_entry *dst = skb_dst(skb);
1366 struct neighbour *n2, *n1 = neigh;
1367 write_unlock_bh(&neigh->lock);
1371 /* Why not just use 'neigh' as-is? The problem is that
1372 * things such as shaper, eql, and sch_teql can end up
1373 * using alternative, different, neigh objects to output
1374 * the packet in the output path. So what we need to do
1375 * here is re-lookup the top-level neigh in the path so
1376 * we can reinject the packet there.
1380 n2 = dst_neigh_lookup_skb(dst, skb);
1384 n1->output(n1, skb);
1389 write_lock_bh(&neigh->lock);
1391 __skb_queue_purge(&neigh->arp_queue);
1392 neigh->arp_queue_len_bytes = 0;
1395 if (update_isrouter)
1396 neigh_update_is_router(neigh, flags, ¬ify);
1397 write_unlock_bh(&neigh->lock);
1399 if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
1400 neigh_update_gc_list(neigh);
1403 neigh_update_notify(neigh, nlmsg_pid);
1405 trace_neigh_update_done(neigh, err);
1410 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1411 u32 flags, u32 nlmsg_pid)
1413 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1415 EXPORT_SYMBOL(neigh_update);
1417 /* Update the neigh to listen temporarily for probe responses, even if it is
1418 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1420 void __neigh_set_probe_once(struct neighbour *neigh)
1424 neigh->updated = jiffies;
1425 if (!(neigh->nud_state & NUD_FAILED))
1427 neigh->nud_state = NUD_INCOMPLETE;
1428 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1429 neigh_add_timer(neigh,
1430 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1432 EXPORT_SYMBOL(__neigh_set_probe_once);
1434 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1435 u8 *lladdr, void *saddr,
1436 struct net_device *dev)
1438 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1439 lladdr || !dev->addr_len);
1441 neigh_update(neigh, lladdr, NUD_STALE,
1442 NEIGH_UPDATE_F_OVERRIDE, 0);
1445 EXPORT_SYMBOL(neigh_event_ns);
1447 /* called with read_lock_bh(&n->lock); */
1448 static void neigh_hh_init(struct neighbour *n)
1450 struct net_device *dev = n->dev;
1451 __be16 prot = n->tbl->protocol;
1452 struct hh_cache *hh = &n->hh;
1454 write_lock_bh(&n->lock);
1456 /* Only one thread can come in here and initialize the
1460 dev->header_ops->cache(n, hh, prot);
1462 write_unlock_bh(&n->lock);
1465 /* Slow and careful. */
1467 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1471 if (!neigh_event_send(neigh, skb)) {
1473 struct net_device *dev = neigh->dev;
1476 if (dev->header_ops->cache && !neigh->hh.hh_len)
1477 neigh_hh_init(neigh);
1480 __skb_pull(skb, skb_network_offset(skb));
1481 seq = read_seqbegin(&neigh->ha_lock);
1482 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1483 neigh->ha, NULL, skb->len);
1484 } while (read_seqretry(&neigh->ha_lock, seq));
1487 rc = dev_queue_xmit(skb);
1498 EXPORT_SYMBOL(neigh_resolve_output);
1500 /* As fast as possible without hh cache */
1502 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1504 struct net_device *dev = neigh->dev;
1509 __skb_pull(skb, skb_network_offset(skb));
1510 seq = read_seqbegin(&neigh->ha_lock);
1511 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1512 neigh->ha, NULL, skb->len);
1513 } while (read_seqretry(&neigh->ha_lock, seq));
1516 err = dev_queue_xmit(skb);
1523 EXPORT_SYMBOL(neigh_connected_output);
/* Output path for devices that need no link-layer header resolution:
 * hand the packet straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
/* Timer handler for a table's proxy queue: deliver queued proxy requests
 * whose scheduled time has arrived and re-arm the timer for the earliest
 * remaining entry.
 * NOTE(review): several original lines are elided in this excerpt; control
 * flow around tdif and the final re-arm should be confirmed upstream.
 */
1531 static void neigh_proxy_process(struct timer_list *t)
1533 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1534 long sched_next = 0;
1535 unsigned long now = jiffies;
1536 struct sk_buff *skb, *n;
1538 spin_lock(&tbl->proxy_queue.lock);
1540 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
/* tdif <= 0 means this entry's scheduled delivery time has passed */
1541 long tdif = NEIGH_CB(skb)->sched_next - now;
1544 struct net_device *dev = skb->dev;
1546 __skb_unlink(skb, &tbl->proxy_queue);
/* re-inject only while the device is still running and a redo hook exists */
1547 if (tbl->proxy_redo && netif_running(dev)) {
1549 tbl->proxy_redo(skb);
/* otherwise track the soonest not-yet-due entry for the timer re-arm */
1556 } else if (!sched_next || tdif < sched_next)
1559 del_timer(&tbl->proxy_timer);
1561 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1562 spin_unlock(&tbl->proxy_queue.lock);
/* Queue @skb on the table's proxy queue to be answered after a random delay
 * of up to PROXY_DELAY jiffies (delays proxy replies like a real host would).
 */
1565 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1566 struct sk_buff *skb)
1568 unsigned long now = jiffies;
/* random jitter in [0, PROXY_DELAY) */
1570 unsigned long sched_next = now + (prandom_u32() %
1571 NEIGH_VAR(p, PROXY_DELAY));
/* drop when the queue already exceeds PROXY_QLEN (error path elided here) */
1573 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1578 NEIGH_CB(skb)->sched_next = sched_next;
1579 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1581 spin_lock(&tbl->proxy_queue.lock);
/* if the timer was pending, keep the earlier of its expiry and ours */
1582 if (del_timer(&tbl->proxy_timer)) {
1583 if (time_before(tbl->proxy_timer.expires, sched_next))
1584 sched_next = tbl->proxy_timer.expires;
1588 __skb_queue_tail(&tbl->proxy_queue, skb);
1589 mod_timer(&tbl->proxy_timer, sched_next);
1590 spin_unlock(&tbl->proxy_queue.lock);
1592 EXPORT_SYMBOL(pneigh_enqueue);
1594 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1595 struct net *net, int ifindex)
1597 struct neigh_parms *p;
1599 list_for_each_entry(p, &tbl->parms_list, list) {
1600 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1601 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* Allocate per-device neighbour parameters for @dev, cloned from the table
 * defaults, and link them onto tbl->parms_list.  Error paths are elided in
 * this excerpt (NULL return on allocation / driver-setup failure presumably).
 */
1608 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1609 struct neigh_table *tbl)
1611 struct neigh_parms *p;
1612 struct net *net = dev_net(dev);
1613 const struct net_device_ops *ops = dev->netdev_ops;
/* start from a byte copy of the table-wide defaults */
1615 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1618 refcount_set(&p->refcnt, 1);
/* randomize the effective reachable time around BASE_REACHABLE_TIME */
1620 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1623 write_pnet(&p->net, net);
1624 p->sysctl_table = NULL;
/* give the driver a chance to veto or adjust the new parms */
1626 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1632 write_lock_bh(&tbl->lock);
1633 list_add(&p->list, &tbl->parms.list);
1634 write_unlock_bh(&tbl->lock);
1636 neigh_parms_data_state_cleanall(p);
1640 EXPORT_SYMBOL(neigh_parms_alloc);
1642 static void neigh_rcu_free_parms(struct rcu_head *head)
1644 struct neigh_parms *parms =
1645 container_of(head, struct neigh_parms, rcu_head);
1647 neigh_parms_put(parms);
1650 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1652 if (!parms || parms == &tbl->parms)
1654 write_lock_bh(&tbl->lock);
1655 list_del(&parms->list);
1657 write_unlock_bh(&tbl->lock);
1659 dev_put(parms->dev);
1660 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1662 EXPORT_SYMBOL(neigh_parms_release);
/* Final destructor invoked once the parms refcount drops to zero. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1669 static struct lock_class_key neigh_table_proxy_queue_class;
1671 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* Initialise neighbour table @tbl and register it in the global
 * neigh_tables[] array at @index.  Allocation failures here panic: the
 * table is created at boot/module init and the stack cannot run without it.
 */
1673 void neigh_table_init(int index, struct neigh_table *tbl)
1675 unsigned long now = jiffies;
1676 unsigned long phsize;
1678 INIT_LIST_HEAD(&tbl->parms_list);
1679 INIT_LIST_HEAD(&tbl->gc_list);
1680 list_add(&tbl->parms.list, &tbl->parms_list);
1681 write_pnet(&tbl->parms.net, &init_net);
1682 refcount_set(&tbl->parms.refcnt, 1);
/* jitter the default reachable time around BASE_REACHABLE_TIME */
1683 tbl->parms.reachable_time =
1684 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1686 tbl->stats = alloc_percpu(struct neigh_statistics)
1688 panic("cannot create neighbour cache statistics");
1690 #ifdef CONFIG_PROC_FS
1691 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1692 &neigh_stat_seq_ops, tbl))
1693 panic("cannot create neighbour proc dir entry");
/* initial hash: 2^3 = 8 buckets, grown on demand */
1696 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1698 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1699 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1701 if (!tbl->nht || !tbl->phash_buckets)
1702 panic("cannot allocate neighbour cache hashes");
/* entries embed the key right after the struct, aligned for private data */
1704 if (!tbl->entry_size)
1705 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1706 tbl->key_len, NEIGH_PRIV_ALIGN);
1708 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1710 rwlock_init(&tbl->lock);
/* periodic GC runs on the power-efficient workqueue */
1711 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1712 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1713 tbl->parms.reachable_time);
1714 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1715 skb_queue_head_init_class(&tbl->proxy_queue,
1716 &neigh_table_proxy_queue_class);
1718 tbl->last_flush = now;
1719 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1721 neigh_tables[index] = tbl;
/* Tear down table @tbl: stop GC work and timers, flush queued proxy skbs,
 * release all entries and free the hash structures.  Counterpart of
 * neigh_table_init(); used on protocol module unload.
 */
1725 int neigh_table_clear(int index, struct neigh_table *tbl)
1727 neigh_tables[index] = NULL;
1728 /* It is not clean... Fix it to unload IPv6 module safely */
1729 cancel_delayed_work_sync(&tbl->gc_work);
1730 del_timer_sync(&tbl->proxy_timer);
1731 pneigh_queue_purge(&tbl->proxy_queue);
1732 neigh_ifdown(tbl, NULL);
/* all entries should have been released by neigh_ifdown() above */
1733 if (atomic_read(&tbl->entries))
1734 pr_crit("neighbour leakage\n");
/* free the hash only after concurrent RCU readers are done with it */
1736 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1737 neigh_hash_free_rcu);
1740 kfree(tbl->phash_buckets);
1741 tbl->phash_buckets = NULL;
1743 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1745 free_percpu(tbl->stats);
1750 EXPORT_SYMBOL(neigh_table_clear);
1752 static struct neigh_table *neigh_find_table(int family)
1754 struct neigh_table *tbl = NULL;
1758 tbl = neigh_tables[NEIGH_ARP_TABLE];
1761 tbl = neigh_tables[NEIGH_ND_TABLE];
1764 tbl = neigh_tables[NEIGH_DN_TABLE];
/* Netlink attribute validation policy for RTM_*NEIGH messages (NDA_*). */
1771 const struct nla_policy nda_policy[NDA_MAX+1] = {
1772 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1773 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1774 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1775 [NDA_PROBES] = { .type = NLA_U32 },
1776 [NDA_VLAN] = { .type = NLA_U16 },
1777 [NDA_PORT] = { .type = NLA_U16 },
1778 [NDA_VNI] = { .type = NLA_U32 },
1779 [NDA_IFINDEX] = { .type = NLA_U32 },
1780 [NDA_MASTER] = { .type = NLA_U32 },
1781 [NDA_PROTOCOL] = { .type = NLA_U8 },
/* RTM_DELNEIGH handler: delete the neighbour (or proxy) entry addressed by
 * NDA_DST / ndm_ifindex.  Several error-return lines are elided in this
 * excerpt.
 */
1784 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1785 struct netlink_ext_ack *extack)
1787 struct net *net = sock_net(skb->sk);
1789 struct nlattr *dst_attr;
1790 struct neigh_table *tbl;
1791 struct neighbour *neigh;
1792 struct net_device *dev = NULL;
1796 if (nlmsg_len(nlh) < sizeof(*ndm))
1799 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1801 NL_SET_ERR_MSG(extack, "Network address not specified");
1805 ndm = nlmsg_data(nlh);
1806 if (ndm->ndm_ifindex) {
1807 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1814 tbl = neigh_find_table(ndm->ndm_family);
1816 return -EAFNOSUPPORT;
1818 if (nla_len(dst_attr) < (int)tbl->key_len) {
1819 NL_SET_ERR_MSG(extack, "Invalid network address");
/* proxy entries live in a separate table and take a different path */
1823 if (ndm->ndm_flags & NTF_PROXY) {
1824 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1831 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1832 if (neigh == NULL) {
/* force the entry to FAILED as an admin override before unlinking it */
1837 err = __neigh_update(neigh, NULL, NUD_FAILED,
1838 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1839 NETLINK_CB(skb).portid, extack);
1840 write_lock_bh(&tbl->lock);
/* drop the lookup reference; the table still holds its own until
 * neigh_remove_one() unlinks the entry under tbl->lock
 */
1841 neigh_release(neigh);
1842 neigh_remove_one(neigh, tbl);
1843 write_unlock_bh(&tbl->lock);
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry from
 * a netlink request.  Error-return lines are elided in this excerpt.
 */
1849 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1850 struct netlink_ext_ack *extack)
/* default policy: an admin add may override an existing entry */
1852 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1853 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1854 struct net *net = sock_net(skb->sk);
1856 struct nlattr *tb[NDA_MAX+1];
1857 struct neigh_table *tbl;
1858 struct net_device *dev = NULL;
1859 struct neighbour *neigh;
1865 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
1871 NL_SET_ERR_MSG(extack, "Network address not specified");
1875 ndm = nlmsg_data(nlh);
1876 if (ndm->ndm_ifindex) {
1877 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1883 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1884 NL_SET_ERR_MSG(extack, "Invalid link address");
1889 tbl = neigh_find_table(ndm->ndm_family);
1891 return -EAFNOSUPPORT;
1893 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1894 NL_SET_ERR_MSG(extack, "Invalid network address");
1898 dst = nla_data(tb[NDA_DST]);
1899 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1901 if (tb[NDA_PROTOCOL])
1902 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
/* NTF_PROXY requests manage pneigh entries rather than real neighbours */
1904 if (ndm->ndm_flags & NTF_PROXY) {
1905 struct pneigh_entry *pn;
1908 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1910 pn->flags = ndm->ndm_flags;
1912 pn->protocol = protocol;
1919 NL_SET_ERR_MSG(extack, "Device not specified");
/* let the table veto additions on unsupported devices */
1923 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
1928 neigh = neigh_lookup(tbl, dst, dev);
1929 if (neigh == NULL) {
1930 bool exempt_from_gc;
1932 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
/* permanent and externally-learned entries are not garbage collected */
1937 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
1938 ndm->ndm_flags & NTF_EXT_LEARNED;
1939 neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
1940 if (IS_ERR(neigh)) {
1941 err = PTR_ERR(neigh);
1945 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1947 neigh_release(neigh);
/* without NLM_F_REPLACE an existing entry's lladdr must not be clobbered */
1951 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1952 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1953 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1956 if (ndm->ndm_flags & NTF_EXT_LEARNED)
1957 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1959 if (ndm->ndm_flags & NTF_ROUTER)
1960 flags |= NEIGH_UPDATE_F_ISROUTER;
/* NTF_USE: just kick resolution as if the entry were used for output */
1962 if (ndm->ndm_flags & NTF_USE) {
1963 neigh_event_send(neigh, NULL);
1966 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1967 NETLINK_CB(skb).portid, extack);
1970 neigh->protocol = protocol;
1972 neigh_release(neigh);
/* Dump one neigh_parms instance as a nested NDTA_PARMS attribute set.
 * Returns the result of nla_nest_end() on success, negative on failure.
 */
1978 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1980 struct nlattr *nest;
1982 nest = nla_nest_start(skb, NDTA_PARMS);
/* ifindex is emitted only for per-device parms (parms->dev set) */
1987 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1988 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1989 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1990 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1991 /* approximative value for deprecated QUEUE_LEN (in packets) */
1992 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1993 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1994 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1995 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1996 nla_put_u32(skb, NDTPA_UCAST_PROBES,
1997 NEIGH_VAR(parms, UCAST_PROBES)) ||
1998 nla_put_u32(skb, NDTPA_MCAST_PROBES,
1999 NEIGH_VAR(parms, MCAST_PROBES)) ||
2000 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2001 NEIGH_VAR(parms, MCAST_REPROBES)) ||
/* time values are exported in milliseconds with explicit pad attrs */
2002 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2004 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2005 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2006 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2007 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2008 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2009 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2010 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2011 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2012 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2013 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2014 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2015 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2016 nla_put_msecs(skb, NDTPA_LOCKTIME,
2017 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
2018 goto nla_put_failure;
2019 return nla_nest_end(skb, nest);
2022 nla_nest_cancel(skb, nest);
/* Build a full RTM_NEWNEIGHTBL message for @tbl: thresholds, config,
 * per-CPU statistics, and the table's default parms.  Values are read
 * under tbl->lock (read side) for a consistent snapshot.
 */
2026 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2027 u32 pid, u32 seq, int type, int flags)
2029 struct nlmsghdr *nlh;
2030 struct ndtmsg *ndtmsg;
2032 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2036 ndtmsg = nlmsg_data(nlh);
2038 read_lock_bh(&tbl->lock);
2039 ndtmsg->ndtm_family = tbl->family;
2040 ndtmsg->ndtm_pad1 = 0;
2041 ndtmsg->ndtm_pad2 = 0;
2043 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2044 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2045 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2046 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2047 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2048 goto nla_put_failure;
/* NDTA_CONFIG: snapshot of table geometry and activity timestamps */
2050 unsigned long now = jiffies;
2051 unsigned int flush_delta = now - tbl->last_flush;
2052 unsigned int rand_delta = now - tbl->last_rand;
2053 struct neigh_hash_table *nht;
2054 struct ndt_config ndc = {
2055 .ndtc_key_len = tbl->key_len,
2056 .ndtc_entry_size = tbl->entry_size,
2057 .ndtc_entries = atomic_read(&tbl->entries),
2058 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2059 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2060 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* hash parameters come from the RCU-protected hash table */
2064 nht = rcu_dereference_bh(tbl->nht);
2065 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2066 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2067 rcu_read_unlock_bh();
2069 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2070 goto nla_put_failure;
/* NDTA_STATS: sum the per-CPU counters into one ndt_stats blob */
2075 struct ndt_stats ndst;
2077 memset(&ndst, 0, sizeof(ndst));
2079 for_each_possible_cpu(cpu) {
2080 struct neigh_statistics *st;
2082 st = per_cpu_ptr(tbl->stats, cpu);
2083 ndst.ndts_allocs += st->allocs;
2084 ndst.ndts_destroys += st->destroys;
2085 ndst.ndts_hash_grows += st->hash_grows;
2086 ndst.ndts_res_failed += st->res_failed;
2087 ndst.ndts_lookups += st->lookups;
2088 ndst.ndts_hits += st->hits;
2089 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2090 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2091 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2092 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2093 ndst.ndts_table_fulls += st->table_fulls;
2096 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2098 goto nla_put_failure;
/* table-level parms must not be bound to a device */
2101 BUG_ON(tbl->parms.dev);
2102 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2103 goto nla_put_failure;
2105 read_unlock_bh(&tbl->lock);
2106 nlmsg_end(skb, nlh);
2110 read_unlock_bh(&tbl->lock);
2111 nlmsg_cancel(skb, nlh);
/* Build an RTM_NEWNEIGHTBL message carrying only the table name and one
 * per-device parms set (used when dumping device-specific parameters).
 */
2115 static int neightbl_fill_param_info(struct sk_buff *skb,
2116 struct neigh_table *tbl,
2117 struct neigh_parms *parms,
2118 u32 pid, u32 seq, int type,
2121 struct ndtmsg *ndtmsg;
2122 struct nlmsghdr *nlh;
2124 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2128 ndtmsg = nlmsg_data(nlh);
2130 read_lock_bh(&tbl->lock);
2131 ndtmsg->ndtm_family = tbl->family;
2132 ndtmsg->ndtm_pad1 = 0;
2133 ndtmsg->ndtm_pad2 = 0;
2135 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2136 neightbl_fill_parms(skb, parms) < 0)
2139 read_unlock_bh(&tbl->lock);
2140 nlmsg_end(skb, nlh);
/* failure path: unlock and roll back the partially built message */
2143 read_unlock_bh(&tbl->lock);
2144 nlmsg_cancel(skb, nlh);
/* Validation policy for RTM_SETNEIGHTBL top-level attributes (NDTA_*). */
2148 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2149 [NDTA_NAME] = { .type = NLA_STRING },
2150 [NDTA_THRESH1] = { .type = NLA_U32 },
2151 [NDTA_THRESH2] = { .type = NLA_U32 },
2152 [NDTA_THRESH3] = { .type = NLA_U32 },
2153 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2154 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Validation policy for the nested NDTA_PARMS attributes (NDTPA_*). */
2157 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2158 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2159 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2160 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2161 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2162 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2163 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2164 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2165 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2166 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2167 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2168 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2169 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2170 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2171 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* RTM_SETNEIGHTBL handler: update a table's GC thresholds/interval and,
 * via nested NDTPA_* attributes, one parms set.  The table is located by
 * NDTA_NAME (and optionally family).  Error-return lines are elided here.
 */
2174 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2175 struct netlink_ext_ack *extack)
2177 struct net *net = sock_net(skb->sk);
2178 struct neigh_table *tbl;
2179 struct ndtmsg *ndtmsg;
2180 struct nlattr *tb[NDTA_MAX+1];
2184 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2185 nl_neightbl_policy, extack);
2189 if (tb[NDTA_NAME] == NULL) {
2194 ndtmsg = nlmsg_data(nlh);
/* find the table whose id matches NDTA_NAME (and family, if given) */
2196 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2197 tbl = neigh_tables[tidx];
2200 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2202 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2212 * We acquire tbl->lock to be nice to the periodic timers and
2213 * make sure they always see a consistent set of values.
2215 write_lock_bh(&tbl->lock);
2217 if (tb[NDTA_PARMS]) {
2218 struct nlattr *tbp[NDTPA_MAX+1];
2219 struct neigh_parms *p;
2222 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2223 nl_ntbl_parm_policy, extack);
2225 goto errout_tbl_lock;
2227 if (tbp[NDTPA_IFINDEX])
2228 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2230 p = lookup_neigh_parms(tbl, net, ifindex);
2233 goto errout_tbl_lock;
/* apply every supplied NDTPA_* attribute to the selected parms */
2236 for (i = 1; i <= NDTPA_MAX; i++) {
2241 case NDTPA_QUEUE_LEN:
/* deprecated packet count: convert to bytes using a typical frame size */
2242 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2243 nla_get_u32(tbp[i]) *
2244 SKB_TRUESIZE(ETH_FRAME_LEN));
2246 case NDTPA_QUEUE_LENBYTES:
2247 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2248 nla_get_u32(tbp[i]));
2250 case NDTPA_PROXY_QLEN:
2251 NEIGH_VAR_SET(p, PROXY_QLEN,
2252 nla_get_u32(tbp[i]));
2254 case NDTPA_APP_PROBES:
2255 NEIGH_VAR_SET(p, APP_PROBES,
2256 nla_get_u32(tbp[i]));
2258 case NDTPA_UCAST_PROBES:
2259 NEIGH_VAR_SET(p, UCAST_PROBES,
2260 nla_get_u32(tbp[i]));
2262 case NDTPA_MCAST_PROBES:
2263 NEIGH_VAR_SET(p, MCAST_PROBES,
2264 nla_get_u32(tbp[i]));
2266 case NDTPA_MCAST_REPROBES:
2267 NEIGH_VAR_SET(p, MCAST_REPROBES,
2268 nla_get_u32(tbp[i]));
2270 case NDTPA_BASE_REACHABLE_TIME:
2271 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2272 nla_get_msecs(tbp[i]));
2273 /* update reachable_time as well, otherwise, the change will
2274 * only be effective after the next time neigh_periodic_work
2275 * decides to recompute it (can be multiple minutes)
2278 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2280 case NDTPA_GC_STALETIME:
2281 NEIGH_VAR_SET(p, GC_STALETIME,
2282 nla_get_msecs(tbp[i]));
2284 case NDTPA_DELAY_PROBE_TIME:
2285 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2286 nla_get_msecs(tbp[i]));
/* interested parties (e.g. offload drivers) are told about this change */
2287 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2289 case NDTPA_RETRANS_TIME:
2290 NEIGH_VAR_SET(p, RETRANS_TIME,
2291 nla_get_msecs(tbp[i]));
2293 case NDTPA_ANYCAST_DELAY:
2294 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2295 nla_get_msecs(tbp[i]));
2297 case NDTPA_PROXY_DELAY:
2298 NEIGH_VAR_SET(p, PROXY_DELAY,
2299 nla_get_msecs(tbp[i]));
2301 case NDTPA_LOCKTIME:
2302 NEIGH_VAR_SET(p, LOCKTIME,
2303 nla_get_msecs(tbp[i]));
/* GC knobs are global state: only the initial netns may change them */
2310 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2311 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2312 !net_eq(net, &init_net))
2313 goto errout_tbl_lock;
2315 if (tb[NDTA_THRESH1])
2316 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2318 if (tb[NDTA_THRESH2])
2319 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2321 if (tb[NDTA_THRESH3])
2322 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2324 if (tb[NDTA_GC_INTERVAL])
2325 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2330 write_unlock_bh(&tbl->lock);
/* Strict validation of an RTM_GETNEIGHTBL dump request header: correct
 * size, zeroed pad fields, and no trailing attributes.
 */
2335 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2336 struct netlink_ext_ack *extack)
2338 struct ndtmsg *ndtm;
2340 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2341 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2345 ndtm = nlmsg_data(nlh);
2346 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2347 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2351 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2352 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
/* Dump all neighbour tables (and their per-device parms) to userspace.
 * cb->args[0]/[1] carry the table / parms resume cursor between calls.
 * The tail of this function is elided in this excerpt.
 */
2359 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2361 const struct nlmsghdr *nlh = cb->nlh;
2362 struct net *net = sock_net(skb->sk);
2363 int family, tidx, nidx = 0;
2364 int tbl_skip = cb->args[0];
2365 int neigh_skip = cb->args[1];
2366 struct neigh_table *tbl;
2368 if (cb->strict_check) {
2369 int err = neightbl_valid_dump_info(nlh, cb->extack);
2375 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2377 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2378 struct neigh_parms *p;
2380 tbl = neigh_tables[tidx];
2384 if (tidx < tbl_skip || (family && tbl->family != family))
2387 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2388 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* skip the default parms (first list entry); dump per-device ones */
2393 p = list_next_entry(&tbl->parms, list);
2394 list_for_each_entry_from(p, &tbl->parms_list, list) {
2395 if (!net_eq(neigh_parms_net(p), net))
2398 if (nidx < neigh_skip)
2401 if (neightbl_fill_param_info(skb, tbl, p,
2402 NETLINK_CB(cb->skb).portid,
/* Serialize one neighbour entry into an RTM_NEWNEIGH netlink message:
 * ndmsg header, destination, (if valid) link-layer address, cache info,
 * probe count and protocol.
 */
2420 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2421 u32 pid, u32 seq, int type, unsigned int flags)
2423 unsigned long now = jiffies;
2424 struct nda_cacheinfo ci;
2425 struct nlmsghdr *nlh;
2428 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2432 ndm = nlmsg_data(nlh);
2433 ndm->ndm_family = neigh->ops->family;
2436 ndm->ndm_flags = neigh->flags;
2437 ndm->ndm_type = neigh->type;
2438 ndm->ndm_ifindex = neigh->dev->ifindex;
2440 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2441 goto nla_put_failure;
/* state-dependent fields are read under the entry's lock */
2443 read_lock_bh(&neigh->lock);
2444 ndm->ndm_state = neigh->nud_state;
2445 if (neigh->nud_state & NUD_VALID) {
2446 char haddr[MAX_ADDR_LEN];
/* take a consistent copy of the hardware address before unlocking */
2448 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2449 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2450 read_unlock_bh(&neigh->lock);
2451 goto nla_put_failure;
/* cacheinfo ages are exported in clock_t relative to now */
2455 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2456 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2457 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
/* exclude the reference held by this dump itself */
2458 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2459 read_unlock_bh(&neigh->lock);
2461 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2462 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2463 goto nla_put_failure;
2465 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2466 goto nla_put_failure;
2468 nlmsg_end(skb, nlh);
2472 nlmsg_cancel(skb, nlh);
/* Serialize one proxy-neighbour entry into an RTM_NEWNEIGH message.
 * Proxy entries have no NUD state or lladdr; they are reported as
 * NUD_NONE with the NTF_PROXY flag set.
 */
2476 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2477 u32 pid, u32 seq, int type, unsigned int flags,
2478 struct neigh_table *tbl)
2480 struct nlmsghdr *nlh;
2483 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2487 ndm = nlmsg_data(nlh);
2488 ndm->ndm_family = tbl->family;
2491 ndm->ndm_flags = pn->flags | NTF_PROXY;
2492 ndm->ndm_type = RTN_UNICAST;
2493 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2494 ndm->ndm_state = NUD_NONE;
2496 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2497 goto nla_put_failure;
2499 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2500 goto nla_put_failure;
2502 nlmsg_end(skb, nlh);
2506 nlmsg_cancel(skb, nlh);
2510 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2512 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2513 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2516 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2518 struct net_device *master;
2523 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2524 if (!master || master->ifindex != master_idx)
2530 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2532 if (filter_idx && (!dev || dev->ifindex != filter_idx))
/* Per-dump filtering criteria parsed from NDA_IFINDEX / NDA_MASTER. */
struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};
/* Dump the real (non-proxy) neighbour entries of @tbl that match @filter,
 * resuming from cb->args[1] (bucket) / cb->args[2] (index).  The hash is
 * walked under RCU-bh.
 */
2543 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2544 struct netlink_callback *cb,
2545 struct neigh_dump_filter *filter)
2547 struct net *net = sock_net(skb->sk);
2548 struct neighbour *n;
2549 int rc, h, s_h = cb->args[1];
2550 int idx, s_idx = idx = cb->args[2];
2551 struct neigh_hash_table *nht;
2552 unsigned int flags = NLM_F_MULTI;
/* advertise to userspace that kernel-side filtering was applied */
2554 if (filter->dev_idx || filter->master_idx)
2555 flags |= NLM_F_DUMP_FILTERED;
2558 nht = rcu_dereference_bh(tbl->nht);
2560 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2563 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2565 n = rcu_dereference_bh(n->next)) {
2566 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2568 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2569 neigh_master_filtered(n->dev, filter->master_idx))
2571 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2584 rcu_read_unlock_bh();
/* Dump the proxy-neighbour entries of @tbl that match @filter, resuming
 * from cb->args[3] (bucket) / cb->args[4] (index).  The pneigh hash is
 * protected by tbl->lock (read side), not RCU.
 */
2590 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2591 struct netlink_callback *cb,
2592 struct neigh_dump_filter *filter)
2594 struct pneigh_entry *n;
2595 struct net *net = sock_net(skb->sk);
2596 int rc, h, s_h = cb->args[3];
2597 int idx, s_idx = idx = cb->args[4];
2598 unsigned int flags = NLM_F_MULTI;
2600 if (filter->dev_idx || filter->master_idx)
2601 flags |= NLM_F_DUMP_FILTERED;
2603 read_lock_bh(&tbl->lock);
2605 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2608 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2609 if (idx < s_idx || pneigh_net(n) != net)
2611 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2612 neigh_master_filtered(n->dev, filter->master_idx))
2614 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2616 RTM_NEWNEIGH, flags, tbl) < 0) {
/* message full: unlock and stop, cursor saved for the next call */
2617 read_unlock_bh(&tbl->lock);
2626 read_unlock_bh(&tbl->lock);
/* Validate an RTM_GETNEIGH dump request and extract the device/master
 * filter.  Under strict checking, reject non-zero header fields, unknown
 * flags and unsupported attributes; legacy mode only parses attributes.
 */
2635 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2637 struct neigh_dump_filter *filter,
2638 struct netlink_ext_ack *extack)
2640 struct nlattr *tb[NDA_MAX + 1];
2646 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2647 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2651 ndm = nlmsg_data(nlh);
2652 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2653 ndm->ndm_state || ndm->ndm_type) {
2654 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2658 if (ndm->ndm_flags & ~NTF_PROXY) {
2659 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2663 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2664 nda_policy, extack);
/* legacy (non-strict) requests still get their attributes parsed */
2666 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2667 nda_policy, extack);
2672 for (i = 0; i <= NDA_MAX; ++i) {
2676 /* all new attributes should require strict_check */
2679 filter->dev_idx = nla_get_u32(tb[i]);
2682 filter->master_idx = nla_get_u32(tb[i]);
2686 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
/* RTM_GETNEIGH dump entry point: iterate all tables matching the requested
 * family and dump either proxy entries (NTF_PROXY set in the request) or
 * real neighbour entries.  cb->args[0] is the table resume cursor.
 */
2695 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2697 const struct nlmsghdr *nlh = cb->nlh;
2698 struct neigh_dump_filter filter = {};
2699 struct neigh_table *tbl;
2704 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2706 /* check for full ndmsg structure presence, family member is
2707 * the same for both structures
2709 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2710 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2713 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
/* validation failures only abort the dump for strict-check sockets */
2714 if (err < 0 && cb->strict_check)
2719 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2720 tbl = neigh_tables[t];
2724 if (t < s_t || (family && tbl->family != family))
/* reset the per-table cursors when moving to a new table */
2727 memset(&cb->args[1], 0, sizeof(cb->args) -
2728 sizeof(cb->args[0]));
2730 err = pneigh_dump_table(tbl, skb, cb, &filter);
2732 err = neigh_dump_table(tbl, skb, cb, &filter);
/* Validate an RTM_GETNEIGH (doit) request and extract the target table,
 * destination address, device index and flags.  Error-return lines are
 * elided in this excerpt.
 */
2741 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2742 struct neigh_table **tbl,
2743 void **dst, int *dev_idx, u8 *ndm_flags,
2744 struct netlink_ext_ack *extack)
2746 struct nlattr *tb[NDA_MAX + 1];
2750 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2751 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2755 ndm = nlmsg_data(nlh);
2756 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2758 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2762 if (ndm->ndm_flags & ~NTF_PROXY) {
2763 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2767 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2768 nda_policy, extack);
2772 *ndm_flags = ndm->ndm_flags;
2773 *dev_idx = ndm->ndm_ifindex;
2774 *tbl = neigh_find_table(ndm->ndm_family);
2776 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2777 return -EAFNOSUPPORT;
/* only NDA_DST is accepted; its length must match the table's key */
2780 for (i = 0; i <= NDA_MAX; ++i) {
2786 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2787 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2790 *dst = nla_data(tb[i]);
2793 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2801 static inline size_t neigh_nlmsg_size(void)
2803 return NLMSG_ALIGN(sizeof(struct ndmsg))
2804 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2805 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2806 + nla_total_size(sizeof(struct nda_cacheinfo))
2807 + nla_total_size(4) /* NDA_PROBES */
2808 + nla_total_size(1); /* NDA_PROTOCOL */
2811 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2814 struct sk_buff *skb;
2817 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2821 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2827 err = rtnl_unicast(skb, net, pid);
2832 static inline size_t pneigh_nlmsg_size(void)
2834 return NLMSG_ALIGN(sizeof(struct ndmsg))
2835 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2836 + nla_total_size(1); /* NDA_PROTOCOL */
2839 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2840 u32 pid, u32 seq, struct neigh_table *tbl)
2842 struct sk_buff *skb;
2845 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2849 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2855 err = rtnl_unicast(skb, net, pid);
/* RTM_GETNEIGH (doit) handler: look up a single neighbour or proxy entry
 * and unicast it back to the requester.  Error-return lines are elided in
 * this excerpt.
 */
2860 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2861 struct netlink_ext_ack *extack)
2863 struct net *net = sock_net(in_skb->sk);
2864 struct net_device *dev = NULL;
2865 struct neigh_table *tbl = NULL;
2866 struct neighbour *neigh;
2872 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2878 dev = __dev_get_by_index(net, dev_idx);
2880 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2886 NL_SET_ERR_MSG(extack, "Network address not specified");
/* proxy entries are resolved from the pneigh hash, not the main table */
2890 if (ndm_flags & NTF_PROXY) {
2891 struct pneigh_entry *pn;
2893 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2895 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2898 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2899 nlh->nlmsg_seq, tbl);
2903 NL_SET_ERR_MSG(extack, "No device specified");
2907 neigh = neigh_lookup(tbl, dst, dev);
2909 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2913 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
/* drop the reference taken by neigh_lookup() */
2916 neigh_release(neigh);
2921 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2924 struct neigh_hash_table *nht;
2927 nht = rcu_dereference_bh(tbl->nht);
2929 read_lock(&tbl->lock); /* avoid resizes */
2930 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2931 struct neighbour *n;
2933 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2935 n = rcu_dereference_bh(n->next))
2938 read_unlock(&tbl->lock);
2939 rcu_read_unlock_bh();
2941 EXPORT_SYMBOL(neigh_for_each);
2943 /* The tbl->lock must be held as a writer and BH disabled. */
2944 void __neigh_for_each_release(struct neigh_table *tbl,
2945 int (*cb)(struct neighbour *))
2948 struct neigh_hash_table *nht;
2950 nht = rcu_dereference_protected(tbl->nht,
2951 lockdep_is_held(&tbl->lock));
2952 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2953 struct neighbour *n;
2954 struct neighbour __rcu **np;
2956 np = &nht->hash_buckets[chain];
2957 while ((n = rcu_dereference_protected(*np,
2958 lockdep_is_held(&tbl->lock))) != NULL) {
2961 write_lock(&n->lock);
2964 rcu_assign_pointer(*np,
2965 rcu_dereference_protected(n->next,
2966 lockdep_is_held(&tbl->lock)));
2970 write_unlock(&n->lock);
2972 neigh_cleanup_and_release(n);
2976 EXPORT_SYMBOL(__neigh_for_each_release);
2978 int neigh_xmit(int index, struct net_device *dev,
2979 const void *addr, struct sk_buff *skb)
2981 int err = -EAFNOSUPPORT;
2982 if (likely(index < NEIGH_NR_TABLES)) {
2983 struct neigh_table *tbl;
2984 struct neighbour *neigh;
2986 tbl = neigh_tables[index];
2990 neigh = __neigh_lookup_noref(tbl, addr, dev);
2992 neigh = __neigh_create(tbl, addr, dev, false);
2993 err = PTR_ERR(neigh);
2994 if (IS_ERR(neigh)) {
2995 rcu_read_unlock_bh();
2998 err = neigh->output(neigh, skb);
2999 rcu_read_unlock_bh();
3001 else if (index == NEIGH_LINK_TABLE) {
3002 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3003 addr, NULL, skb->len);
3006 err = dev_queue_xmit(skb);
3014 EXPORT_SYMBOL(neigh_xmit);
3016 #ifdef CONFIG_PROC_FS
3018 static struct neighbour *neigh_get_first(struct seq_file *seq)
3020 struct neigh_seq_state *state = seq->private;
3021 struct net *net = seq_file_net(seq);
3022 struct neigh_hash_table *nht = state->nht;
3023 struct neighbour *n = NULL;
3024 int bucket = state->bucket;
3026 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3027 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3028 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3031 if (!net_eq(dev_net(n->dev), net))
3033 if (state->neigh_sub_iter) {
3037 v = state->neigh_sub_iter(state, n, &fakep);
3041 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3043 if (n->nud_state & ~NUD_NOARP)
3046 n = rcu_dereference_bh(n->next);
3052 state->bucket = bucket;
3057 static struct neighbour *neigh_get_next(struct seq_file *seq,
3058 struct neighbour *n,
3061 struct neigh_seq_state *state = seq->private;
3062 struct net *net = seq_file_net(seq);
3063 struct neigh_hash_table *nht = state->nht;
3065 if (state->neigh_sub_iter) {
3066 void *v = state->neigh_sub_iter(state, n, pos);
3070 n = rcu_dereference_bh(n->next);
3074 if (!net_eq(dev_net(n->dev), net))
3076 if (state->neigh_sub_iter) {
3077 void *v = state->neigh_sub_iter(state, n, pos);
3082 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3085 if (n->nud_state & ~NUD_NOARP)
3088 n = rcu_dereference_bh(n->next);
3094 if (++state->bucket >= (1 << nht->hash_shift))
3097 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
3105 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3107 struct neighbour *n = neigh_get_first(seq);
3112 n = neigh_get_next(seq, n, pos);
3117 return *pos ? NULL : n;
3120 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3122 struct neigh_seq_state *state = seq->private;
3123 struct net *net = seq_file_net(seq);
3124 struct neigh_table *tbl = state->tbl;
3125 struct pneigh_entry *pn = NULL;
3126 int bucket = state->bucket;
3128 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3129 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3130 pn = tbl->phash_buckets[bucket];
3131 while (pn && !net_eq(pneigh_net(pn), net))
3136 state->bucket = bucket;
3141 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3142 struct pneigh_entry *pn,
3145 struct neigh_seq_state *state = seq->private;
3146 struct net *net = seq_file_net(seq);
3147 struct neigh_table *tbl = state->tbl;
3151 } while (pn && !net_eq(pneigh_net(pn), net));
3154 if (++state->bucket > PNEIGH_HASHMASK)
3156 pn = tbl->phash_buckets[state->bucket];
3157 while (pn && !net_eq(pneigh_net(pn), net))
3169 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3171 struct pneigh_entry *pn = pneigh_get_first(seq);
3176 pn = pneigh_get_next(seq, pn, pos);
3181 return *pos ? NULL : pn;
3184 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3186 struct neigh_seq_state *state = seq->private;
3188 loff_t idxpos = *pos;
3190 rc = neigh_get_idx(seq, &idxpos);
3191 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3192 rc = pneigh_get_idx(seq, &idxpos);
3197 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3200 struct neigh_seq_state *state = seq->private;
3204 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3207 state->nht = rcu_dereference_bh(tbl->nht);
3209 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3211 EXPORT_SYMBOL(neigh_seq_start);
3213 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3215 struct neigh_seq_state *state;
3218 if (v == SEQ_START_TOKEN) {
3219 rc = neigh_get_first(seq);
3223 state = seq->private;
3224 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3225 rc = neigh_get_next(seq, v, NULL);
3228 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3229 rc = pneigh_get_first(seq);
3231 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3232 rc = pneigh_get_next(seq, v, NULL);
3238 EXPORT_SYMBOL(neigh_seq_next);
3240 void neigh_seq_stop(struct seq_file *seq, void *v)
3243 rcu_read_unlock_bh();
3245 EXPORT_SYMBOL(neigh_seq_stop);
3247 /* statistics via seq_file */
3249 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3251 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3255 return SEQ_START_TOKEN;
3257 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3258 if (!cpu_possible(cpu))
3261 return per_cpu_ptr(tbl->stats, cpu);
3266 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3268 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3271 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3272 if (!cpu_possible(cpu))
3275 return per_cpu_ptr(tbl->stats, cpu);
3280 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3285 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3287 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3288 struct neigh_statistics *st = v;
3290 if (v == SEQ_START_TOKEN) {
3291 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3295 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3296 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3297 atomic_read(&tbl->entries),
3308 st->rcv_probes_mcast,
3309 st->rcv_probes_ucast,
3311 st->periodic_gc_runs,
3320 static const struct seq_operations neigh_stat_seq_ops = {
3321 .start = neigh_stat_seq_start,
3322 .next = neigh_stat_seq_next,
3323 .stop = neigh_stat_seq_stop,
3324 .show = neigh_stat_seq_show,
3326 #endif /* CONFIG_PROC_FS */
3328 static void __neigh_notify(struct neighbour *n, int type, int flags,
3331 struct net *net = dev_net(n->dev);
3332 struct sk_buff *skb;
3335 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3339 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3341 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3342 WARN_ON(err == -EMSGSIZE);
3346 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3350 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3353 void neigh_app_ns(struct neighbour *n)
3355 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3357 EXPORT_SYMBOL(neigh_app_ns);
3359 #ifdef CONFIG_SYSCTL
3361 static int int_max = INT_MAX;
3362 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3364 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3365 void __user *buffer, size_t *lenp, loff_t *ppos)
3368 struct ctl_table tmp = *ctl;
3371 tmp.extra2 = &unres_qlen_max;
3374 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3375 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3378 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3382 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3387 return __in_dev_arp_parms_get_rcu(dev);
3389 return __in6_dev_nd_parms_get_rcu(dev);
3394 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3397 struct net_device *dev;
3398 int family = neigh_parms_family(p);
3401 for_each_netdev_rcu(net, dev) {
3402 struct neigh_parms *dst_p =
3403 neigh_get_dev_parms_rcu(dev, family);
3405 if (dst_p && !test_bit(index, dst_p->data_state))
3406 dst_p->data[index] = p->data[index];
3411 static void neigh_proc_update(struct ctl_table *ctl, int write)
3413 struct net_device *dev = ctl->extra1;
3414 struct neigh_parms *p = ctl->extra2;
3415 struct net *net = neigh_parms_net(p);
3416 int index = (int *) ctl->data - p->data;
3421 set_bit(index, p->data_state);
3422 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3423 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3424 if (!dev) /* NULL dev means this is default value */
3425 neigh_copy_dflt_parms(net, p, index);
3428 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3429 void __user *buffer,
3430 size_t *lenp, loff_t *ppos)
3432 struct ctl_table tmp = *ctl;
3436 tmp.extra2 = &int_max;
3438 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3439 neigh_proc_update(ctl, write);
3443 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3444 void __user *buffer, size_t *lenp, loff_t *ppos)
3446 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3448 neigh_proc_update(ctl, write);
3451 EXPORT_SYMBOL(neigh_proc_dointvec);
3453 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3454 void __user *buffer,
3455 size_t *lenp, loff_t *ppos)
3457 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3459 neigh_proc_update(ctl, write);
3462 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3464 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3465 void __user *buffer,
3466 size_t *lenp, loff_t *ppos)
3468 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3470 neigh_proc_update(ctl, write);
3474 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3475 void __user *buffer,
3476 size_t *lenp, loff_t *ppos)
3478 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3480 neigh_proc_update(ctl, write);
3483 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3485 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3486 void __user *buffer,
3487 size_t *lenp, loff_t *ppos)
3489 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3491 neigh_proc_update(ctl, write);
3495 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3496 void __user *buffer,
3497 size_t *lenp, loff_t *ppos)
3499 struct neigh_parms *p = ctl->extra2;
3502 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3503 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3504 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3505 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3509 if (write && ret == 0) {
3510 /* update reachable_time as well, otherwise, the change will
3511 * only be effective after the next time neigh_periodic_work
3512 * decides to recompute it
3515 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Offset-only data pointer for a parms entry; fixed up by adding the
 * real &neigh_parms in neigh_sysctl_register().
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* One ctl_table slot bound to NEIGH_VAR_ ## data_attr with the given
 * procname, mode and handler.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* legacy aliases that read/write another attribute's storage */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3550 static struct neigh_sysctl_table {
3551 struct ctl_table_header *sysctl_header;
3552 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3553 } neigh_sysctl_template __read_mostly = {
3555 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3556 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3557 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3558 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3559 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3560 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3561 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3562 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3563 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3564 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3565 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3566 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3567 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3568 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3569 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3570 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3571 [NEIGH_VAR_GC_INTERVAL] = {
3572 .procname = "gc_interval",
3573 .maxlen = sizeof(int),
3575 .proc_handler = proc_dointvec_jiffies,
3577 [NEIGH_VAR_GC_THRESH1] = {
3578 .procname = "gc_thresh1",
3579 .maxlen = sizeof(int),
3583 .proc_handler = proc_dointvec_minmax,
3585 [NEIGH_VAR_GC_THRESH2] = {
3586 .procname = "gc_thresh2",
3587 .maxlen = sizeof(int),
3591 .proc_handler = proc_dointvec_minmax,
3593 [NEIGH_VAR_GC_THRESH3] = {
3594 .procname = "gc_thresh3",
3595 .maxlen = sizeof(int),
3599 .proc_handler = proc_dointvec_minmax,
3605 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3606 proc_handler *handler)
3609 struct neigh_sysctl_table *t;
3610 const char *dev_name_source;
3611 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3614 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3618 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3619 t->neigh_vars[i].data += (long) p;
3620 t->neigh_vars[i].extra1 = dev;
3621 t->neigh_vars[i].extra2 = p;
3625 dev_name_source = dev->name;
3626 /* Terminate the table early */
3627 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3628 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3630 struct neigh_table *tbl = p->tbl;
3631 dev_name_source = "default";
3632 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3633 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3634 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3635 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3640 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3642 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3643 /* RetransTime (in milliseconds)*/
3644 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3645 /* ReachableTime (in milliseconds) */
3646 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3648 /* Those handlers will update p->reachable_time after
3649 * base_reachable_time(_ms) is set to ensure the new timer starts being
3650 * applied after the next neighbour update instead of waiting for
3651 * neigh_periodic_work to update its value (can be multiple minutes)
3652 * So any handler that replaces them should do this as well
3655 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3656 neigh_proc_base_reachable_time;
3657 /* ReachableTime (in milliseconds) */
3658 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3659 neigh_proc_base_reachable_time;
3662 /* Don't export sysctls to unprivileged users */
3663 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3664 t->neigh_vars[0].procname = NULL;
3666 switch (neigh_parms_family(p)) {
3677 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3678 p_name, dev_name_source);
3680 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3681 if (!t->sysctl_header)
3684 p->sysctl_table = t;
3692 EXPORT_SYMBOL(neigh_sysctl_register);
3694 void neigh_sysctl_unregister(struct neigh_parms *p)
3696 if (p->sysctl_table) {
3697 struct neigh_sysctl_table *t = p->sysctl_table;
3698 p->sysctl_table = NULL;
3699 unregister_net_sysctl_table(t->sysctl_header);
3703 EXPORT_SYMBOL(neigh_sysctl_unregister);
3705 #endif /* CONFIG_SYSCTL */
3707 static int __init neigh_init(void)
3709 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3710 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3711 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3713 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3715 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3720 subsys_initcall(neigh_init);