]> asedeno.scripts.mit.edu Git - linux.git/blob - net/ipv4/devinet.c
Merge tag 'for-linus-20180906' of git://git.kernel.dk/linux-block
[linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Expand IPV4_DEVCONF() on the devconf_dflt structure of namespace @net.
 *
 * @net is parenthesised so that the macro also expands correctly when the
 * caller passes an expression rather than a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143
144         rcu_read_lock();
145         ifa = inet_lookup_ifaddr_rcu(net, addr);
146         if (!ifa) {
147                 struct flowi4 fl4 = { .daddr = addr };
148                 struct fib_result res = { 0 };
149                 struct fib_table *local;
150
151                 /* Fallback to FIB local table so that communication
152                  * over loopback subnets work.
153                  */
154                 local = fib_get_table(net, RT_TABLE_LOCAL);
155                 if (local &&
156                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
157                     res.type == RTN_LOCAL)
158                         result = FIB_RES_DEV(res);
159         } else {
160                 result = ifa->ifa_dev->dev;
161         }
162         if (result && devref)
163                 dev_hold(result);
164         rcu_read_unlock();
165         return result;
166 }
167 EXPORT_SYMBOL(__ip_dev_find);
168
169 /* called under RCU lock */
170 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
171 {
172         u32 hash = inet_addr_hash(net, addr);
173         struct in_ifaddr *ifa;
174
175         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
176                 if (ifa->ifa_local == addr &&
177                     net_eq(dev_net(ifa->ifa_dev->dev), net))
178                         return ifa;
179
180         return NULL;
181 }
182
183 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
184
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
186 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
187 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
188                          int destroy);
189 #ifdef CONFIG_SYSCTL
190 static int devinet_sysctl_register(struct in_device *idev);
191 static void devinet_sysctl_unregister(struct in_device *idev);
192 #else
193 static int devinet_sysctl_register(struct in_device *idev)
194 {
195         return 0;
196 }
197 static void devinet_sysctl_unregister(struct in_device *idev)
198 {
199 }
200 #endif
201
202 /* Locks all the inet devices. */
203
204 static struct in_ifaddr *inet_alloc_ifa(void)
205 {
206         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
207 }
208
209 static void inet_rcu_free_ifa(struct rcu_head *head)
210 {
211         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
212         if (ifa->ifa_dev)
213                 in_dev_put(ifa->ifa_dev);
214         kfree(ifa);
215 }
216
217 static void inet_free_ifa(struct in_ifaddr *ifa)
218 {
219         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
220 }
221
222 void in_dev_finish_destroy(struct in_device *idev)
223 {
224         struct net_device *dev = idev->dev;
225
226         WARN_ON(idev->ifa_list);
227         WARN_ON(idev->mc_list);
228         kfree(rcu_dereference_protected(idev->mc_hash, 1));
229 #ifdef NET_REFCNT_DEBUG
230         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
231 #endif
232         dev_put(dev);
233         if (!idev->dead)
234                 pr_err("Freeing alive in_device %p\n", idev);
235         else
236                 kfree(idev);
237 }
238 EXPORT_SYMBOL(in_dev_finish_destroy);
239
240 static struct in_device *inetdev_init(struct net_device *dev)
241 {
242         struct in_device *in_dev;
243         int err = -ENOMEM;
244
245         ASSERT_RTNL();
246
247         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
248         if (!in_dev)
249                 goto out;
250         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
251                         sizeof(in_dev->cnf));
252         in_dev->cnf.sysctl = NULL;
253         in_dev->dev = dev;
254         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
255         if (!in_dev->arp_parms)
256                 goto out_kfree;
257         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
258                 dev_disable_lro(dev);
259         /* Reference in_dev->dev */
260         dev_hold(dev);
261         /* Account for reference dev->ip_ptr (below) */
262         refcount_set(&in_dev->refcnt, 1);
263
264         err = devinet_sysctl_register(in_dev);
265         if (err) {
266                 in_dev->dead = 1;
267                 in_dev_put(in_dev);
268                 in_dev = NULL;
269                 goto out;
270         }
271         ip_mc_init_dev(in_dev);
272         if (dev->flags & IFF_UP)
273                 ip_mc_up(in_dev);
274
275         /* we can receive as soon as ip_ptr is set -- do this last */
276         rcu_assign_pointer(dev->ip_ptr, in_dev);
277 out:
278         return in_dev ?: ERR_PTR(err);
279 out_kfree:
280         kfree(in_dev);
281         in_dev = NULL;
282         goto out;
283 }
284
285 static void in_dev_rcu_put(struct rcu_head *head)
286 {
287         struct in_device *idev = container_of(head, struct in_device, rcu_head);
288         in_dev_put(idev);
289 }
290
291 static void inetdev_destroy(struct in_device *in_dev)
292 {
293         struct in_ifaddr *ifa;
294         struct net_device *dev;
295
296         ASSERT_RTNL();
297
298         dev = in_dev->dev;
299
300         in_dev->dead = 1;
301
302         ip_mc_destroy_dev(in_dev);
303
304         while ((ifa = in_dev->ifa_list) != NULL) {
305                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
306                 inet_free_ifa(ifa);
307         }
308
309         RCU_INIT_POINTER(dev->ip_ptr, NULL);
310
311         devinet_sysctl_unregister(in_dev);
312         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
313         arp_ifdown(dev);
314
315         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
316 }
317
318 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
319 {
320         rcu_read_lock();
321         for_primary_ifa(in_dev) {
322                 if (inet_ifa_match(a, ifa)) {
323                         if (!b || inet_ifa_match(b, ifa)) {
324                                 rcu_read_unlock();
325                                 return 1;
326                         }
327                 }
328         } endfor_ifa(in_dev);
329         rcu_read_unlock();
330         return 0;
331 }
332
/*
 * __inet_del_ifa - remove one address from a device's address list.
 * @in_dev:  device state owning the list
 * @ifap:    link (list head or a predecessor's ifa_next) that currently
 *           points at the address to remove
 * @destroy: when non-zero, the removed address is freed with inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa()
 * @portid:  passed through to rtmsg_ifa()
 *
 * The address is unlinked and unhashed, RTM_DELADDR is sent and the
 * inetaddr_chain notifier is called with NETDEV_DOWN.
 *
 * If the address is a primary one and the device is not dead, the entries
 * after it are scanned for secondaries with the same mask that match its
 * address.  Without IN_DEV_PROMOTE_SECONDARIES each of them is deleted and
 * announced as well; with it, the first one found is promoted to primary
 * after the deletion has been announced.
 *
 * Asserts that RTNL is held.
 */
333 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334                          int destroy, struct nlmsghdr *nlh, u32 portid)
335 {
336         struct in_ifaddr *promote = NULL;
337         struct in_ifaddr *ifa, *ifa1 = *ifap;
338         struct in_ifaddr *last_prim = in_dev->ifa_list;
339         struct in_ifaddr *prev_prom = NULL;
340         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
341
342         ASSERT_RTNL();
343
        /* A dead device skips the secondary scan and promotion entirely. */
344         if (in_dev->dead)
345                 goto no_promotions;
346
347         /* 1. Deleting primary ifaddr forces deletion all secondaries
348          * unless alias promotion is set
349          **/
350
351         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
352                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
353
354                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary seen whose scope value is
                         * not below ifa1's; a promoted address is re-linked
                         * right after it.
                         */
355                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
356                             ifa1->ifa_scope <= ifa->ifa_scope)
357                                 last_prim = ifa;
358
                        /* Not a secondary of ifa1's subnet: step over it and
                         * remember it as the possible predecessor of the
                         * promotion candidate.
                         */
359                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
360                             ifa1->ifa_mask != ifa->ifa_mask ||
361                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
362                                 ifap1 = &ifa->ifa_next;
363                                 prev_prom = ifa;
364                                 continue;
365                         }
366
367                         if (!do_promote) {
                                /* Unlink in place; ifap1 is left unchanged
                                 * so the loop continues with the successor.
                                 */
368                                 inet_hash_remove(ifa);
369                                 *ifap1 = ifa->ifa_next;
370
371                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
372                                 blocking_notifier_call_chain(&inetaddr_chain,
373                                                 NETDEV_DOWN, ifa);
374                                 inet_free_ifa(ifa);
375                         } else {
376                                 promote = ifa;
377                                 break;
378                         }
379                 }
380         }
381
382         /* On promotion all secondaries from subnet are changing
383          * the primary IP, we must remove all their routes silently
384          * and later to add them back with new prefsrc. Do this
385          * while all addresses are on the device list.
386          */
387         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
388                 if (ifa1->ifa_mask == ifa->ifa_mask &&
389                     inet_ifa_match(ifa1->ifa_address, ifa))
390                         fib_del_ifaddr(ifa, ifa1);
391         }
392
393 no_promotions:
394         /* 2. Unlink it */
395
396         *ifap = ifa1->ifa_next;
397         inet_hash_remove(ifa1);
398
399         /* 3. Announce address deletion */
400
401         /* Send message first, then call notifier.
402            At first sight, FIB update triggered by notifier
403            will refer to already deleted ifaddr, that could confuse
404            netlink listeners. It is not true: look, gated sees
405            that route deleted and if it still thinks that ifaddr
406            is valid, it will try to restore deleted routes... Grr.
407            So that, this order is correct.
408          */
409         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
410         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
411
412         if (promote) {
                /* Saved before re-linking below changes promote->ifa_next. */
413                 struct in_ifaddr *next_sec = promote->ifa_next;
414
                /* Move the promoted address to just after last_prim; with no
                 * prev_prom the list is left as it is.
                 */
415                 if (prev_prom) {
416                         prev_prom->ifa_next = promote->ifa_next;
417                         promote->ifa_next = last_prim->ifa_next;
418                         last_prim->ifa_next = promote;
419                 }
420
421                 promote->ifa_flags &= ~IFA_F_SECONDARY;
422                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
423                 blocking_notifier_call_chain(&inetaddr_chain,
424                                 NETDEV_UP, promote);
                /* Re-add the remaining addresses of the same subnet via
                 * fib_add_ifaddr(); see the fib_del_ifaddr() loop above.
                 */
425                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
426                         if (ifa1->ifa_mask != ifa->ifa_mask ||
427                             !inet_ifa_match(ifa1->ifa_address, ifa))
428                                         continue;
429                         fib_add_ifaddr(ifa);
430                 }
431
432         }
433         if (destroy)
434                 inet_free_ifa(ifa1);
435 }
436
437 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
438                          int destroy)
439 {
440         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
441 }
442
/* Delayed work item whose handler is check_lifetime(), defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
446
447 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
448                              u32 portid, struct netlink_ext_ack *extack)
449 {
450         struct in_device *in_dev = ifa->ifa_dev;
451         struct in_ifaddr *ifa1, **ifap, **last_primary;
452         struct in_validator_info ivi;
453         int ret;
454
455         ASSERT_RTNL();
456
457         if (!ifa->ifa_local) {
458                 inet_free_ifa(ifa);
459                 return 0;
460         }
461
462         ifa->ifa_flags &= ~IFA_F_SECONDARY;
463         last_primary = &in_dev->ifa_list;
464
465         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
466              ifap = &ifa1->ifa_next) {
467                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
468                     ifa->ifa_scope <= ifa1->ifa_scope)
469                         last_primary = &ifa1->ifa_next;
470                 if (ifa1->ifa_mask == ifa->ifa_mask &&
471                     inet_ifa_match(ifa1->ifa_address, ifa)) {
472                         if (ifa1->ifa_local == ifa->ifa_local) {
473                                 inet_free_ifa(ifa);
474                                 return -EEXIST;
475                         }
476                         if (ifa1->ifa_scope != ifa->ifa_scope) {
477                                 inet_free_ifa(ifa);
478                                 return -EINVAL;
479                         }
480                         ifa->ifa_flags |= IFA_F_SECONDARY;
481                 }
482         }
483
484         /* Allow any devices that wish to register ifaddr validtors to weigh
485          * in now, before changes are committed.  The rntl lock is serializing
486          * access here, so the state should not change between a validator call
487          * and a final notify on commit.  This isn't invoked on promotion under
488          * the assumption that validators are checking the address itself, and
489          * not the flags.
490          */
491         ivi.ivi_addr = ifa->ifa_address;
492         ivi.ivi_dev = ifa->ifa_dev;
493         ivi.extack = extack;
494         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
495                                            NETDEV_UP, &ivi);
496         ret = notifier_to_errno(ret);
497         if (ret) {
498                 inet_free_ifa(ifa);
499                 return ret;
500         }
501
502         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
503                 prandom_seed((__force u32) ifa->ifa_local);
504                 ifap = last_primary;
505         }
506
507         ifa->ifa_next = *ifap;
508         *ifap = ifa;
509
510         inet_hash_insert(dev_net(in_dev->dev), ifa);
511
512         cancel_delayed_work(&check_lifetime_work);
513         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
514
515         /* Send message first, then call notifier.
516            Notifier will trigger FIB update, so that
517            listeners of netlink will know about new ifaddr */
518         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
519         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
520
521         return 0;
522 }
523
524 static int inet_insert_ifa(struct in_ifaddr *ifa)
525 {
526         return __inet_insert_ifa(ifa, NULL, 0, NULL);
527 }
528
529 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
530 {
531         struct in_device *in_dev = __in_dev_get_rtnl(dev);
532
533         ASSERT_RTNL();
534
535         if (!in_dev) {
536                 inet_free_ifa(ifa);
537                 return -ENOBUFS;
538         }
539         ipv4_devconf_setall(in_dev);
540         neigh_parms_data_state_setall(in_dev->arp_parms);
541         if (ifa->ifa_dev != in_dev) {
542                 WARN_ON(ifa->ifa_dev);
543                 in_dev_hold(in_dev);
544                 ifa->ifa_dev = in_dev;
545         }
546         if (ipv4_is_loopback(ifa->ifa_local))
547                 ifa->ifa_scope = RT_SCOPE_HOST;
548         return inet_insert_ifa(ifa);
549 }
550
551 /* Caller must hold RCU or RTNL :
552  * We dont take a reference on found in_device
553  */
554 struct in_device *inetdev_by_index(struct net *net, int ifindex)
555 {
556         struct net_device *dev;
557         struct in_device *in_dev = NULL;
558
559         rcu_read_lock();
560         dev = dev_get_by_index_rcu(net, ifindex);
561         if (dev)
562                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
563         rcu_read_unlock();
564         return in_dev;
565 }
566 EXPORT_SYMBOL(inetdev_by_index);
567
568 /* Called only from RTNL semaphored context. No locks. */
569
570 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
571                                     __be32 mask)
572 {
573         ASSERT_RTNL();
574
575         for_primary_ifa(in_dev) {
576                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
577                         return ifa;
578         } endfor_ifa(in_dev);
579         return NULL;
580 }
581
582 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
583 {
584         struct ip_mreqn mreq = {
585                 .imr_multiaddr.s_addr = ifa->ifa_address,
586                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
587         };
588         int ret;
589
590         ASSERT_RTNL();
591
592         lock_sock(sk);
593         if (join)
594                 ret = ip_mc_join_group(sk, &mreq);
595         else
596                 ret = ip_mc_leave_group(sk, &mreq);
597         release_sock(sk);
598
599         return ret;
600 }
601
602 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
603                             struct netlink_ext_ack *extack)
604 {
605         struct net *net = sock_net(skb->sk);
606         struct nlattr *tb[IFA_MAX+1];
607         struct in_device *in_dev;
608         struct ifaddrmsg *ifm;
609         struct in_ifaddr *ifa, **ifap;
610         int err = -EINVAL;
611
612         ASSERT_RTNL();
613
614         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
615                           extack);
616         if (err < 0)
617                 goto errout;
618
619         ifm = nlmsg_data(nlh);
620         in_dev = inetdev_by_index(net, ifm->ifa_index);
621         if (!in_dev) {
622                 err = -ENODEV;
623                 goto errout;
624         }
625
626         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
627              ifap = &ifa->ifa_next) {
628                 if (tb[IFA_LOCAL] &&
629                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
630                         continue;
631
632                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
633                         continue;
634
635                 if (tb[IFA_ADDRESS] &&
636                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
637                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
638                         continue;
639
640                 if (ipv4_is_multicast(ifa->ifa_address))
641                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
642                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
643                 return 0;
644         }
645
646         err = -EADDRNOTAVAIL;
647 errout:
648         return err;
649 }
650
/*
 * Sentinel lifetime value: check_lifetime() never treats a lifetime equal
 * to this value as elapsed.
 */
#define INFINITY_LIFE_TIME      0xFFFFFFFF
652
/*
 * check_lifetime - delayed-work handler that expires and deprecates
 * addresses with finite lifetimes.
 * @work: unused
 *
 * Every bucket of inet_addr_lst is scanned twice.  The first pass runs
 * under rcu_read_lock(): it decides whether any address in the bucket
 * needs a change and lowers "next" to the earliest upcoming deadline.
 * Only when a change is needed is the bucket walked again under
 * rtnl_lock(), where addresses past their valid lifetime are deleted and
 * addresses past their preferred lifetime are flagged IFA_F_DEPRECATED
 * and announced with RTM_NEWADDR.
 *
 * Finally the work re-queues itself for the computed deadline, but never
 * sooner than ADDRCONF_TIMER_FUZZ_MAX jiffies from now.
 */
653 static void check_lifetime(struct work_struct *work)
654 {
655         unsigned long now, next, next_sec, next_sched;
656         struct in_ifaddr *ifa;
657         struct hlist_node *n;
658         int i;
659
660         now = jiffies;
        /* Latest acceptable next run; shortened below by nearer deadlines. */
661         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
662
663         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                /* Set when this bucket must be revisited under RTNL. */
664                 bool change_needed = false;
665
666                 rcu_read_lock();
667                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
668                         unsigned long age;
669
670                         if (ifa->ifa_flags & IFA_F_PERMANENT)
671                                 continue;
672
673                         /* We try to batch several events at once. */
                        /* age is in seconds (jiffies delta divided by HZ). */
674                         age = (now - ifa->ifa_tstamp +
675                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
676
677                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
678                             age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime elapsed. */
679                                 change_needed = true;
680                         } else if (ifa->ifa_preferred_lft ==
681                                    INFINITY_LIFE_TIME) {
682                                 continue;
683                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime elapsed: the next event
                                 * for this address is its valid-lifetime end.
                                 */
684                                 if (time_before(ifa->ifa_tstamp +
685                                                 ifa->ifa_valid_lft * HZ, next))
686                                         next = ifa->ifa_tstamp +
687                                                ifa->ifa_valid_lft * HZ;
688
689                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
690                                         change_needed = true;
691                         } else if (time_before(ifa->ifa_tstamp +
692                                                ifa->ifa_preferred_lft * HZ,
693                                                next)) {
                                /* Still preferred: the next event is the end
                                 * of the preferred lifetime.
                                 */
694                                 next = ifa->ifa_tstamp +
695                                        ifa->ifa_preferred_lft * HZ;
696                         }
697                 }
698                 rcu_read_unlock();
699                 if (!change_needed)
700                         continue;
                /* Second pass: the conditions are evaluated again here, with
                 * RTNL held, before anything is modified.
                 */
701                 rtnl_lock();
702                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
703                         unsigned long age;
704
705                         if (ifa->ifa_flags & IFA_F_PERMANENT)
706                                 continue;
707
708                         /* We try to batch several events at once. */
709                         age = (now - ifa->ifa_tstamp +
710                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
711
712                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
713                             age >= ifa->ifa_valid_lft) {
714                                 struct in_ifaddr **ifap;
715
                                /* inet_del_ifa() needs the link pointing at
                                 * ifa, so locate it in the device's list.
                                 */
716                                 for (ifap = &ifa->ifa_dev->ifa_list;
717                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
718                                         if (*ifap == ifa) {
719                                                 inet_del_ifa(ifa->ifa_dev,
720                                                              ifap, 1);
721                                                 break;
722                                         }
723                                 }
724                         } else if (ifa->ifa_preferred_lft !=
725                                    INFINITY_LIFE_TIME &&
726                                    age >= ifa->ifa_preferred_lft &&
727                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
728                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
729                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
730                         }
731                 }
732                 rtnl_unlock();
733         }
734
735         next_sec = round_jiffies_up(next);
736         next_sched = next;
737
738         /* If rounded timeout is accurate enough, accept it. */
739         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
740                 next_sched = next_sec;
741
        /* Re-read jiffies: the scan above may have taken a while. */
742         now = jiffies;
743         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
744         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
745                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
746
747         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
748                         next_sched - now);
749 }
750
751 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
752                              __u32 prefered_lft)
753 {
754         unsigned long timeout;
755
756         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
757
758         timeout = addrconf_timeout_fixup(valid_lft, HZ);
759         if (addrconf_finite_timeout(timeout))
760                 ifa->ifa_valid_lft = timeout;
761         else
762                 ifa->ifa_flags |= IFA_F_PERMANENT;
763
764         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
765         if (addrconf_finite_timeout(timeout)) {
766                 if (timeout == 0)
767                         ifa->ifa_flags |= IFA_F_DEPRECATED;
768                 ifa->ifa_preferred_lft = timeout;
769         }
770         ifa->ifa_tstamp = jiffies;
771         if (!ifa->ifa_cstamp)
772                 ifa->ifa_cstamp = ifa->ifa_tstamp;
773 }
774
775 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
776                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
777 {
778         struct nlattr *tb[IFA_MAX+1];
779         struct in_ifaddr *ifa;
780         struct ifaddrmsg *ifm;
781         struct net_device *dev;
782         struct in_device *in_dev;
783         int err;
784
785         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
786                           NULL);
787         if (err < 0)
788                 goto errout;
789
790         ifm = nlmsg_data(nlh);
791         err = -EINVAL;
792         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
793                 goto errout;
794
795         dev = __dev_get_by_index(net, ifm->ifa_index);
796         err = -ENODEV;
797         if (!dev)
798                 goto errout;
799
800         in_dev = __in_dev_get_rtnl(dev);
801         err = -ENOBUFS;
802         if (!in_dev)
803                 goto errout;
804
805         ifa = inet_alloc_ifa();
806         if (!ifa)
807                 /*
808                  * A potential indev allocation can be left alive, it stays
809                  * assigned to its device and is destroy with it.
810                  */
811                 goto errout;
812
813         ipv4_devconf_setall(in_dev);
814         neigh_parms_data_state_setall(in_dev->arp_parms);
815         in_dev_hold(in_dev);
816
817         if (!tb[IFA_ADDRESS])
818                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
819
820         INIT_HLIST_NODE(&ifa->hash);
821         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
822         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
823         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
824                                          ifm->ifa_flags;
825         ifa->ifa_scope = ifm->ifa_scope;
826         ifa->ifa_dev = in_dev;
827
828         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
829         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
830
831         if (tb[IFA_BROADCAST])
832                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
833
834         if (tb[IFA_LABEL])
835                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
836         else
837                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
838
839         if (tb[IFA_RT_PRIORITY])
840                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
841
842         if (tb[IFA_CACHEINFO]) {
843                 struct ifa_cacheinfo *ci;
844
845                 ci = nla_data(tb[IFA_CACHEINFO]);
846                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
847                         err = -EINVAL;
848                         goto errout_free;
849                 }
850                 *pvalid_lft = ci->ifa_valid;
851                 *pprefered_lft = ci->ifa_prefered;
852         }
853
854         return ifa;
855
856 errout_free:
857         inet_free_ifa(ifa);
858 errout:
859         return ERR_PTR(err);
860 }
861
862 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
863 {
864         struct in_device *in_dev = ifa->ifa_dev;
865         struct in_ifaddr *ifa1, **ifap;
866
867         if (!ifa->ifa_local)
868                 return NULL;
869
870         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
871              ifap = &ifa1->ifa_next) {
872                 if (ifa1->ifa_mask == ifa->ifa_mask &&
873                     inet_ifa_match(ifa1->ifa_address, ifa) &&
874                     ifa1->ifa_local == ifa->ifa_local)
875                         return ifa1;
876         }
877         return NULL;
878 }
879
880 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
881                             struct netlink_ext_ack *extack)
882 {
883         struct net *net = sock_net(skb->sk);
884         struct in_ifaddr *ifa;
885         struct in_ifaddr *ifa_existing;
886         __u32 valid_lft = INFINITY_LIFE_TIME;
887         __u32 prefered_lft = INFINITY_LIFE_TIME;
888
889         ASSERT_RTNL();
890
891         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
892         if (IS_ERR(ifa))
893                 return PTR_ERR(ifa);
894
895         ifa_existing = find_matching_ifa(ifa);
896         if (!ifa_existing) {
897                 /* It would be best to check for !NLM_F_CREATE here but
898                  * userspace already relies on not having to provide this.
899                  */
900                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
901                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
902                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
903                                                true, ifa);
904
905                         if (ret < 0) {
906                                 inet_free_ifa(ifa);
907                                 return ret;
908                         }
909                 }
910                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
911                                          extack);
912         } else {
913                 u32 new_metric = ifa->ifa_rt_priority;
914
915                 inet_free_ifa(ifa);
916
917                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
918                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
919                         return -EEXIST;
920                 ifa = ifa_existing;
921
922                 if (ifa->ifa_rt_priority != new_metric) {
923                         fib_modify_prefix_metric(ifa, new_metric);
924                         ifa->ifa_rt_priority = new_metric;
925                 }
926
927                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
928                 cancel_delayed_work(&check_lifetime_work);
929                 queue_delayed_work(system_power_efficient_wq,
930                                 &check_lifetime_work, 0);
931                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
932         }
933         return 0;
934 }
935
936 /*
937  *      Determine a default network mask, based on the IP address.
938  */
939
940 static int inet_abc_len(__be32 addr)
941 {
942         int rc = -1;    /* Something else, probably a multicast. */
943
944         if (ipv4_is_zeronet(addr))
945                 rc = 0;
946         else {
947                 __u32 haddr = ntohl(addr);
948
949                 if (IN_CLASSA(haddr))
950                         rc = 8;
951                 else if (IN_CLASSB(haddr))
952                         rc = 16;
953                 else if (IN_CLASSC(haddr))
954                         rc = 24;
955         }
956
957         return rc;
958 }
959
960
961 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
962 {
963         struct sockaddr_in sin_orig;
964         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
965         struct in_device *in_dev;
966         struct in_ifaddr **ifap = NULL;
967         struct in_ifaddr *ifa = NULL;
968         struct net_device *dev;
969         char *colon;
970         int ret = -EFAULT;
971         int tryaddrmatch = 0;
972
973         ifr->ifr_name[IFNAMSIZ - 1] = 0;
974
975         /* save original address for comparison */
976         memcpy(&sin_orig, sin, sizeof(*sin));
977
978         colon = strchr(ifr->ifr_name, ':');
979         if (colon)
980                 *colon = 0;
981
982         dev_load(net, ifr->ifr_name);
983
984         switch (cmd) {
985         case SIOCGIFADDR:       /* Get interface address */
986         case SIOCGIFBRDADDR:    /* Get the broadcast address */
987         case SIOCGIFDSTADDR:    /* Get the destination address */
988         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
989                 /* Note that these ioctls will not sleep,
990                    so that we do not impose a lock.
991                    One day we will be forced to put shlock here (I mean SMP)
992                  */
993                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
994                 memset(sin, 0, sizeof(*sin));
995                 sin->sin_family = AF_INET;
996                 break;
997
998         case SIOCSIFFLAGS:
999                 ret = -EPERM;
1000                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1001                         goto out;
1002                 break;
1003         case SIOCSIFADDR:       /* Set interface address (and family) */
1004         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1005         case SIOCSIFDSTADDR:    /* Set the destination address */
1006         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1007                 ret = -EPERM;
1008                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009                         goto out;
1010                 ret = -EINVAL;
1011                 if (sin->sin_family != AF_INET)
1012                         goto out;
1013                 break;
1014         default:
1015                 ret = -EINVAL;
1016                 goto out;
1017         }
1018
1019         rtnl_lock();
1020
1021         ret = -ENODEV;
1022         dev = __dev_get_by_name(net, ifr->ifr_name);
1023         if (!dev)
1024                 goto done;
1025
1026         if (colon)
1027                 *colon = ':';
1028
1029         in_dev = __in_dev_get_rtnl(dev);
1030         if (in_dev) {
1031                 if (tryaddrmatch) {
1032                         /* Matthias Andree */
1033                         /* compare label and address (4.4BSD style) */
1034                         /* note: we only do this for a limited set of ioctls
1035                            and only if the original address family was AF_INET.
1036                            This is checked above. */
1037                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038                              ifap = &ifa->ifa_next) {
1039                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1040                                     sin_orig.sin_addr.s_addr ==
1041                                                         ifa->ifa_local) {
1042                                         break; /* found */
1043                                 }
1044                         }
1045                 }
1046                 /* we didn't get a match, maybe the application is
1047                    4.3BSD-style and passed in junk so we fall back to
1048                    comparing just the label */
1049                 if (!ifa) {
1050                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1051                              ifap = &ifa->ifa_next)
1052                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1053                                         break;
1054                 }
1055         }
1056
1057         ret = -EADDRNOTAVAIL;
1058         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1059                 goto done;
1060
1061         switch (cmd) {
1062         case SIOCGIFADDR:       /* Get interface address */
1063                 ret = 0;
1064                 sin->sin_addr.s_addr = ifa->ifa_local;
1065                 break;
1066
1067         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1068                 ret = 0;
1069                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1070                 break;
1071
1072         case SIOCGIFDSTADDR:    /* Get the destination address */
1073                 ret = 0;
1074                 sin->sin_addr.s_addr = ifa->ifa_address;
1075                 break;
1076
1077         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1078                 ret = 0;
1079                 sin->sin_addr.s_addr = ifa->ifa_mask;
1080                 break;
1081
1082         case SIOCSIFFLAGS:
1083                 if (colon) {
1084                         ret = -EADDRNOTAVAIL;
1085                         if (!ifa)
1086                                 break;
1087                         ret = 0;
1088                         if (!(ifr->ifr_flags & IFF_UP))
1089                                 inet_del_ifa(in_dev, ifap, 1);
1090                         break;
1091                 }
1092                 ret = dev_change_flags(dev, ifr->ifr_flags);
1093                 break;
1094
1095         case SIOCSIFADDR:       /* Set interface address (and family) */
1096                 ret = -EINVAL;
1097                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1098                         break;
1099
1100                 if (!ifa) {
1101                         ret = -ENOBUFS;
1102                         ifa = inet_alloc_ifa();
1103                         if (!ifa)
1104                                 break;
1105                         INIT_HLIST_NODE(&ifa->hash);
1106                         if (colon)
1107                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1108                         else
1109                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1110                 } else {
1111                         ret = 0;
1112                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1113                                 break;
1114                         inet_del_ifa(in_dev, ifap, 0);
1115                         ifa->ifa_broadcast = 0;
1116                         ifa->ifa_scope = 0;
1117                 }
1118
1119                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1120
1121                 if (!(dev->flags & IFF_POINTOPOINT)) {
1122                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1123                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1124                         if ((dev->flags & IFF_BROADCAST) &&
1125                             ifa->ifa_prefixlen < 31)
1126                                 ifa->ifa_broadcast = ifa->ifa_address |
1127                                                      ~ifa->ifa_mask;
1128                 } else {
1129                         ifa->ifa_prefixlen = 32;
1130                         ifa->ifa_mask = inet_make_mask(32);
1131                 }
1132                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1133                 ret = inet_set_ifa(dev, ifa);
1134                 break;
1135
1136         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1137                 ret = 0;
1138                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1139                         inet_del_ifa(in_dev, ifap, 0);
1140                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1141                         inet_insert_ifa(ifa);
1142                 }
1143                 break;
1144
1145         case SIOCSIFDSTADDR:    /* Set the destination address */
1146                 ret = 0;
1147                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1148                         break;
1149                 ret = -EINVAL;
1150                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1151                         break;
1152                 ret = 0;
1153                 inet_del_ifa(in_dev, ifap, 0);
1154                 ifa->ifa_address = sin->sin_addr.s_addr;
1155                 inet_insert_ifa(ifa);
1156                 break;
1157
1158         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1159
1160                 /*
1161                  *      The mask we set must be legal.
1162                  */
1163                 ret = -EINVAL;
1164                 if (bad_mask(sin->sin_addr.s_addr, 0))
1165                         break;
1166                 ret = 0;
1167                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1168                         __be32 old_mask = ifa->ifa_mask;
1169                         inet_del_ifa(in_dev, ifap, 0);
1170                         ifa->ifa_mask = sin->sin_addr.s_addr;
1171                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1172
1173                         /* See if current broadcast address matches
1174                          * with current netmask, then recalculate
1175                          * the broadcast address. Otherwise it's a
1176                          * funny address, so don't touch it since
1177                          * the user seems to know what (s)he's doing...
1178                          */
1179                         if ((dev->flags & IFF_BROADCAST) &&
1180                             (ifa->ifa_prefixlen < 31) &&
1181                             (ifa->ifa_broadcast ==
1182                              (ifa->ifa_local|~old_mask))) {
1183                                 ifa->ifa_broadcast = (ifa->ifa_local |
1184                                                       ~sin->sin_addr.s_addr);
1185                         }
1186                         inet_insert_ifa(ifa);
1187                 }
1188                 break;
1189         }
1190 done:
1191         rtnl_unlock();
1192 out:
1193         return ret;
1194 }
1195
1196 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1197 {
1198         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1199         struct in_ifaddr *ifa;
1200         struct ifreq ifr;
1201         int done = 0;
1202
1203         if (WARN_ON(size > sizeof(struct ifreq)))
1204                 goto out;
1205
1206         if (!in_dev)
1207                 goto out;
1208
1209         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1210                 if (!buf) {
1211                         done += size;
1212                         continue;
1213                 }
1214                 if (len < size)
1215                         break;
1216                 memset(&ifr, 0, sizeof(struct ifreq));
1217                 strcpy(ifr.ifr_name, ifa->ifa_label);
1218
1219                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1220                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1221                                                                 ifa->ifa_local;
1222
1223                 if (copy_to_user(buf + done, &ifr, size)) {
1224                         done = -EFAULT;
1225                         break;
1226                 }
1227                 len  -= size;
1228                 done += size;
1229         }
1230 out:
1231         return done;
1232 }
1233
1234 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1235                                  int scope)
1236 {
1237         for_primary_ifa(in_dev) {
1238                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1239                     ifa->ifa_scope <= scope)
1240                         return ifa->ifa_local;
1241         } endfor_ifa(in_dev);
1242
1243         return 0;
1244 }
1245
1246 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1247 {
1248         __be32 addr = 0;
1249         struct in_device *in_dev;
1250         struct net *net = dev_net(dev);
1251         int master_idx;
1252
1253         rcu_read_lock();
1254         in_dev = __in_dev_get_rcu(dev);
1255         if (!in_dev)
1256                 goto no_in_dev;
1257
1258         for_primary_ifa(in_dev) {
1259                 if (ifa->ifa_scope > scope)
1260                         continue;
1261                 if (!dst || inet_ifa_match(dst, ifa)) {
1262                         addr = ifa->ifa_local;
1263                         break;
1264                 }
1265                 if (!addr)
1266                         addr = ifa->ifa_local;
1267         } endfor_ifa(in_dev);
1268
1269         if (addr)
1270                 goto out_unlock;
1271 no_in_dev:
1272         master_idx = l3mdev_master_ifindex_rcu(dev);
1273
1274         /* For VRFs, the VRF device takes the place of the loopback device,
1275          * with addresses on it being preferred.  Note in such cases the
1276          * loopback device will be among the devices that fail the master_idx
1277          * equality check in the loop below.
1278          */
1279         if (master_idx &&
1280             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1281             (in_dev = __in_dev_get_rcu(dev))) {
1282                 addr = in_dev_select_addr(in_dev, scope);
1283                 if (addr)
1284                         goto out_unlock;
1285         }
1286
1287         /* Not loopback addresses on loopback should be preferred
1288            in this case. It is important that lo is the first interface
1289            in dev_base list.
1290          */
1291         for_each_netdev_rcu(net, dev) {
1292                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1293                         continue;
1294
1295                 in_dev = __in_dev_get_rcu(dev);
1296                 if (!in_dev)
1297                         continue;
1298
1299                 addr = in_dev_select_addr(in_dev, scope);
1300                 if (addr)
1301                         goto out_unlock;
1302         }
1303 out_unlock:
1304         rcu_read_unlock();
1305         return addr;
1306 }
1307 EXPORT_SYMBOL(inet_select_addr);
1308
1309 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1310                               __be32 local, int scope)
1311 {
1312         int same = 0;
1313         __be32 addr = 0;
1314
1315         for_ifa(in_dev) {
1316                 if (!addr &&
1317                     (local == ifa->ifa_local || !local) &&
1318                     ifa->ifa_scope <= scope) {
1319                         addr = ifa->ifa_local;
1320                         if (same)
1321                                 break;
1322                 }
1323                 if (!same) {
1324                         same = (!local || inet_ifa_match(local, ifa)) &&
1325                                 (!dst || inet_ifa_match(dst, ifa));
1326                         if (same && addr) {
1327                                 if (local || !dst)
1328                                         break;
1329                                 /* Is the selected addr into dst subnet? */
1330                                 if (inet_ifa_match(addr, ifa))
1331                                         break;
1332                                 /* No, then can we use new local src? */
1333                                 if (ifa->ifa_scope <= scope) {
1334                                         addr = ifa->ifa_local;
1335                                         break;
1336                                 }
1337                                 /* search for large dst subnet for addr */
1338                                 same = 0;
1339                         }
1340                 }
1341         } endfor_ifa(in_dev);
1342
1343         return same ? addr : 0;
1344 }
1345
1346 /*
1347  * Confirm that local IP address exists using wildcards:
1348  * - net: netns to check, cannot be NULL
1349  * - in_dev: only on this interface, NULL=any interface
1350  * - dst: only in the same subnet as dst, 0=any dst
1351  * - local: address, 0=autoselect the local address
1352  * - scope: maximum allowed scope value for the local address
1353  */
1354 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1355                          __be32 dst, __be32 local, int scope)
1356 {
1357         __be32 addr = 0;
1358         struct net_device *dev;
1359
1360         if (in_dev)
1361                 return confirm_addr_indev(in_dev, dst, local, scope);
1362
1363         rcu_read_lock();
1364         for_each_netdev_rcu(net, dev) {
1365                 in_dev = __in_dev_get_rcu(dev);
1366                 if (in_dev) {
1367                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1368                         if (addr)
1369                                 break;
1370                 }
1371         }
1372         rcu_read_unlock();
1373
1374         return addr;
1375 }
1376 EXPORT_SYMBOL(inet_confirm_addr);
1377
1378 /*
1379  *      Device notifier
1380  */
1381
1382 int register_inetaddr_notifier(struct notifier_block *nb)
1383 {
1384         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1385 }
1386 EXPORT_SYMBOL(register_inetaddr_notifier);
1387
1388 int unregister_inetaddr_notifier(struct notifier_block *nb)
1389 {
1390         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1391 }
1392 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1393
1394 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1395 {
1396         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1399
1400 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1401 {
1402         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1403             nb);
1404 }
1405 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1406
1407 /* Rename ifa_labels for a device name change. Make some effort to preserve
1408  * existing alias numbering and to create unique labels if possible.
1409 */
1410 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1411 {
1412         struct in_ifaddr *ifa;
1413         int named = 0;
1414
1415         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1416                 char old[IFNAMSIZ], *dot;
1417
1418                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1419                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1420                 if (named++ == 0)
1421                         goto skip;
1422                 dot = strchr(old, ':');
1423                 if (!dot) {
1424                         sprintf(old, ":%d", named);
1425                         dot = old;
1426                 }
1427                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1428                         strcat(ifa->ifa_label, dot);
1429                 else
1430                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1431 skip:
1432                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1433         }
1434 }
1435
1436 static bool inetdev_valid_mtu(unsigned int mtu)
1437 {
1438         return mtu >= IPV4_MIN_MTU;
1439 }
1440
1441 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1442                                         struct in_device *in_dev)
1443
1444 {
1445         struct in_ifaddr *ifa;
1446
1447         for (ifa = in_dev->ifa_list; ifa;
1448              ifa = ifa->ifa_next) {
1449                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1450                          ifa->ifa_local, dev,
1451                          ifa->ifa_local, NULL,
1452                          dev->dev_addr, NULL);
1453         }
1454 }
1455
1456 /* Called only under RTNL semaphore */
1457
1458 static int inetdev_event(struct notifier_block *this, unsigned long event,
1459                          void *ptr)
1460 {
1461         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1462         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1463
1464         ASSERT_RTNL();
1465
1466         if (!in_dev) {
1467                 if (event == NETDEV_REGISTER) {
1468                         in_dev = inetdev_init(dev);
1469                         if (IS_ERR(in_dev))
1470                                 return notifier_from_errno(PTR_ERR(in_dev));
1471                         if (dev->flags & IFF_LOOPBACK) {
1472                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1473                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1474                         }
1475                 } else if (event == NETDEV_CHANGEMTU) {
1476                         /* Re-enabling IP */
1477                         if (inetdev_valid_mtu(dev->mtu))
1478                                 in_dev = inetdev_init(dev);
1479                 }
1480                 goto out;
1481         }
1482
1483         switch (event) {
1484         case NETDEV_REGISTER:
1485                 pr_debug("%s: bug\n", __func__);
1486                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1487                 break;
1488         case NETDEV_UP:
1489                 if (!inetdev_valid_mtu(dev->mtu))
1490                         break;
1491                 if (dev->flags & IFF_LOOPBACK) {
1492                         struct in_ifaddr *ifa = inet_alloc_ifa();
1493
1494                         if (ifa) {
1495                                 INIT_HLIST_NODE(&ifa->hash);
1496                                 ifa->ifa_local =
1497                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1498                                 ifa->ifa_prefixlen = 8;
1499                                 ifa->ifa_mask = inet_make_mask(8);
1500                                 in_dev_hold(in_dev);
1501                                 ifa->ifa_dev = in_dev;
1502                                 ifa->ifa_scope = RT_SCOPE_HOST;
1503                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1504                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1505                                                  INFINITY_LIFE_TIME);
1506                                 ipv4_devconf_setall(in_dev);
1507                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1508                                 inet_insert_ifa(ifa);
1509                         }
1510                 }
1511                 ip_mc_up(in_dev);
1512                 /* fall through */
1513         case NETDEV_CHANGEADDR:
1514                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1515                         break;
1516                 /* fall through */
1517         case NETDEV_NOTIFY_PEERS:
1518                 /* Send gratuitous ARP to notify of link change */
1519                 inetdev_send_gratuitous_arp(dev, in_dev);
1520                 break;
1521         case NETDEV_DOWN:
1522                 ip_mc_down(in_dev);
1523                 break;
1524         case NETDEV_PRE_TYPE_CHANGE:
1525                 ip_mc_unmap(in_dev);
1526                 break;
1527         case NETDEV_POST_TYPE_CHANGE:
1528                 ip_mc_remap(in_dev);
1529                 break;
1530         case NETDEV_CHANGEMTU:
1531                 if (inetdev_valid_mtu(dev->mtu))
1532                         break;
1533                 /* disable IP when MTU is not enough */
1534                 /* fall through */
1535         case NETDEV_UNREGISTER:
1536                 inetdev_destroy(in_dev);
1537                 break;
1538         case NETDEV_CHANGENAME:
1539                 /* Do not notify about label change, this event is
1540                  * not interesting to applications using netlink.
1541                  */
1542                 inetdev_changename(dev, in_dev);
1543
1544                 devinet_sysctl_unregister(in_dev);
1545                 devinet_sysctl_register(in_dev);
1546                 break;
1547         }
1548 out:
1549         return NOTIFY_DONE;
1550 }
1551
1552 static struct notifier_block ip_netdev_notifier = {
1553         .notifier_call = inetdev_event,
1554 };
1555
1556 static size_t inet_nlmsg_size(void)
1557 {
1558         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1559                + nla_total_size(4) /* IFA_ADDRESS */
1560                + nla_total_size(4) /* IFA_LOCAL */
1561                + nla_total_size(4) /* IFA_BROADCAST */
1562                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1563                + nla_total_size(4)  /* IFA_FLAGS */
1564                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1565                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1566 }
1567
1568 static inline u32 cstamp_delta(unsigned long cstamp)
1569 {
1570         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1571 }
1572
1573 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1574                          unsigned long tstamp, u32 preferred, u32 valid)
1575 {
1576         struct ifa_cacheinfo ci;
1577
1578         ci.cstamp = cstamp_delta(cstamp);
1579         ci.tstamp = cstamp_delta(tstamp);
1580         ci.ifa_prefered = preferred;
1581         ci.ifa_valid = valid;
1582
1583         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1584 }
1585
1586 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1587                             u32 portid, u32 seq, int event, unsigned int flags)
1588 {
1589         struct ifaddrmsg *ifm;
1590         struct nlmsghdr  *nlh;
1591         u32 preferred, valid;
1592
1593         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1594         if (!nlh)
1595                 return -EMSGSIZE;
1596
1597         ifm = nlmsg_data(nlh);
1598         ifm->ifa_family = AF_INET;
1599         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1600         ifm->ifa_flags = ifa->ifa_flags;
1601         ifm->ifa_scope = ifa->ifa_scope;
1602         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1603
1604         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1605                 preferred = ifa->ifa_preferred_lft;
1606                 valid = ifa->ifa_valid_lft;
1607                 if (preferred != INFINITY_LIFE_TIME) {
1608                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1609
1610                         if (preferred > tval)
1611                                 preferred -= tval;
1612                         else
1613                                 preferred = 0;
1614                         if (valid != INFINITY_LIFE_TIME) {
1615                                 if (valid > tval)
1616                                         valid -= tval;
1617                                 else
1618                                         valid = 0;
1619                         }
1620                 }
1621         } else {
1622                 preferred = INFINITY_LIFE_TIME;
1623                 valid = INFINITY_LIFE_TIME;
1624         }
1625         if ((ifa->ifa_address &&
1626              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1627             (ifa->ifa_local &&
1628              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1629             (ifa->ifa_broadcast &&
1630              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1631             (ifa->ifa_label[0] &&
1632              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1633             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1634             (ifa->ifa_rt_priority &&
1635              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1636             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1637                           preferred, valid))
1638                 goto nla_put_failure;
1639
1640         nlmsg_end(skb, nlh);
1641         return 0;
1642
1643 nla_put_failure:
1644         nlmsg_cancel(skb, nlh);
1645         return -EMSGSIZE;
1646 }
1647
1648 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1649 {
1650         struct net *net = sock_net(skb->sk);
1651         int h, s_h;
1652         int idx, s_idx;
1653         int ip_idx, s_ip_idx;
1654         struct net_device *dev;
1655         struct in_device *in_dev;
1656         struct in_ifaddr *ifa;
1657         struct hlist_head *head;
1658
1659         s_h = cb->args[0];
1660         s_idx = idx = cb->args[1];
1661         s_ip_idx = ip_idx = cb->args[2];
1662
1663         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1664                 idx = 0;
1665                 head = &net->dev_index_head[h];
1666                 rcu_read_lock();
1667                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1668                           net->dev_base_seq;
1669                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1670                         if (idx < s_idx)
1671                                 goto cont;
1672                         if (h > s_h || idx > s_idx)
1673                                 s_ip_idx = 0;
1674                         in_dev = __in_dev_get_rcu(dev);
1675                         if (!in_dev)
1676                                 goto cont;
1677
1678                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1679                              ifa = ifa->ifa_next, ip_idx++) {
1680                                 if (ip_idx < s_ip_idx)
1681                                         continue;
1682                                 if (inet_fill_ifaddr(skb, ifa,
1683                                              NETLINK_CB(cb->skb).portid,
1684                                              cb->nlh->nlmsg_seq,
1685                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1686                                         rcu_read_unlock();
1687                                         goto done;
1688                                 }
1689                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1690                         }
1691 cont:
1692                         idx++;
1693                 }
1694                 rcu_read_unlock();
1695         }
1696
1697 done:
1698         cb->args[0] = h;
1699         cb->args[1] = idx;
1700         cb->args[2] = ip_idx;
1701
1702         return skb->len;
1703 }
1704
1705 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1706                       u32 portid)
1707 {
1708         struct sk_buff *skb;
1709         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1710         int err = -ENOBUFS;
1711         struct net *net;
1712
1713         net = dev_net(ifa->ifa_dev->dev);
1714         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1715         if (!skb)
1716                 goto errout;
1717
1718         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1719         if (err < 0) {
1720                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1721                 WARN_ON(err == -EMSGSIZE);
1722                 kfree_skb(skb);
1723                 goto errout;
1724         }
1725         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1726         return;
1727 errout:
1728         if (err < 0)
1729                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1730 }
1731
1732 static size_t inet_get_link_af_size(const struct net_device *dev,
1733                                     u32 ext_filter_mask)
1734 {
1735         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1736
1737         if (!in_dev)
1738                 return 0;
1739
1740         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1741 }
1742
1743 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1744                              u32 ext_filter_mask)
1745 {
1746         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1747         struct nlattr *nla;
1748         int i;
1749
1750         if (!in_dev)
1751                 return -ENODATA;
1752
1753         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1754         if (!nla)
1755                 return -EMSGSIZE;
1756
1757         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1758                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1759
1760         return 0;
1761 }
1762
1763 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1764         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1765 };
1766
1767 static int inet_validate_link_af(const struct net_device *dev,
1768                                  const struct nlattr *nla)
1769 {
1770         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1771         int err, rem;
1772
1773         if (dev && !__in_dev_get_rcu(dev))
1774                 return -EAFNOSUPPORT;
1775
1776         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1777         if (err < 0)
1778                 return err;
1779
1780         if (tb[IFLA_INET_CONF]) {
1781                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1782                         int cfgid = nla_type(a);
1783
1784                         if (nla_len(a) < 4)
1785                                 return -EINVAL;
1786
1787                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1788                                 return -EINVAL;
1789                 }
1790         }
1791
1792         return 0;
1793 }
1794
1795 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1796 {
1797         struct in_device *in_dev = __in_dev_get_rcu(dev);
1798         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1799         int rem;
1800
1801         if (!in_dev)
1802                 return -EAFNOSUPPORT;
1803
1804         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1805                 BUG();
1806
1807         if (tb[IFLA_INET_CONF]) {
1808                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1809                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1810         }
1811
1812         return 0;
1813 }
1814
1815 static int inet_netconf_msgsize_devconf(int type)
1816 {
1817         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1818                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1819         bool all = false;
1820
1821         if (type == NETCONFA_ALL)
1822                 all = true;
1823
1824         if (all || type == NETCONFA_FORWARDING)
1825                 size += nla_total_size(4);
1826         if (all || type == NETCONFA_RP_FILTER)
1827                 size += nla_total_size(4);
1828         if (all || type == NETCONFA_MC_FORWARDING)
1829                 size += nla_total_size(4);
1830         if (all || type == NETCONFA_BC_FORWARDING)
1831                 size += nla_total_size(4);
1832         if (all || type == NETCONFA_PROXY_NEIGH)
1833                 size += nla_total_size(4);
1834         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1835                 size += nla_total_size(4);
1836
1837         return size;
1838 }
1839
1840 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1841                                      struct ipv4_devconf *devconf, u32 portid,
1842                                      u32 seq, int event, unsigned int flags,
1843                                      int type)
1844 {
1845         struct nlmsghdr  *nlh;
1846         struct netconfmsg *ncm;
1847         bool all = false;
1848
1849         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1850                         flags);
1851         if (!nlh)
1852                 return -EMSGSIZE;
1853
1854         if (type == NETCONFA_ALL)
1855                 all = true;
1856
1857         ncm = nlmsg_data(nlh);
1858         ncm->ncm_family = AF_INET;
1859
1860         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1861                 goto nla_put_failure;
1862
1863         if (!devconf)
1864                 goto out;
1865
1866         if ((all || type == NETCONFA_FORWARDING) &&
1867             nla_put_s32(skb, NETCONFA_FORWARDING,
1868                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1869                 goto nla_put_failure;
1870         if ((all || type == NETCONFA_RP_FILTER) &&
1871             nla_put_s32(skb, NETCONFA_RP_FILTER,
1872                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1873                 goto nla_put_failure;
1874         if ((all || type == NETCONFA_MC_FORWARDING) &&
1875             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1876                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1877                 goto nla_put_failure;
1878         if ((all || type == NETCONFA_BC_FORWARDING) &&
1879             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1880                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1881                 goto nla_put_failure;
1882         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1883             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1884                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1885                 goto nla_put_failure;
1886         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1887             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1888                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1889                 goto nla_put_failure;
1890
1891 out:
1892         nlmsg_end(skb, nlh);
1893         return 0;
1894
1895 nla_put_failure:
1896         nlmsg_cancel(skb, nlh);
1897         return -EMSGSIZE;
1898 }
1899
1900 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1901                                  int ifindex, struct ipv4_devconf *devconf)
1902 {
1903         struct sk_buff *skb;
1904         int err = -ENOBUFS;
1905
1906         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1907         if (!skb)
1908                 goto errout;
1909
1910         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1911                                         event, 0, type);
1912         if (err < 0) {
1913                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1914                 WARN_ON(err == -EMSGSIZE);
1915                 kfree_skb(skb);
1916                 goto errout;
1917         }
1918         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1919         return;
1920 errout:
1921         if (err < 0)
1922                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1923 }
1924
1925 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1926         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1927         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1928         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1929         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1930         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1931 };
1932
1933 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1934                                     struct nlmsghdr *nlh,
1935                                     struct netlink_ext_ack *extack)
1936 {
1937         struct net *net = sock_net(in_skb->sk);
1938         struct nlattr *tb[NETCONFA_MAX+1];
1939         struct netconfmsg *ncm;
1940         struct sk_buff *skb;
1941         struct ipv4_devconf *devconf;
1942         struct in_device *in_dev;
1943         struct net_device *dev;
1944         int ifindex;
1945         int err;
1946
1947         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1948                           devconf_ipv4_policy, extack);
1949         if (err < 0)
1950                 goto errout;
1951
1952         err = -EINVAL;
1953         if (!tb[NETCONFA_IFINDEX])
1954                 goto errout;
1955
1956         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1957         switch (ifindex) {
1958         case NETCONFA_IFINDEX_ALL:
1959                 devconf = net->ipv4.devconf_all;
1960                 break;
1961         case NETCONFA_IFINDEX_DEFAULT:
1962                 devconf = net->ipv4.devconf_dflt;
1963                 break;
1964         default:
1965                 dev = __dev_get_by_index(net, ifindex);
1966                 if (!dev)
1967                         goto errout;
1968                 in_dev = __in_dev_get_rtnl(dev);
1969                 if (!in_dev)
1970                         goto errout;
1971                 devconf = &in_dev->cnf;
1972                 break;
1973         }
1974
1975         err = -ENOBUFS;
1976         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1977         if (!skb)
1978                 goto errout;
1979
1980         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1981                                         NETLINK_CB(in_skb).portid,
1982                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1983                                         NETCONFA_ALL);
1984         if (err < 0) {
1985                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1986                 WARN_ON(err == -EMSGSIZE);
1987                 kfree_skb(skb);
1988                 goto errout;
1989         }
1990         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1991 errout:
1992         return err;
1993 }
1994
1995 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1996                                      struct netlink_callback *cb)
1997 {
1998         struct net *net = sock_net(skb->sk);
1999         int h, s_h;
2000         int idx, s_idx;
2001         struct net_device *dev;
2002         struct in_device *in_dev;
2003         struct hlist_head *head;
2004
2005         s_h = cb->args[0];
2006         s_idx = idx = cb->args[1];
2007
2008         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2009                 idx = 0;
2010                 head = &net->dev_index_head[h];
2011                 rcu_read_lock();
2012                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2013                           net->dev_base_seq;
2014                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2015                         if (idx < s_idx)
2016                                 goto cont;
2017                         in_dev = __in_dev_get_rcu(dev);
2018                         if (!in_dev)
2019                                 goto cont;
2020
2021                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2022                                                       &in_dev->cnf,
2023                                                       NETLINK_CB(cb->skb).portid,
2024                                                       cb->nlh->nlmsg_seq,
2025                                                       RTM_NEWNETCONF,
2026                                                       NLM_F_MULTI,
2027                                                       NETCONFA_ALL) < 0) {
2028                                 rcu_read_unlock();
2029                                 goto done;
2030                         }
2031                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2032 cont:
2033                         idx++;
2034                 }
2035                 rcu_read_unlock();
2036         }
2037         if (h == NETDEV_HASHENTRIES) {
2038                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2039                                               net->ipv4.devconf_all,
2040                                               NETLINK_CB(cb->skb).portid,
2041                                               cb->nlh->nlmsg_seq,
2042                                               RTM_NEWNETCONF, NLM_F_MULTI,
2043                                               NETCONFA_ALL) < 0)
2044                         goto done;
2045                 else
2046                         h++;
2047         }
2048         if (h == NETDEV_HASHENTRIES + 1) {
2049                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2050                                               net->ipv4.devconf_dflt,
2051                                               NETLINK_CB(cb->skb).portid,
2052                                               cb->nlh->nlmsg_seq,
2053                                               RTM_NEWNETCONF, NLM_F_MULTI,
2054                                               NETCONFA_ALL) < 0)
2055                         goto done;
2056                 else
2057                         h++;
2058         }
2059 done:
2060         cb->args[0] = h;
2061         cb->args[1] = idx;
2062
2063         return skb->len;
2064 }
2065
2066 #ifdef CONFIG_SYSCTL
2067
2068 static void devinet_copy_dflt_conf(struct net *net, int i)
2069 {
2070         struct net_device *dev;
2071
2072         rcu_read_lock();
2073         for_each_netdev_rcu(net, dev) {
2074                 struct in_device *in_dev;
2075
2076                 in_dev = __in_dev_get_rcu(dev);
2077                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2078                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2079         }
2080         rcu_read_unlock();
2081 }
2082
2083 /* called with RTNL locked */
2084 static void inet_forward_change(struct net *net)
2085 {
2086         struct net_device *dev;
2087         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2088
2089         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2090         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2091         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2092                                     NETCONFA_FORWARDING,
2093                                     NETCONFA_IFINDEX_ALL,
2094                                     net->ipv4.devconf_all);
2095         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2096                                     NETCONFA_FORWARDING,
2097                                     NETCONFA_IFINDEX_DEFAULT,
2098                                     net->ipv4.devconf_dflt);
2099
2100         for_each_netdev(net, dev) {
2101                 struct in_device *in_dev;
2102
2103                 if (on)
2104                         dev_disable_lro(dev);
2105
2106                 in_dev = __in_dev_get_rtnl(dev);
2107                 if (in_dev) {
2108                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2109                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2110                                                     NETCONFA_FORWARDING,
2111                                                     dev->ifindex, &in_dev->cnf);
2112                 }
2113         }
2114 }
2115
2116 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2117 {
2118         if (cnf == net->ipv4.devconf_dflt)
2119                 return NETCONFA_IFINDEX_DEFAULT;
2120         else if (cnf == net->ipv4.devconf_all)
2121                 return NETCONFA_IFINDEX_ALL;
2122         else {
2123                 struct in_device *idev
2124                         = container_of(cnf, struct in_device, cnf);
2125                 return idev->dev->ifindex;
2126         }
2127 }
2128
2129 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2130                              void __user *buffer,
2131                              size_t *lenp, loff_t *ppos)
2132 {
2133         int old_value = *(int *)ctl->data;
2134         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2135         int new_value = *(int *)ctl->data;
2136
2137         if (write) {
2138                 struct ipv4_devconf *cnf = ctl->extra1;
2139                 struct net *net = ctl->extra2;
2140                 int i = (int *)ctl->data - cnf->data;
2141                 int ifindex;
2142
2143                 set_bit(i, cnf->state);
2144
2145                 if (cnf == net->ipv4.devconf_dflt)
2146                         devinet_copy_dflt_conf(net, i);
2147                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2148                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2149                         if ((new_value == 0) && (old_value != 0))
2150                                 rt_cache_flush(net);
2151
2152                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2153                     new_value != old_value)
2154                         rt_cache_flush(net);
2155
2156                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2157                     new_value != old_value) {
2158                         ifindex = devinet_conf_ifindex(net, cnf);
2159                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2160                                                     NETCONFA_RP_FILTER,
2161                                                     ifindex, cnf);
2162                 }
2163                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2164                     new_value != old_value) {
2165                         ifindex = devinet_conf_ifindex(net, cnf);
2166                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2167                                                     NETCONFA_PROXY_NEIGH,
2168                                                     ifindex, cnf);
2169                 }
2170                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2171                     new_value != old_value) {
2172                         ifindex = devinet_conf_ifindex(net, cnf);
2173                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2174                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2175                                                     ifindex, cnf);
2176                 }
2177         }
2178
2179         return ret;
2180 }
2181
2182 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2183                                   void __user *buffer,
2184                                   size_t *lenp, loff_t *ppos)
2185 {
2186         int *valp = ctl->data;
2187         int val = *valp;
2188         loff_t pos = *ppos;
2189         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2190
2191         if (write && *valp != val) {
2192                 struct net *net = ctl->extra2;
2193
2194                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2195                         if (!rtnl_trylock()) {
2196                                 /* Restore the original values before restarting */
2197                                 *valp = val;
2198                                 *ppos = pos;
2199                                 return restart_syscall();
2200                         }
2201                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2202                                 inet_forward_change(net);
2203                         } else {
2204                                 struct ipv4_devconf *cnf = ctl->extra1;
2205                                 struct in_device *idev =
2206                                         container_of(cnf, struct in_device, cnf);
2207                                 if (*valp)
2208                                         dev_disable_lro(idev->dev);
2209                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2210                                                             NETCONFA_FORWARDING,
2211                                                             idev->dev->ifindex,
2212                                                             cnf);
2213                         }
2214                         rtnl_unlock();
2215                         rt_cache_flush(net);
2216                 } else
2217                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2218                                                     NETCONFA_FORWARDING,
2219                                                     NETCONFA_IFINDEX_DEFAULT,
2220                                                     net->ipv4.devconf_dflt);
2221         }
2222
2223         return ret;
2224 }
2225
2226 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2227                                 void __user *buffer,
2228                                 size_t *lenp, loff_t *ppos)
2229 {
2230         int *valp = ctl->data;
2231         int val = *valp;
2232         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2233         struct net *net = ctl->extra2;
2234
2235         if (write && *valp != val)
2236                 rt_cache_flush(net);
2237
2238         return ret;
2239 }
2240
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at the attribute's slot in the global ipv4_devconf (slot
 * index is IPV4_DEVCONF_<attr> - 1) and .extra1 at ipv4_devconf itself.
 * @name is the procname, @mval the file mode and @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2263
2264 static struct devinet_sysctl_table {
2265         struct ctl_table_header *sysctl_header;
2266         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2267 } devinet_sysctl = {
2268         .devinet_vars = {
2269                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2270                                              devinet_sysctl_forward),
2271                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2272                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2273
2274                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2275                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2276                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2277                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2278                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2279                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2280                                         "accept_source_route"),
2281                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2282                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2283                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2284                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2285                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2286                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2287                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2288                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2289                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2290                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2291                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2292                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2293                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2294                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2295                                         "force_igmp_version"),
2296                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2297                                         "igmpv2_unsolicited_report_interval"),
2298                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2299                                         "igmpv3_unsolicited_report_interval"),
2300                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2301                                         "ignore_routes_with_linkdown"),
2302                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2303                                         "drop_gratuitous_arp"),
2304
2305                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2306                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2307                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2308                                               "promote_secondaries"),
2309                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2310                                               "route_localnet"),
2311                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2312                                               "drop_unicast_in_l2_multicast"),
2313         },
2314 };
2315
2316 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2317                                      int ifindex, struct ipv4_devconf *p)
2318 {
2319         int i;
2320         struct devinet_sysctl_table *t;
2321         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2322
2323         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2324         if (!t)
2325                 goto out;
2326
2327         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2328                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2329                 t->devinet_vars[i].extra1 = p;
2330                 t->devinet_vars[i].extra2 = net;
2331         }
2332
2333         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2334
2335         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2336         if (!t->sysctl_header)
2337                 goto free;
2338
2339         p->sysctl = t;
2340
2341         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2342                                     ifindex, p);
2343         return 0;
2344
2345 free:
2346         kfree(t);
2347 out:
2348         return -ENOBUFS;
2349 }
2350
2351 static void __devinet_sysctl_unregister(struct net *net,
2352                                         struct ipv4_devconf *cnf, int ifindex)
2353 {
2354         struct devinet_sysctl_table *t = cnf->sysctl;
2355
2356         if (t) {
2357                 cnf->sysctl = NULL;
2358                 unregister_net_sysctl_table(t->sysctl_header);
2359                 kfree(t);
2360         }
2361
2362         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2363 }
2364
2365 static int devinet_sysctl_register(struct in_device *idev)
2366 {
2367         int err;
2368
2369         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2370                 return -EINVAL;
2371
2372         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2373         if (err)
2374                 return err;
2375         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2376                                         idev->dev->ifindex, &idev->cnf);
2377         if (err)
2378                 neigh_sysctl_unregister(idev->arp_parms);
2379         return err;
2380 }
2381
2382 static void devinet_sysctl_unregister(struct in_device *idev)
2383 {
2384         struct net *net = dev_net(idev->dev);
2385
2386         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2387         neigh_sysctl_unregister(idev->arp_parms);
2388 }
2389
2390 static struct ctl_table ctl_forward_entry[] = {
2391         {
2392                 .procname       = "ip_forward",
2393                 .data           = &ipv4_devconf.data[
2394                                         IPV4_DEVCONF_FORWARDING - 1],
2395                 .maxlen         = sizeof(int),
2396                 .mode           = 0644,
2397                 .proc_handler   = devinet_sysctl_forward,
2398                 .extra1         = &ipv4_devconf,
2399                 .extra2         = &init_net,
2400         },
2401         { },
2402 };
2403 #endif
2404
2405 static __net_init int devinet_init_net(struct net *net)
2406 {
2407         int err;
2408         struct ipv4_devconf *all, *dflt;
2409 #ifdef CONFIG_SYSCTL
2410         struct ctl_table *tbl = ctl_forward_entry;
2411         struct ctl_table_header *forw_hdr;
2412 #endif
2413
2414         err = -ENOMEM;
2415         all = &ipv4_devconf;
2416         dflt = &ipv4_devconf_dflt;
2417
2418         if (!net_eq(net, &init_net)) {
2419                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2420                 if (!all)
2421                         goto err_alloc_all;
2422
2423                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2424                 if (!dflt)
2425                         goto err_alloc_dflt;
2426
2427 #ifdef CONFIG_SYSCTL
2428                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2429                 if (!tbl)
2430                         goto err_alloc_ctl;
2431
2432                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2433                 tbl[0].extra1 = all;
2434                 tbl[0].extra2 = net;
2435 #endif
2436         }
2437
2438 #ifdef CONFIG_SYSCTL
2439         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2440         if (err < 0)
2441                 goto err_reg_all;
2442
2443         err = __devinet_sysctl_register(net, "default",
2444                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2445         if (err < 0)
2446                 goto err_reg_dflt;
2447
2448         err = -ENOMEM;
2449         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2450         if (!forw_hdr)
2451                 goto err_reg_ctl;
2452         net->ipv4.forw_hdr = forw_hdr;
2453 #endif
2454
2455         net->ipv4.devconf_all = all;
2456         net->ipv4.devconf_dflt = dflt;
2457         return 0;
2458
2459 #ifdef CONFIG_SYSCTL
2460 err_reg_ctl:
2461         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2462 err_reg_dflt:
2463         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2464 err_reg_all:
2465         if (tbl != ctl_forward_entry)
2466                 kfree(tbl);
2467 err_alloc_ctl:
2468 #endif
2469         if (dflt != &ipv4_devconf_dflt)
2470                 kfree(dflt);
2471 err_alloc_dflt:
2472         if (all != &ipv4_devconf)
2473                 kfree(all);
2474 err_alloc_all:
2475         return err;
2476 }
2477
2478 static __net_exit void devinet_exit_net(struct net *net)
2479 {
2480 #ifdef CONFIG_SYSCTL
2481         struct ctl_table *tbl;
2482
2483         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2484         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2485         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2486                                     NETCONFA_IFINDEX_DEFAULT);
2487         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2488                                     NETCONFA_IFINDEX_ALL);
2489         kfree(tbl);
2490 #endif
2491         kfree(net->ipv4.devconf_dflt);
2492         kfree(net->ipv4.devconf_all);
2493 }
2494
2495 static __net_initdata struct pernet_operations devinet_ops = {
2496         .init = devinet_init_net,
2497         .exit = devinet_exit_net,
2498 };
2499
2500 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2501         .family           = AF_INET,
2502         .fill_link_af     = inet_fill_link_af,
2503         .get_link_af_size = inet_get_link_af_size,
2504         .validate_link_af = inet_validate_link_af,
2505         .set_link_af      = inet_set_link_af,
2506 };
2507
2508 void __init devinet_init(void)
2509 {
2510         int i;
2511
2512         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2513                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2514
2515         register_pernet_subsys(&devinet_ops);
2516
2517         register_gifconf(PF_INET, inet_gifconf);
2518         register_netdevice_notifier(&ip_netdev_notifier);
2519
2520         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2521
2522         rtnl_af_register(&inet_af_ops);
2523
2524         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2525         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2526         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2527         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2528                       inet_netconf_dump_devconf, 0);
2529 }