/* net/sched/sch_api.c */
1 /*
2  * net/sched/sch_api.c  Packet scheduler API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/slab.h>
31 #include <linux/hashtable.h>
32
33 #include <net/net_namespace.h>
34 #include <net/sock.h>
35 #include <net/netlink.h>
36 #include <net/pkt_sched.h>
37 #include <net/pkt_cls.h>
38
39 /*
40
41    Short review.
42    -------------
43
44    This file consists of two interrelated parts:
45
46    1. queueing disciplines manager frontend.
47    2. traffic classes manager frontend.
48
49    Generally, queueing discipline ("qdisc") is a black box,
50    which is able to enqueue packets and to dequeue them (when
51    device is ready to send something) in order and at times
52    determined by algorithm hidden in it.
53
   qdisc's are divided into two categories:
55    - "queues", which have no internal structure visible from outside.
56    - "schedulers", which split all the packets to "traffic classes",
57      using "packet classifiers" (look at cls_api.c)
58
   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.
61
   The goal of the routines in this file is to translate
   the handle-based information supplied by the user into a form
   more intelligible to the kernel, to perform sanity checks and
   the parts of the work that are common to all qdiscs,
   and to provide rtnetlink notifications.
67
68    All real intelligent work is done inside qdisc modules.
69
70
71
72    Every discipline has two major routines: enqueue and dequeue.
73
74    ---dequeue
75
76    dequeue usually returns a skb to send. It is allowed to return NULL,
77    but it does not mean that queue is empty, it just means that
78    discipline does not want to send anything this time.
79    Queue is really empty if q->q.qlen == 0.
80    For complicated disciplines with multiple queues q->q is not
81    real packet queue, but however q->q.qlen must be valid.
82
83    ---enqueue
84
85    enqueue returns 0, if packet was enqueued successfully.
86    If packet (this one or another one) was dropped, it returns
87    not zero error code.
88    NET_XMIT_DROP        - this packet dropped
89      Expected action: do not backoff, but wait until queue will clear.
90    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
91      Expected action: backoff or ignore
92
93    Auxiliary routines:
94
95    ---peek
96
97    like dequeue but without removing a packet from the queue
98
99    ---reset
100
101    returns qdisc to initial state: purge all buffers, clear all
102    timers, counters (except for statistics) etc.
103
104    ---init
105
106    initializes newly created qdisc.
107
108    ---destroy
109
110    destroys resources allocated by init and during lifetime of qdisc.
111
112    ---change
113
114    changes qdisc parameters.
115  */
116
117 /* Protects list of registered TC modules. It is pure SMP lock. */
118 static DEFINE_RWLOCK(qdisc_mod_lock);
119
120
121 /************************************************
122  *      Queueing disciplines manipulation.      *
123  ************************************************/
124
125
/* The list of all installed queueing disciplines, linked through
 * Qdisc_ops::next and protected by qdisc_mod_lock.
 */
static struct Qdisc_ops *qdisc_base;
129
130 /* Register/unregister queueing discipline */
131
132 int register_qdisc(struct Qdisc_ops *qops)
133 {
134         struct Qdisc_ops *q, **qp;
135         int rc = -EEXIST;
136
137         write_lock(&qdisc_mod_lock);
138         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
139                 if (!strcmp(qops->id, q->id))
140                         goto out;
141
142         if (qops->enqueue == NULL)
143                 qops->enqueue = noop_qdisc_ops.enqueue;
144         if (qops->peek == NULL) {
145                 if (qops->dequeue == NULL)
146                         qops->peek = noop_qdisc_ops.peek;
147                 else
148                         goto out_einval;
149         }
150         if (qops->dequeue == NULL)
151                 qops->dequeue = noop_qdisc_ops.dequeue;
152
153         if (qops->cl_ops) {
154                 const struct Qdisc_class_ops *cops = qops->cl_ops;
155
156                 if (!(cops->find && cops->walk && cops->leaf))
157                         goto out_einval;
158
159                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
160                         goto out_einval;
161         }
162
163         qops->next = NULL;
164         *qp = qops;
165         rc = 0;
166 out:
167         write_unlock(&qdisc_mod_lock);
168         return rc;
169
170 out_einval:
171         rc = -EINVAL;
172         goto out;
173 }
174 EXPORT_SYMBOL(register_qdisc);
175
176 int unregister_qdisc(struct Qdisc_ops *qops)
177 {
178         struct Qdisc_ops *q, **qp;
179         int err = -ENOENT;
180
181         write_lock(&qdisc_mod_lock);
182         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
183                 if (q == qops)
184                         break;
185         if (q) {
186                 *qp = q->next;
187                 q->next = NULL;
188                 err = 0;
189         }
190         write_unlock(&qdisc_mod_lock);
191         return err;
192 }
193 EXPORT_SYMBOL(unregister_qdisc);
194
/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	/* qdisc_mod_lock guards default_qdisc_ops against a concurrent
	 * qdisc_set_default(); copy at most @len bytes, NUL-terminated.
	 */
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
202
203 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204 {
205         struct Qdisc_ops *q = NULL;
206
207         for (q = qdisc_base; q; q = q->next) {
208                 if (!strcmp(name, q->id)) {
209                         if (!try_module_get(q->owner))
210                                 q = NULL;
211                         break;
212                 }
213         }
214
215         return q;
216 }
217
218 /* Set new default qdisc to use */
219 int qdisc_set_default(const char *name)
220 {
221         const struct Qdisc_ops *ops;
222
223         if (!capable(CAP_NET_ADMIN))
224                 return -EPERM;
225
226         write_lock(&qdisc_mod_lock);
227         ops = qdisc_lookup_default(name);
228         if (!ops) {
229                 /* Not found, drop lock and try to load module */
230                 write_unlock(&qdisc_mod_lock);
231                 request_module("sch_%s", name);
232                 write_lock(&qdisc_mod_lock);
233
234                 ops = qdisc_lookup_default(name);
235         }
236
237         if (ops) {
238                 /* Set new default */
239                 module_put(default_qdisc_ops->owner);
240                 default_qdisc_ops = ops;
241         }
242         write_unlock(&qdisc_mod_lock);
243
244         return ops ? 0 : -ENOENT;
245 }
246
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	/* Runs as a late initcall so that built-in qdisc modules have
	 * registered before the default is selected.
	 */
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
255
256 /* We know handle. Find qdisc among all qdisc's attached to device
257  * (root qdisc, all its children, children of children etc.)
258  * Note: caller either uses rtnl or rcu_read_lock()
259  */
260
261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
262 {
263         struct Qdisc *q;
264
265         if (!qdisc_dev(root))
266                 return (root->handle == handle ? root : NULL);
267
268         if (!(root->flags & TCQ_F_BUILTIN) &&
269             root->handle == handle)
270                 return root;
271
272         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
273                 if (q->handle == handle)
274                         return q;
275         }
276         return NULL;
277 }
278
279 void qdisc_hash_add(struct Qdisc *q, bool invisible)
280 {
281         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
282                 ASSERT_RTNL();
283                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
284                 if (invisible)
285                         q->flags |= TCQ_F_INVISIBLE;
286         }
287 }
288 EXPORT_SYMBOL(qdisc_hash_add);
289
290 void qdisc_hash_del(struct Qdisc *q)
291 {
292         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293                 ASSERT_RTNL();
294                 hash_del_rcu(&q->hash);
295         }
296 }
297 EXPORT_SYMBOL(qdisc_hash_del);
298
299 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
300 {
301         struct Qdisc *q;
302
303         if (!handle)
304                 return NULL;
305         q = qdisc_match_from_root(dev->qdisc, handle);
306         if (q)
307                 goto out;
308
309         if (dev_ingress_queue(dev))
310                 q = qdisc_match_from_root(
311                         dev_ingress_queue(dev)->qdisc_sleeping,
312                         handle);
313 out:
314         return q;
315 }
316
317 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318 {
319         struct netdev_queue *nq;
320         struct Qdisc *q;
321
322         if (!handle)
323                 return NULL;
324         q = qdisc_match_from_root(dev->qdisc, handle);
325         if (q)
326                 goto out;
327
328         nq = dev_ingress_queue_rcu(dev);
329         if (nq)
330                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331 out:
332         return q;
333 }
334
335 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336 {
337         unsigned long cl;
338         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
339
340         if (cops == NULL)
341                 return NULL;
342         cl = cops->find(p, classid);
343
344         if (cl == 0)
345                 return NULL;
346         return cops->leaf(p, cl);
347 }
348
349 /* Find queueing discipline by name */
350
351 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
352 {
353         struct Qdisc_ops *q = NULL;
354
355         if (kind) {
356                 read_lock(&qdisc_mod_lock);
357                 for (q = qdisc_base; q; q = q->next) {
358                         if (nla_strcmp(kind, q->id) == 0) {
359                                 if (!try_module_get(q->owner))
360                                         q = NULL;
361                                 break;
362                         }
363                 }
364                 read_unlock(&qdisc_mod_lock);
365         }
366         return q;
367 }
368
369 /* The linklayer setting were not transferred from iproute2, in older
370  * versions, and the rate tables lookup systems have been dropped in
371  * the kernel. To keep backward compatible with older iproute2 tc
372  * utils, we detect the linklayer setting by detecting if the rate
373  * table were modified.
374  *
375  * For linklayer ATM table entries, the rate table will be aligned to
376  * 48 bytes, thus some table entries will contain the same value.  The
377  * mpu (min packet unit) is also encoded into the old rate table, thus
378  * starting from the mpu, we find low and high table entries for
379  * mapping this cell.  If these entries contain the same value, when
380  * the rate tables have been modified for linklayer ATM.
381  *
382  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
383  * and then roundup to the next cell, calc the table entry one below,
384  * and compare.
385  */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	/* Round the mpu up to a 48-byte cell boundary and compute the
	 * rate-table indices on both sides of the next cell (see the
	 * long comment above this function).
	 */
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	/* Identical entries within one cell mean the table was aligned
	 * to 48-byte ATM cells by userspace.
	 */
	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
407
/* Singly linked list of shared rate tables; see qdisc_get_rtab(). */
static struct qdisc_rate_table *qdisc_rtab_list;
409
410 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
411                                         struct nlattr *tab,
412                                         struct netlink_ext_ack *extack)
413 {
414         struct qdisc_rate_table *rtab;
415
416         if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
417             nla_len(tab) != TC_RTAB_SIZE) {
418                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
419                 return NULL;
420         }
421
422         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
423                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
424                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
425                         rtab->refcnt++;
426                         return rtab;
427                 }
428         }
429
430         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
431         if (rtab) {
432                 rtab->rate = *r;
433                 rtab->refcnt = 1;
434                 memcpy(rtab->data, nla_data(tab), 1024);
435                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
436                         r->linklayer = __detect_linklayer(r, rtab->data);
437                 rtab->next = qdisc_rtab_list;
438                 qdisc_rtab_list = rtab;
439         } else {
440                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
441         }
442         return rtab;
443 }
444 EXPORT_SYMBOL(qdisc_get_rtab);
445
446 void qdisc_put_rtab(struct qdisc_rate_table *tab)
447 {
448         struct qdisc_rate_table *rtab, **rtabp;
449
450         if (!tab || --tab->refcnt)
451                 return;
452
453         for (rtabp = &qdisc_rtab_list;
454              (rtab = *rtabp) != NULL;
455              rtabp = &rtab->next) {
456                 if (rtab == tab) {
457                         *rtabp = rtab->next;
458                         kfree(rtab);
459                         return;
460                 }
461         }
462 }
463 EXPORT_SYMBOL(qdisc_put_rtab);
464
/* All installed size tables, shared between qdiscs. */
static LIST_HEAD(qdisc_stab_list);

/* Netlink policy for the nested TCA_STAB attribute: a fixed-size base
 * spec plus an optional binary array of u16 segment sizes.
 */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};
471
472 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
473                                                struct netlink_ext_ack *extack)
474 {
475         struct nlattr *tb[TCA_STAB_MAX + 1];
476         struct qdisc_size_table *stab;
477         struct tc_sizespec *s;
478         unsigned int tsize = 0;
479         u16 *tab = NULL;
480         int err;
481
482         err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
483                                           extack);
484         if (err < 0)
485                 return ERR_PTR(err);
486         if (!tb[TCA_STAB_BASE]) {
487                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
488                 return ERR_PTR(-EINVAL);
489         }
490
491         s = nla_data(tb[TCA_STAB_BASE]);
492
493         if (s->tsize > 0) {
494                 if (!tb[TCA_STAB_DATA]) {
495                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
496                         return ERR_PTR(-EINVAL);
497                 }
498                 tab = nla_data(tb[TCA_STAB_DATA]);
499                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
500         }
501
502         if (tsize != s->tsize || (!tab && tsize > 0)) {
503                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
504                 return ERR_PTR(-EINVAL);
505         }
506
507         list_for_each_entry(stab, &qdisc_stab_list, list) {
508                 if (memcmp(&stab->szopts, s, sizeof(*s)))
509                         continue;
510                 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
511                         continue;
512                 stab->refcnt++;
513                 return stab;
514         }
515
516         stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
517         if (!stab)
518                 return ERR_PTR(-ENOMEM);
519
520         stab->refcnt = 1;
521         stab->szopts = *s;
522         if (tsize > 0)
523                 memcpy(stab->data, tab, tsize * sizeof(u16));
524
525         list_add_tail(&stab->list, &qdisc_stab_list);
526
527         return stab;
528 }
529
530 void qdisc_put_stab(struct qdisc_size_table *tab)
531 {
532         if (!tab)
533                 return;
534
535         if (--tab->refcnt == 0) {
536                 list_del(&tab->list);
537                 kfree_rcu(tab, rcu);
538         }
539 }
540 EXPORT_SYMBOL(qdisc_put_stab);
541
542 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
543 {
544         struct nlattr *nest;
545
546         nest = nla_nest_start_noflag(skb, TCA_STAB);
547         if (nest == NULL)
548                 goto nla_put_failure;
549         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
550                 goto nla_put_failure;
551         nla_nest_end(skb, nest);
552
553         return skb->len;
554
555 nla_put_failure:
556         return -1;
557 }
558
/* Map skb->len through size table @stab into an effective packet
 * length and store it in qdisc_skb_cb(skb)->pkt_len.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	/* An empty table means only the overhead adjustment applies. */
	if (unlikely(!stab->szopts.tsize))
		goto out;

	/* Select the table slot for this (aligned) length. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Beyond the table: extrapolate from the last entry plus
		 * the wrapped remainder slot.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	/* Never report less than one byte. */
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
587
588 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
589 {
590         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
591                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
592                         txt, qdisc->ops->id, qdisc->handle >> 16);
593                 qdisc->flags |= TCQ_F_WARN_NONWC;
594         }
595 }
596 EXPORT_SYMBOL(qdisc_warn_nonwc);
597
/* hrtimer callback: reschedule the root qdisc's TX processing. */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	/* rcu_read_lock() keeps the qdisc structures stable while we
	 * dereference the root for __netif_schedule().
	 */
	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}
609
610 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
611                                  clockid_t clockid)
612 {
613         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
614         wd->timer.function = qdisc_watchdog;
615         wd->qdisc = qdisc;
616 }
617 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
618
/* Initialise a qdisc watchdog on the default CLOCK_MONOTONIC clock. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);
624
/* Arm the watchdog to fire at absolute time @expires (nanoseconds). */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	/* Don't arm the timer while the qdisc tree is being deactivated. */
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	/* Skip reprogramming the hrtimer for an unchanged deadline. */
	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
640
/* Cancel a pending watchdog timer, waiting for a running callback. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
646
647 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
648 {
649         struct hlist_head *h;
650         unsigned int i;
651
652         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
653
654         if (h != NULL) {
655                 for (i = 0; i < n; i++)
656                         INIT_HLIST_HEAD(&h[i]);
657         }
658         return h;
659 }
660
/* Double the class hash table once the load factor exceeds 0.75.
 * The actual rehash runs under the qdisc tree lock; allocation failure
 * is tolerated (the old, smaller table simply stays in use).
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	/* Allocate outside the tree lock; bail out quietly on OOM. */
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* Move every class into its bucket under the new mask. */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
696
697 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
698 {
699         unsigned int size = 4;
700
701         clhash->hash = qdisc_class_hash_alloc(size);
702         if (!clhash->hash)
703                 return -ENOMEM;
704         clhash->hashsize  = size;
705         clhash->hashmask  = size - 1;
706         clhash->hashelems = 0;
707         return 0;
708 }
709 EXPORT_SYMBOL(qdisc_class_hash_init);
710
/* Free the bucket array; the caller is expected to have emptied it. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
716
717 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
718                              struct Qdisc_class_common *cl)
719 {
720         unsigned int h;
721
722         INIT_HLIST_NODE(&cl->hnode);
723         h = qdisc_class_hash(cl->classid, clhash->hashmask);
724         hlist_add_head(&cl->hnode, &clhash->hash[h]);
725         clhash->hashelems++;
726 }
727 EXPORT_SYMBOL(qdisc_class_hash_insert);
728
/* Unlink class @cl from its bucket and update the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
736
737 /* Allocate an unique handle from space managed by kernel
738  * Possible range is [8000-FFFF]:0000 (0x8000 values)
739  */
740 static u32 qdisc_alloc_handle(struct net_device *dev)
741 {
742         int i = 0x8000;
743         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
744
745         do {
746                 autohandle += TC_H_MAKE(0x10000U, 0);
747                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
748                         autohandle = TC_H_MAKE(0x80000000U, 0);
749                 if (!qdisc_lookup(dev, autohandle))
750                         return autohandle;
751                 cond_resched();
752         } while (--i > 0);
753
754         return 0;
755 }
756
/* Propagate a decrease of @n packets / @len bytes from @sch up through
 * its ancestor qdiscs, fixing their qlen/backlog counters and letting
 * a parent class know when its child qdisc became empty.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	/* Walk upwards until the root, an ingress qdisc, or a qdisc
	 * flagged as parentless is reached.
	 */
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
805
806 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
807                               void *type_data)
808 {
809         struct net_device *dev = qdisc_dev(sch);
810         int err;
811
812         sch->flags &= ~TCQ_F_OFFLOADED;
813         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
814                 return 0;
815
816         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
817         if (err == -EOPNOTSUPP)
818                 return 0;
819
820         if (!err)
821                 sch->flags |= TCQ_F_OFFLOADED;
822
823         return err;
824 }
825 EXPORT_SYMBOL(qdisc_offload_dump_helper);
826
827 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
828                                 struct Qdisc *new, struct Qdisc *old,
829                                 enum tc_setup_type type, void *type_data,
830                                 struct netlink_ext_ack *extack)
831 {
832         bool any_qdisc_is_offloaded;
833         int err;
834
835         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
836                 return;
837
838         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
839
840         /* Don't report error if the graft is part of destroy operation. */
841         if (!err || !new || new == &noop_qdisc)
842                 return;
843
844         /* Don't report error if the parent, the old child and the new
845          * one are not offloaded.
846          */
847         any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
848         any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
849         any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
850
851         if (any_qdisc_is_offloaded)
852                 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
853 }
854 EXPORT_SYMBOL(qdisc_offload_graft_helper);
855
856 static void qdisc_offload_graft_root(struct net_device *dev,
857                                      struct Qdisc *new, struct Qdisc *old,
858                                      struct netlink_ext_ack *extack)
859 {
860         struct tc_root_qopt_offload graft_offload = {
861                 .command        = TC_ROOT_GRAFT,
862                 .handle         = new ? new->handle : 0,
863                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
864                                   (old && old->flags & TCQ_F_INGRESS),
865         };
866
867         qdisc_offload_graft_helper(dev, NULL, new, old,
868                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
869 }
870
/* Fill a netlink message describing qdisc @q (kind, options, offload
 * state, size table and statistics).  Returns the new skb length on
 * success, or -1 when the message did not fit (skb is rolled back).
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Report shared ingress/egress filter block indices if set. */
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	/* Qdisc-specific options. */
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Per-cpu stats are aggregated by the gnet_stats helpers. */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Roll the skb back to its pre-message state. */
	nlmsg_trim(skb, b);
	return -1;
}
949
950 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
951 {
952         if (q->flags & TCQ_F_BUILTIN)
953                 return true;
954         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
955                 return true;
956
957         return false;
958 }
959
960 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
961                         struct nlmsghdr *n, u32 clid,
962                         struct Qdisc *old, struct Qdisc *new)
963 {
964         struct sk_buff *skb;
965         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
966
967         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
968         if (!skb)
969                 return -ENOBUFS;
970
971         if (old && !tc_qdisc_dump_ignore(old, false)) {
972                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
973                                   0, RTM_DELQDISC) < 0)
974                         goto err_out;
975         }
976         if (new && !tc_qdisc_dump_ignore(new, false)) {
977                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
978                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
979                         goto err_out;
980         }
981
982         if (skb->len)
983                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
984                                       n->nlmsg_flags & NLM_F_ECHO);
985
986 err_out:
987         kfree_skb(skb);
988         return -EINVAL;
989 }
990
991 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
992                                struct nlmsghdr *n, u32 clid,
993                                struct Qdisc *old, struct Qdisc *new)
994 {
995         if (new || old)
996                 qdisc_notify(net, skb, n, clid, old, new);
997
998         if (old)
999                 qdisc_put(old);
1000 }
1001
1002 static void qdisc_clear_nolock(struct Qdisc *sch)
1003 {
1004         sch->flags &= ~TCQ_F_NOLOCK;
1005         if (!(sch->flags & TCQ_F_CPUSTATS))
1006                 return;
1007
1008         free_percpu(sch->cpu_bstats);
1009         free_percpu(sch->cpu_qstats);
1010         sch->cpu_bstats = NULL;
1011         sch->cpu_qstats = NULL;
1012         sch->flags &= ~TCQ_F_CPUSTATS;
1013 }
1014
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		/* Root (or ingress) graft: attach directly to the device. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		/* Ingress qdiscs live on the single ingress queue, not on
		 * the tx queues.
		 */
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		/* Quiesce the device while swapping root qdiscs. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* Qdiscs with ->attach (e.g. multi-queue aware ones) do the
		 * per-queue attachment themselves below.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* Each additional tx queue holds its own reference. */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			/* dev->qdisc keeps a reference of its own. */
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft inside a classful parent via its class ops. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}
1111
1112 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1113                                    struct netlink_ext_ack *extack)
1114 {
1115         u32 block_index;
1116
1117         if (tca[TCA_INGRESS_BLOCK]) {
1118                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1119
1120                 if (!block_index) {
1121                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1122                         return -EINVAL;
1123                 }
1124                 if (!sch->ops->ingress_block_set) {
1125                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1126                         return -EOPNOTSUPP;
1127                 }
1128                 sch->ops->ingress_block_set(sch, block_index);
1129         }
1130         if (tca[TCA_EGRESS_BLOCK]) {
1131                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1132
1133                 if (!block_index) {
1134                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1135                         return -EINVAL;
1136                 }
1137                 if (!sch->ops->egress_block_set) {
1138                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1139                         return -EOPNOTSUPP;
1140                 }
1141                 sch->ops->egress_block_set(sch, block_index);
1142         }
1143         return 0;
1144 }
1145
1146 /*
1147    Allocate and initialize new qdisc.
1148
1149    Parameters are passed via opt.
1150  */
1151
/* Allocate and initialize a new qdisc of the kind named in tca[TCA_KIND],
 * attached to @dev_queue with parent classid @parent and handle @handle
 * (0 = auto-allocate).  On failure returns NULL and stores a negative
 * errno in *errp; -EAGAIN means a module was loaded with RTNL dropped
 * and the caller must replay the whole request.
 */
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	/* Kind unknown: try to autoload the sch_<kind> module. */
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	/* qdisc_lookup_ops() took a module reference; qdisc_alloc also
	 * takes a reference on the device (released via err_out3).
	 */
	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		/* Ingress qdiscs all share the fixed TC_H_INGRESS handle. */
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	/* Optional size table used to adjust packet lengths. */
	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		/* Pick the seqcount the estimator must sample under: the
		 * root's for ordinary children, our own otherwise.
		 */
		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1309
/* Change parameters of an existing qdisc @sch according to the netlink
 * attributes in @tca (TCA_OPTIONS, TCA_STAB, TCA_RATE).  Block indexes
 * cannot be changed after creation.  Returns 0 or a negative errno.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new size table (or NULL) before dropping the old
	 * one; readers access sch->stab under RCU.
	 */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1355
/* Walker state used to detect grafting cycles: 'p' is the candidate
 * parent qdisc being searched for among descendants, 'depth' the
 * current recursion depth (bounded by check_loop_fn).
 */
struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
1364
1365 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1366 {
1367         struct check_loop_arg   arg;
1368
1369         if (q->ops->cl_ops == NULL)
1370                 return 0;
1371
1372         arg.w.stop = arg.w.skip = arg.w.count = 0;
1373         arg.w.fn = check_loop_fn;
1374         arg.depth = depth;
1375         arg.p = p;
1376         q->ops->cl_ops->walk(q, &arg.w);
1377         return arg.w.stop ? -ELOOP : 0;
1378 }
1379
1380 static int
1381 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1382 {
1383         struct Qdisc *leaf;
1384         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1385         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1386
1387         leaf = cops->leaf(q, cl);
1388         if (leaf) {
1389                 if (leaf == arg->p || arg->depth > 7)
1390                         return -ELOOP;
1391                 return check_loop(leaf, arg->p, arg->depth + 1);
1392         }
1393         return 0;
1394 }
1395
/* Netlink attribute validation policy shared by the RTM_*QDISC and
 * RTM_*TCLASS handlers.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
1406
1407 /*
1408  * Delete/get qdisc.
1409  */
1410
/* Handle RTM_DELQDISC and RTM_GETQDISC requests: locate the qdisc named
 * by tcm_parent/tcm_handle on the given device, then either graft NULL
 * in its place (delete) or just send a notification (get).
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deleting requires CAP_NET_ADMIN; plain get does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Parent classid given: resolve the child qdisc through
		 * the parent (root, ingress, or an ordinary class).
		 */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		/* No parent: look the qdisc up directly by handle. */
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		/* Grafting NULL in place of q deletes it. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1491
1492 /*
1493  * Create/change qdisc.
1494  */
1495
/* Handle RTM_NEWQDISC: create a new qdisc, replace an existing one, or
 * change the parameters of an existing one, depending on tcm_parent,
 * tcm_handle and the NLM_F_CREATE/REPLACE/EXCL flags.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	/* The device may have disappeared while we replayed after a
	 * module load; look it up again each time.
	 */
	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		/* Resolve the existing child qdisc under the requested
		 * parent (root, ingress, or a class of qdisc p).
		 */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* A specific handle was requested that does
				 * not match the currently attached child.
				 */
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				/* Refuse to graft a qdisc under itself or
				 * under one of its own descendants.
				 */
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				/* Reference handed to qdisc_graft() below. */
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know, that some child q is already
				 *   attached to this parent and have choice:
				 *   either to change it or to create/graft new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, requestor wanted to say,
				 *   that qdisc tcm_handle is not expected
				 *   to exist, so that we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of hole in API, we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft, if
				 *   user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent choose the tx queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		/* -EAGAIN: qdisc_create() dropped RTNL to load a module;
		 * the whole request must be replayed.
		 */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}
1676
/* Dump @root and, when @recur is set, every qdisc hashed on its device,
 * into @skb.  *q_idx_p is the dump cursor: entries below @s_q_idx were
 * sent in a previous batch and are skipped.  Returns 0 when done for
 * this root, -1 when @skb filled up (cursor is left for resumption).
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	/* skb full: report -1 so the caller stops and netlink resumes
	 * later from the saved cursor.
	 */
	ret = -1;
	goto out;
}
1730
/* Netlink dump handler for RTM_GETQDISC: walk every device in the netns and
 * dump its qdisc hierarchy plus its ingress qdisc. Resumes from
 * cb->args[0] (device index) and cb->args[1] (qdisc index within a device).
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;	/* new device: restart qdisc resume index */
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		/* Ingress qdisc is dumped separately; recur=false because the
		 * device hash was already covered by the call above.
		 */
		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	/* Save resume position for the next dump invocation. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1782
1783
1784
1785 /************************************************
1786  *      Traffic classes manipulation.           *
1787  ************************************************/
1788
/* Fill one RTM_*TCLASS netlink message for class @cl of qdisc @q into @skb.
 *
 * Emits the tcmsg header, TCA_KIND, the class-specific attributes via
 * cl_ops->dump(), and the statistics blocks via cl_ops->dump_stats().
 * Returns skb->len on success; on any failure the partially built message
 * is trimmed back off the skb and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	/* Class dumps can be long-running; be nice to other tasks. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	/* Defaults only; presumably cl_ops->dump() below overwrites
	 * tcm_parent/tcm_handle with the real class ids — verify per qdisc.
	 */
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1834
1835 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1836                          struct nlmsghdr *n, struct Qdisc *q,
1837                          unsigned long cl, int event)
1838 {
1839         struct sk_buff *skb;
1840         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1841         int err = 0;
1842
1843         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1844         if (!skb)
1845                 return -ENOBUFS;
1846
1847         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1848                 kfree_skb(skb);
1849                 return -EINVAL;
1850         }
1851
1852         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1853                              n->nlmsg_flags & NLM_F_ECHO);
1854         if (err > 0)
1855                 err = 0;
1856         return err;
1857 }
1858
/* Delete class @cl via cops->delete() and notify listeners with an
 * RTM_DELTCLASS message. The message is filled in *before* the delete —
 * presumably because the class data is no longer dumpable afterwards, so
 * this ordering must not be changed. Returns 0 or -errno.
 */
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		/* Delete failed: drop the prepared notification. */
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}
1893
1894 #ifdef CONFIG_NET_CLS
1895
/* Walker state for rebinding filters to a class via tp->ops->walk(). */
struct tcf_bind_args {
	struct tcf_walker w;	/* must be first: tcf_node_bind() casts back */
	u32 classid;		/* class id the filters currently point at */
	unsigned long cl;	/* new internal class handle (0 = unbind) */
};
1901
1902 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1903 {
1904         struct tcf_bind_args *a = (void *)arg;
1905
1906         if (tp->ops->bind_class) {
1907                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1908
1909                 sch_tree_lock(q);
1910                 tp->ops->bind_class(n, a->classid, a->cl);
1911                 sch_tree_unlock(q);
1912         }
1913         return 0;
1914 }
1915
/* Re-point all filters that reference class @clid of @q at @new_cl
 * (0 unbinds them). Called after class delete/create so filter results do
 * not keep a stale class reference. Walks every chain and every proto of
 * the tcf_block attached to the class found via @portid (the parent id
 * from the request).
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}
}
1946
1947 #else
1948
/* CONFIG_NET_CLS disabled: no filters exist, so rebinding is a no-op. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1953
1954 #endif
1955
/* Doit handler for RTM_{NEW,DEL,GET}TCLASS: create, change, delete, or
 * fetch a traffic class on a classful qdisc. Runs under RTNL.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;	/* NOTE(review): despite the name, this holds the
			 * parent class id from the request, not a netlink
			 * portid */
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN in the device's netns. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - clear.
	   handle == X:0         - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		/* Class does not exist: only NEW with NLM_F_CREATE proceeds. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from filters by rebinding to 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
2092
/* Walker state for dumping all classes of one qdisc via cl_ops->walk(). */
struct qdisc_dump_args {
	struct qdisc_walker	w;	/* must be first: callback casts back */
	struct sk_buff		*skb;	/* netlink dump buffer */
	struct netlink_callback *cb;	/* dump context (seq, portid, resume) */
};
2098
2099 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2100                             struct qdisc_walker *arg)
2101 {
2102         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2103
2104         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2105                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2106                               RTM_NEWTCLASS);
2107 }
2108
/* Dump the classes of a single qdisc, honouring dump resume state.
 *
 * @t_p: in/out index of qdiscs visited; @s_t: qdisc index to resume from.
 * cb->args[1] carries the per-qdisc class position between dump calls.
 * Returns 0 to continue with the next qdisc, -1 when the skb is full.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip qdiscs that are hidden, already dumped, classless, or not
	 * matching an explicitly requested parent major number.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Entered a qdisc past the resume point: clear stale class state. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];	/* classes already dumped last time */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* save class position for resume */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
2137
/* Dump classes of @root and, unless a specific parent was requested, of
 * every qdisc hashed on the same device. Returns 0 on success, -1 when
 * the skb ran out of room.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	/* Singleton qdiscs have no device; nothing further to walk. */
	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		/* Specific parent requested: dump only the matching qdisc. */
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
2168
/* Netlink dump handler for RTM_GETTCLASS: dump the classes of all qdiscs
 * (tree and ingress) of the device named in the request, resuming from
 * cb->args[0].
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* qdisc index to resume from */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	/* Ingress qdisc is outside the regular hierarchy; dump separately. */
	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);	/* balances dev_get_by_index() above */
	return skb->len;
}
2201
2202 #ifdef CONFIG_PROC_FS
2203 static int psched_show(struct seq_file *seq, void *v)
2204 {
2205         seq_printf(seq, "%08x %08x %08x %08x\n",
2206                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2207                    1000000,
2208                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2209
2210         return 0;
2211 }
2212
2213 static int __net_init psched_net_init(struct net *net)
2214 {
2215         struct proc_dir_entry *e;
2216
2217         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2218         if (e == NULL)
2219                 return -ENOMEM;
2220
2221         return 0;
2222 }
2223
/* Per-netns cleanup: remove /proc/net/psched. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2228 #else
/* CONFIG_PROC_FS disabled: no /proc entry to create. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2233
/* CONFIG_PROC_FS disabled: nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2237 #endif
2238
/* Create/destroy /proc/net/psched for each network namespace. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2243
/* Module init: register the per-netns /proc/net/psched file, the built-in
 * qdiscs, and the rtnetlink handlers for qdisc and class operations.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs. NOTE(review): return values are ignored here —
	 * presumably these registrations cannot fail at boot; verify.
	 */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* Qdisc and class netlink message handlers (doit + dumpit). */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}
2273
2274 subsys_initcall(pktsched_init);