]> asedeno.scripts.mit.edu Git - linux.git/blob - net/sched/cls_api.c
Merge tag 'tty-5.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
[linux.git] / net / sched / cls_api.c
1 /*
2  * net/sched/cls_api.c  Packet classifier API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  *
13  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
14  *
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/kmod.h>
26 #include <linux/slab.h>
27 #include <linux/idr.h>
28 #include <linux/rhashtable.h>
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34
35 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
36
37 /* The list of all installed classifier types */
38 static LIST_HEAD(tcf_proto_base);
39
40 /* Protects list of registered TC modules. It is pure SMP lock. */
41 static DEFINE_RWLOCK(cls_mod_lock);
42
43 /* Find classifier type by string name */
44
45 static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
46 {
47         const struct tcf_proto_ops *t, *res = NULL;
48
49         if (kind) {
50                 read_lock(&cls_mod_lock);
51                 list_for_each_entry(t, &tcf_proto_base, head) {
52                         if (strcmp(kind, t->kind) == 0) {
53                                 if (try_module_get(t->owner))
54                                         res = t;
55                                 break;
56                         }
57                 }
58                 read_unlock(&cls_mod_lock);
59         }
60         return res;
61 }
62
/* Resolve classifier ops by kind, auto-loading module "cls_<kind>" when
 * it is not yet registered.  Called with RTNL held.  On success the
 * returned ops carry a module reference.  Returns ERR_PTR(-EAGAIN) when
 * a module load forced RTNL to be dropped (caller must replay the
 * request), or ERR_PTR(-ENOENT) when no such classifier exists.
 */
static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
{
        const struct tcf_proto_ops *ops;

        ops = __tcf_proto_lookup_ops(kind);
        if (ops)
                return ops;
#ifdef CONFIG_MODULES
        /* request_module() may sleep; it must not run under RTNL. */
        rtnl_unlock();
        request_module("cls_%s", kind);
        rtnl_lock();
        ops = __tcf_proto_lookup_ops(kind);
        /* We dropped the RTNL semaphore in order to perform
         * the module load. So, even if we succeeded in loading
         * the module we have to replay the request. We indicate
         * this using -EAGAIN.
         */
        if (ops) {
                module_put(ops->owner);
                return ERR_PTR(-EAGAIN);
        }
#endif
        NL_SET_ERR_MSG(extack, "TC classifier not found");
        return ERR_PTR(-ENOENT);
}
89
90 /* Register(unregister) new classifier type */
91
92 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
93 {
94         struct tcf_proto_ops *t;
95         int rc = -EEXIST;
96
97         write_lock(&cls_mod_lock);
98         list_for_each_entry(t, &tcf_proto_base, head)
99                 if (!strcmp(ops->kind, t->kind))
100                         goto out;
101
102         list_add_tail(&ops->head, &tcf_proto_base);
103         rc = 0;
104 out:
105         write_unlock(&cls_mod_lock);
106         return rc;
107 }
108 EXPORT_SYMBOL(register_tcf_proto_ops);
109
110 static struct workqueue_struct *tc_filter_wq;
111
112 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
113 {
114         struct tcf_proto_ops *t;
115         int rc = -ENOENT;
116
117         /* Wait for outstanding call_rcu()s, if any, from a
118          * tcf_proto_ops's destroy() handler.
119          */
120         rcu_barrier();
121         flush_workqueue(tc_filter_wq);
122
123         write_lock(&cls_mod_lock);
124         list_for_each_entry(t, &tcf_proto_base, head) {
125                 if (t == ops) {
126                         list_del(&t->head);
127                         rc = 0;
128                         break;
129                 }
130         }
131         write_unlock(&cls_mod_lock);
132         return rc;
133 }
134 EXPORT_SYMBOL(unregister_tcf_proto_ops);
135
136 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
137 {
138         INIT_RCU_WORK(rwork, func);
139         return queue_rcu_work(tc_filter_wq, rwork);
140 }
141 EXPORT_SYMBOL(tcf_queue_work);
142
143 /* Select new prio value from the range, managed by kernel. */
144
145 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
146 {
147         u32 first = TC_H_MAKE(0xC0000000U, 0U);
148
149         if (tp)
150                 first = tp->prio - 1;
151
152         return TC_H_MAJ(first);
153 }
154
155 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
156                                           u32 prio, struct tcf_chain *chain,
157                                           struct netlink_ext_ack *extack)
158 {
159         struct tcf_proto *tp;
160         int err;
161
162         tp = kzalloc(sizeof(*tp), GFP_KERNEL);
163         if (!tp)
164                 return ERR_PTR(-ENOBUFS);
165
166         tp->ops = tcf_proto_lookup_ops(kind, extack);
167         if (IS_ERR(tp->ops)) {
168                 err = PTR_ERR(tp->ops);
169                 goto errout;
170         }
171         tp->classify = tp->ops->classify;
172         tp->protocol = protocol;
173         tp->prio = prio;
174         tp->chain = chain;
175
176         err = tp->ops->init(tp);
177         if (err) {
178                 module_put(tp->ops->owner);
179                 goto errout;
180         }
181         return tp;
182
183 errout:
184         kfree(tp);
185         return ERR_PTR(err);
186 }
187
/* Tear down a classifier instance: run its destroy() callback, drop the
 * module reference taken at creation, and free the tcf_proto only after
 * an RCU grace period since readers may still hold the pointer.
 */
static void tcf_proto_destroy(struct tcf_proto *tp,
                              struct netlink_ext_ack *extack)
{
        tp->ops->destroy(tp, extack);
        module_put(tp->ops->owner);
        kfree_rcu(tp, rcu);
}
195
/* One registered "chain 0 head changed" callback on a block. */
struct tcf_filter_chain_list_item {
        struct list_head list;                          /* on block->chain0.filter_chain_list */
        tcf_chain_head_change_t *chain_head_change;     /* callback; may be NULL */
        void *chain_head_change_priv;                   /* opaque arg passed to the callback */
};
201
202 static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
203                                           u32 chain_index)
204 {
205         struct tcf_chain *chain;
206
207         chain = kzalloc(sizeof(*chain), GFP_KERNEL);
208         if (!chain)
209                 return NULL;
210         list_add_tail(&chain->list, &block->chain_list);
211         chain->block = block;
212         chain->index = chain_index;
213         chain->refcnt = 1;
214         if (!chain->index)
215                 block->chain0.chain = chain;
216         return chain;
217 }
218
219 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
220                                        struct tcf_proto *tp_head)
221 {
222         if (item->chain_head_change)
223                 item->chain_head_change(tp_head, item->chain_head_change_priv);
224 }
225
226 static void tcf_chain0_head_change(struct tcf_chain *chain,
227                                    struct tcf_proto *tp_head)
228 {
229         struct tcf_filter_chain_list_item *item;
230         struct tcf_block *block = chain->block;
231
232         if (chain->index)
233                 return;
234         list_for_each_entry(item, &block->chain0.filter_chain_list, list)
235                 tcf_chain_head_change_item(item, tp_head);
236 }
237
/* Unlink and free a chain whose refcount reached zero.  If this was the
 * last chain on the block and the block itself is no longer referenced,
 * the block is freed too, after an RCU grace period.
 */
static void tcf_chain_destroy(struct tcf_chain *chain)
{
        struct tcf_block *block = chain->block;

        list_del(&chain->list);
        if (!chain->index)
                block->chain0.chain = NULL;     /* drop the chain-0 shortcut */
        kfree(chain);
        /* Chains keep the block alive; free it once both are gone. */
        if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
                kfree_rcu(block, rcu);
}
249
250 static void tcf_chain_hold(struct tcf_chain *chain)
251 {
252         ++chain->refcnt;
253 }
254
255 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
256 {
257         /* In case all the references are action references, this
258          * chain should not be shown to the user.
259          */
260         return chain->refcnt == chain->action_refcnt;
261 }
262
263 static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
264                                           u32 chain_index)
265 {
266         struct tcf_chain *chain;
267
268         list_for_each_entry(chain, &block->chain_list, list) {
269                 if (chain->index == chain_index)
270                         return chain;
271         }
272         return NULL;
273 }
274
275 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
276                            u32 seq, u16 flags, int event, bool unicast);
277
/* Look up a chain by index and take a reference on it, optionally
 * creating the chain when absent.  @by_act marks references taken by
 * actions (goto_chain) as opposed to user references; only the first
 * non-action reference sends an RTM_NEWCHAIN notification.  Returns
 * NULL when not found and !@create, or on allocation failure.
 */
static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
                                         u32 chain_index, bool create,
                                         bool by_act)
{
        struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);

        if (chain) {
                tcf_chain_hold(chain);
        } else {
                if (!create)
                        return NULL;
                chain = tcf_chain_create(block, chain_index);
                if (!chain)
                        return NULL;
        }

        if (by_act)
                ++chain->action_refcnt;

        /* Send notification only in case we got the first
         * non-action reference. Until then, the chain acts only as
         * a placeholder for actions pointing to it and user ought
         * not know about them.
         */
        if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
                                RTM_NEWCHAIN, false);

        return chain;
}
308
/* Take a user (non-action) reference on a chain, optionally creating it. */
static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
                                       bool create)
{
        return __tcf_chain_get(block, chain_index, create, false);
}
314
/* Take an action reference on a chain, creating the chain if needed. */
struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
        return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);
320
321 static void tc_chain_tmplt_del(struct tcf_chain *chain);
322
/* Drop one reference on @chain; @by_act selects the action refcount.
 * Dropping the last non-action reference sends RTM_DELCHAIN; dropping
 * the final reference of any kind destroys the chain (and its template).
 */
static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
{
        if (by_act)
                chain->action_refcnt--;
        chain->refcnt--;

        /* The last dropped non-action reference will trigger notification. */
        if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
                tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);

        if (chain->refcnt == 0) {
                tc_chain_tmplt_del(chain);
                tcf_chain_destroy(chain);
        }
}
338
/* Drop a user (non-action) reference on @chain. */
static void tcf_chain_put(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false);
}
343
/* Drop an action reference taken with tcf_chain_get_by_act(). */
void tcf_chain_put_by_act(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, true);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);
349
350 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
351 {
352         if (chain->explicitly_created)
353                 tcf_chain_put(chain);
354 }
355
/* Destroy every filter on @chain.  Each destroyed tcf_proto also drops
 * the chain reference it held, so the chain itself may be freed as a
 * side effect of the final tcf_chain_put().
 */
static void tcf_chain_flush(struct tcf_chain *chain)
{
        struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);

        /* Tell chain-0 listeners the head is going away before teardown. */
        tcf_chain0_head_change(chain, NULL);
        while (tp) {
                /* Unlink the head first so readers see a consistent list. */
                RCU_INIT_POINTER(chain->filter_chain, tp->next);
                tcf_proto_destroy(tp, NULL);
                tp = rtnl_dereference(chain->filter_chain);
                tcf_chain_put(chain);
        }
}
368
369 static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
370 {
371         const struct Qdisc_class_ops *cops;
372         struct Qdisc *qdisc;
373
374         if (!dev_ingress_queue(dev))
375                 return NULL;
376
377         qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
378         if (!qdisc)
379                 return NULL;
380
381         cops = qdisc->ops->cl_ops;
382         if (!cops)
383                 return NULL;
384
385         if (!cops->tcf_block)
386                 return NULL;
387
388         return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
389 }
390
391 static struct rhashtable indr_setup_block_ht;
392
/* Per-device state for indirectly registered block callbacks, keyed by
 * the net_device pointer in indr_setup_block_ht.
 */
struct tc_indr_block_dev {
        struct rhash_head ht_node;      /* membership in indr_setup_block_ht */
        struct net_device *dev;         /* hash key */
        unsigned int refcnt;            /* one per registered callback */
        struct list_head cb_list;       /* list of tc_indr_block_cb entries */
        struct tcf_block *block;        /* dev's ingress block, if any */
};
400
/* One indirectly registered block callback on a tc_indr_block_dev;
 * identified by the (cb, cb_ident) pair.
 */
struct tc_indr_block_cb {
        struct list_head list;          /* membership in indr_dev->cb_list */
        void *cb_priv;                  /* passed back to cb on every call */
        tc_indr_block_bind_cb_t *cb;
        void *cb_ident;                 /* caller-supplied identity cookie */
};
407
/* indr_setup_block_ht hashes tc_indr_block_dev entries by their
 * net_device pointer value.
 */
static const struct rhashtable_params tc_indr_setup_block_ht_params = {
        .key_offset     = offsetof(struct tc_indr_block_dev, dev),
        .head_offset    = offsetof(struct tc_indr_block_dev, ht_node),
        .key_len        = sizeof(struct net_device *),
};
413
/* Find the indirect-block entry for @dev; NULL when none is registered. */
static struct tc_indr_block_dev *
tc_indr_block_dev_lookup(struct net_device *dev)
{
        return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
                                      tc_indr_setup_block_ht_params);
}
420
421 static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
422 {
423         struct tc_indr_block_dev *indr_dev;
424
425         indr_dev = tc_indr_block_dev_lookup(dev);
426         if (indr_dev)
427                 goto inc_ref;
428
429         indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
430         if (!indr_dev)
431                 return NULL;
432
433         INIT_LIST_HEAD(&indr_dev->cb_list);
434         indr_dev->dev = dev;
435         indr_dev->block = tc_dev_ingress_block(dev);
436         if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
437                                    tc_indr_setup_block_ht_params)) {
438                 kfree(indr_dev);
439                 return NULL;
440         }
441
442 inc_ref:
443         indr_dev->refcnt++;
444         return indr_dev;
445 }
446
447 static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
448 {
449         if (--indr_dev->refcnt)
450                 return;
451
452         rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
453                                tc_indr_setup_block_ht_params);
454         kfree(indr_dev);
455 }
456
457 static struct tc_indr_block_cb *
458 tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
459                         tc_indr_block_bind_cb_t *cb, void *cb_ident)
460 {
461         struct tc_indr_block_cb *indr_block_cb;
462
463         list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
464                 if (indr_block_cb->cb == cb &&
465                     indr_block_cb->cb_ident == cb_ident)
466                         return indr_block_cb;
467         return NULL;
468 }
469
470 static struct tc_indr_block_cb *
471 tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
472                      tc_indr_block_bind_cb_t *cb, void *cb_ident)
473 {
474         struct tc_indr_block_cb *indr_block_cb;
475
476         indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
477         if (indr_block_cb)
478                 return ERR_PTR(-EEXIST);
479
480         indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
481         if (!indr_block_cb)
482                 return ERR_PTR(-ENOMEM);
483
484         indr_block_cb->cb_priv = cb_priv;
485         indr_block_cb->cb = cb;
486         indr_block_cb->cb_ident = cb_ident;
487         list_add(&indr_block_cb->list, &indr_dev->cb_list);
488
489         return indr_block_cb;
490 }
491
/* Unlink a callback entry from its device list and free it. */
static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
{
        list_del(&indr_block_cb->list);
        kfree(indr_block_cb);
}
497
498 static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
499                                   struct tc_indr_block_cb *indr_block_cb,
500                                   enum tc_block_command command)
501 {
502         struct tc_block_offload bo = {
503                 .command        = command,
504                 .binder_type    = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
505                 .block          = indr_dev->block,
506         };
507
508         if (!indr_dev->block)
509                 return;
510
511         indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
512                           &bo);
513 }
514
515 int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
516                                 tc_indr_block_bind_cb_t *cb, void *cb_ident)
517 {
518         struct tc_indr_block_cb *indr_block_cb;
519         struct tc_indr_block_dev *indr_dev;
520         int err;
521
522         indr_dev = tc_indr_block_dev_get(dev);
523         if (!indr_dev)
524                 return -ENOMEM;
525
526         indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
527         err = PTR_ERR_OR_ZERO(indr_block_cb);
528         if (err)
529                 goto err_dev_put;
530
531         tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
532         return 0;
533
534 err_dev_put:
535         tc_indr_block_dev_put(indr_dev);
536         return err;
537 }
538 EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
539
540 int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
541                               tc_indr_block_bind_cb_t *cb, void *cb_ident)
542 {
543         int err;
544
545         rtnl_lock();
546         err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
547         rtnl_unlock();
548
549         return err;
550 }
551 EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
552
/* Remove an indirect block callback added with
 * __tc_indr_block_cb_register().  Callers hold RTNL (see
 * tc_indr_block_cb_unregister()).  Silently does nothing when the
 * (dev, cb, cb_ident) tuple is not registered.
 */
void __tc_indr_block_cb_unregister(struct net_device *dev,
                                   tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
        struct tc_indr_block_cb *indr_block_cb;
        struct tc_indr_block_dev *indr_dev;

        indr_dev = tc_indr_block_dev_lookup(dev);
        if (!indr_dev)
                return;

        indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
        if (!indr_block_cb)
                return;

        /* Send unbind message if required to free any block cbs. */
        tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
        tc_indr_block_cb_del(indr_block_cb);
        tc_indr_block_dev_put(indr_dev);
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
573
/* RTNL-acquiring wrapper around __tc_indr_block_cb_unregister(). */
void tc_indr_block_cb_unregister(struct net_device *dev,
                                 tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
        rtnl_lock();
        __tc_indr_block_cb_unregister(dev, cb, cb_ident);
        rtnl_unlock();
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
582
/* Notify every indirect callback registered for @dev that @block is
 * being bound to / unbound from the device, and cache (or clear) the
 * block pointer on the device's entry accordingly so that late
 * registrants can be replayed by tc_indr_block_ing_cmd().
 */
static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
                               struct tcf_block_ext_info *ei,
                               enum tc_block_command command,
                               struct netlink_ext_ack *extack)
{
        struct tc_indr_block_cb *indr_block_cb;
        struct tc_indr_block_dev *indr_dev;
        struct tc_block_offload bo = {
                .command        = command,
                .binder_type    = ei->binder_type,
                .block          = block,
                .extack         = extack,
        };

        indr_dev = tc_indr_block_dev_lookup(dev);
        if (!indr_dev)
                return;

        /* Remember the bound block; cleared again on unbind. */
        indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;

        list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
                indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
                                  &bo);
}
607
608 static bool tcf_block_offload_in_use(struct tcf_block *block)
609 {
610         return block->offloadcnt;
611 }
612
613 static int tcf_block_offload_cmd(struct tcf_block *block,
614                                  struct net_device *dev,
615                                  struct tcf_block_ext_info *ei,
616                                  enum tc_block_command command,
617                                  struct netlink_ext_ack *extack)
618 {
619         struct tc_block_offload bo = {};
620
621         bo.command = command;
622         bo.binder_type = ei->binder_type;
623         bo.block = block;
624         bo.extack = extack;
625         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
626 }
627
/* Offer @block to @q's device for hardware offload at bind time.
 * Devices without ndo_setup_tc, or that answer -EOPNOTSUPP, are counted
 * in nooffloaddevcnt; binding such a device to a block that already has
 * offloaded filters is refused.  Indirect callbacks are notified on
 * every successful bind path.
 */
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
                                  struct tcf_block_ext_info *ei,
                                  struct netlink_ext_ack *extack)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_inc;

        /* If tc offload feature is disabled and the block we try to bind
         * to already has some offloaded filters, forbid to bind.
         */
        if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
                NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
                return -EOPNOTSUPP;
        }

        err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_inc;
        if (err)
                return err;

        tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
        return 0;

no_offload_dev_inc:
        if (tcf_block_offload_in_use(block))
                return -EOPNOTSUPP;
        /* Track devices bound without offload support; balanced on unbind. */
        block->nooffloaddevcnt++;
        tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
        return 0;
}
662
/* Mirror of tcf_block_offload_bind() for unbind: notify indirect
 * callbacks, tell the device driver, and balance nooffloaddevcnt for
 * devices that never supported the offload.
 */
static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
                                     struct tcf_block_ext_info *ei)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);

        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_dec;
        err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_dec;
        return;

no_offload_dev_dec:
        /* Post-decrement runs even when the WARN fires (counter was 0). */
        WARN_ON(block->nooffloaddevcnt-- == 0);
}
681
682 static int
683 tcf_chain0_head_change_cb_add(struct tcf_block *block,
684                               struct tcf_block_ext_info *ei,
685                               struct netlink_ext_ack *extack)
686 {
687         struct tcf_chain *chain0 = block->chain0.chain;
688         struct tcf_filter_chain_list_item *item;
689
690         item = kmalloc(sizeof(*item), GFP_KERNEL);
691         if (!item) {
692                 NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
693                 return -ENOMEM;
694         }
695         item->chain_head_change = ei->chain_head_change;
696         item->chain_head_change_priv = ei->chain_head_change_priv;
697         if (chain0 && chain0->filter_chain)
698                 tcf_chain_head_change_item(item, chain0->filter_chain);
699         list_add(&item->list, &block->chain0.filter_chain_list);
700         return 0;
701 }
702
/* Remove a head-change callback added by tcf_chain0_head_change_cb_add().
 * An all-NULL ei matches the first item on the list; otherwise both the
 * callback and its priv must match.  The item is told the head is gone
 * (NULL) before being freed.  Warns when no matching item exists.
 */
static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
                              struct tcf_block_ext_info *ei)
{
        struct tcf_chain *chain0 = block->chain0.chain;
        struct tcf_filter_chain_list_item *item;

        list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
                if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
                    (item->chain_head_change == ei->chain_head_change &&
                     item->chain_head_change_priv == ei->chain_head_change_priv)) {
                        if (chain0)
                                tcf_chain_head_change_item(item, NULL);
                        list_del(&item->list);
                        kfree(item);
                        return;
                }
        }
        WARN_ON(1);
}
723
/* Per-network-namespace state: an IDR of shared blocks keyed by index. */
struct tcf_net {
        spinlock_t idr_lock; /* Protects idr */
        struct idr idr;
};
728
729 static unsigned int tcf_net_id;
730
/* Publish @block in the per-netns IDR at block->index.  The
 * idr_preload()/GFP_NOWAIT pairing keeps the allocation out of the
 * spinlock.  @extack is currently unused.
 */
static int tcf_block_insert(struct tcf_block *block, struct net *net,
                            struct netlink_ext_ack *extack)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);
        int err;

        idr_preload(GFP_KERNEL);
        spin_lock(&tn->idr_lock);
        err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
                            GFP_NOWAIT);
        spin_unlock(&tn->idr_lock);
        idr_preload_end();

        return err;
}
746
/* Remove @block from the per-netns IDR; reverses tcf_block_insert(). */
static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        spin_lock(&tn->idr_lock);
        idr_remove(&tn->idr, block->index);
        spin_unlock(&tn->idr_lock);
}
755
756 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
757                                           u32 block_index,
758                                           struct netlink_ext_ack *extack)
759 {
760         struct tcf_block *block;
761
762         block = kzalloc(sizeof(*block), GFP_KERNEL);
763         if (!block) {
764                 NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
765                 return ERR_PTR(-ENOMEM);
766         }
767         INIT_LIST_HEAD(&block->chain_list);
768         INIT_LIST_HEAD(&block->cb_list);
769         INIT_LIST_HEAD(&block->owner_list);
770         INIT_LIST_HEAD(&block->chain0.filter_chain_list);
771
772         refcount_set(&block->refcnt, 1);
773         block->net = net;
774         block->index = block_index;
775
776         /* Don't store q pointer for blocks which are shared */
777         if (!tcf_block_shared(block))
778                 block->q = q;
779         return block;
780 }
781
/* Look up a shared block by index in the per-netns IDR; no reference
 * is taken (see tcf_block_refcnt_get() for the refcounted variant).
 */
static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        return idr_find(&tn->idr, block_index);
}
788
/* Look up a block by index and take a reference, unless it is already
 * on its way to destruction (refcnt hit zero).  The RCU read section
 * covers the lookup-vs-free window.
 */
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
        struct tcf_block *block;

        rcu_read_lock();
        block = tcf_block_lookup(net, block_index);
        if (block && !refcount_inc_not_zero(&block->refcnt))
                block = NULL;
        rcu_read_unlock();

        return block;
}
801
/* Flush the filters of every chain on @block.  The extra references
 * taken here are dropped later by tcf_block_put_all_chains().
 */
static void tcf_block_flush_all_chains(struct tcf_block *block)
{
        struct tcf_chain *chain;

        /* Hold a refcnt for all chains, so that they don't disappear
         * while we are iterating.
         */
        list_for_each_entry(chain, &block->chain_list, list)
                tcf_chain_hold(chain);

        list_for_each_entry(chain, &block->chain_list, list)
                tcf_chain_flush(chain);
}
815
/* Drop the per-chain references taken by tcf_block_flush_all_chains()
 * plus any held for explicitly created chains.  Uses the _safe iterator
 * because dropping the last reference unlinks the chain from the list.
 */
static void tcf_block_put_all_chains(struct tcf_block *block)
{
        struct tcf_chain *chain, *tmp;

        /* At this point, all the chains should have refcnt >= 1. */
        list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
                tcf_chain_put_explicitly_created(chain);
                tcf_chain_put(chain);
        }
}
826
/* Release one reference on @block.  On the last reference all chains
 * are flushed and released, the offload is unbound (when @q is given),
 * and the block is freed — either directly when it has no chains, or
 * indirectly when the last chain is destroyed.  With remaining
 * references and a @q, only the offload unbind is performed.
 */
static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
                            struct tcf_block_ext_info *ei)
{
        if (refcount_dec_and_test(&block->refcnt)) {
                /* Flushing/putting all chains will cause the block to be
                 * deallocated when last chain is freed. However, if chain_list
                 * is empty, block has to be manually deallocated. After block
                 * reference counter reached 0, it is no longer possible to
                 * increment it or add new chains to block.
                 */
                bool free_block = list_empty(&block->chain_list);

                if (tcf_block_shared(block))
                        tcf_block_remove(block, block->net);
                if (!free_block)
                        tcf_block_flush_all_chains(block);

                if (q)
                        tcf_block_offload_unbind(block, q, ei);

                if (free_block)
                        kfree_rcu(block, rcu);
                else
                        tcf_block_put_all_chains(block);
        } else if (q) {
                tcf_block_offload_unbind(block, q, ei);
        }
}
855
/* Drop a reference taken with tcf_block_refcnt_get(). */
static void tcf_block_refcnt_put(struct tcf_block *block)
{
        __tcf_block_put(block, NULL, NULL);
}
860
/* Find tcf block by ifindex/block_index coming from a netlink request.
 * For the shared-block case (ifindex == TCM_IFINDEX_MAGIC_BLOCK) the block is
 * looked up by index alone.  Otherwise the device, qdisc and (optionally)
 * class are resolved and *q, *parent and *cl are filled in for the caller.
 * On success a reference to the block is always held; the caller must release
 * it (and *q, if set) when done — typically via tcf_block_release().
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
                                        u32 *parent, unsigned long *cl,
                                        int ifindex, u32 block_index,
                                        struct netlink_ext_ack *extack)
{
        struct tcf_block *block;
        int err = 0;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
                /* Shared block addressed directly by its index; the lookup
                 * takes the block reference for us.
                 */
                block = tcf_block_refcnt_get(net, block_index);
                if (!block) {
                        NL_SET_ERR_MSG(extack, "Block of given index was not found");
                        return ERR_PTR(-EINVAL);
                }
        } else {
                const struct Qdisc_class_ops *cops;
                struct net_device *dev;

                /* RCU protects the dev and qdisc lookups until we manage to
                 * take a refcount on the qdisc below.
                 */
                rcu_read_lock();

                /* Find link */
                dev = dev_get_by_index_rcu(net, ifindex);
                if (!dev) {
                        rcu_read_unlock();
                        return ERR_PTR(-ENODEV);
                }

                /* Find qdisc: parent 0 means the device root qdisc. */
                if (!*parent) {
                        *q = dev->qdisc;
                        *parent = (*q)->handle;
                } else {
                        *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
                        if (!*q) {
                                NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
                                err = -EINVAL;
                                goto errout_rcu;
                        }
                }

                /* Take a reference only if the qdisc is not already being
                 * destroyed (refcount may have hit zero concurrently).
                 */
                *q = qdisc_refcount_inc_nz(*q);
                if (!*q) {
                        NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
                        err = -EINVAL;
                        goto errout_rcu;
                }

                /* Is it classful? */
                cops = (*q)->ops->cl_ops;
                if (!cops) {
                        NL_SET_ERR_MSG(extack, "Qdisc not classful");
                        err = -EINVAL;
                        goto errout_rcu;
                }

                if (!cops->tcf_block) {
                        NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
                        err = -EOPNOTSUPP;
                        goto errout_rcu;
                }

                /* At this point we know that qdisc is not noop_qdisc,
                 * which means that qdisc holds a reference to net_device
                 * and we hold a reference to qdisc, so it is safe to release
                 * rcu read lock.
                 */
                rcu_read_unlock();

                /* Do we search for filter, attached to class? */
                if (TC_H_MIN(*parent)) {
                        *cl = cops->find(*q, *parent);
                        if (*cl == 0) {
                                NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
                                err = -ENOENT;
                                goto errout_qdisc;
                        }
                }

                /* And the last stroke */
                block = cops->tcf_block(*q, *cl, extack);
                if (!block) {
                        err = -EINVAL;
                        goto errout_qdisc;
                }
                /* Shared blocks must be manipulated via block index, not via
                 * a qdisc that happens to be bound to them.
                 */
                if (tcf_block_shared(block)) {
                        NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
                        err = -EOPNOTSUPP;
                        goto errout_qdisc;
                }

                /* Always take reference to block in order to support execution
                 * of rules update path of cls API without rtnl lock. Caller
                 * must release block when it is finished using it. 'if' block
                 * of this conditional obtain reference to block by calling
                 * tcf_block_refcnt_get().
                 */
                refcount_inc(&block->refcnt);
        }

        return block;

errout_rcu:
        rcu_read_unlock();
errout_qdisc:
        if (*q) {
                qdisc_put(*q);
                *q = NULL;
        }
        return ERR_PTR(err);
}
975
/* Undo the references taken by tcf_block_find().  Tolerates both a NULL/error
 * block pointer (lookup failed) and a NULL qdisc (shared-block path).
 */
static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
{
        if (!IS_ERR_OR_NULL(block))
                tcf_block_refcnt_put(block);

        if (q)
                qdisc_put(q);
}
984
/* One (qdisc, binder point) pair that owns a block; blocks keep a list of
 * these so a shared block can track every qdisc it is bound to.
 */
struct tcf_block_owner_item {
        struct list_head list;          /* linkage in block->owner_list */
        struct Qdisc *q;                /* owning qdisc */
        enum tcf_block_binder_type binder_type; /* ingress/egress bind point */
};
990
991 static void
992 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
993                                struct Qdisc *q,
994                                enum tcf_block_binder_type binder_type)
995 {
996         if (block->keep_dst &&
997             binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
998             binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
999                 netif_keep_dst(qdisc_dev(q));
1000 }
1001
/* Mark the block as needing skb dst retention and apply that to every qdisc
 * currently bound to it (future binds pick the flag up in tcf_block_get_ext).
 */
void tcf_block_netif_keep_dst(struct tcf_block *block)
{
        struct tcf_block_owner_item *item;

        block->keep_dst = true;
        list_for_each_entry(item, &block->owner_list, list)
                tcf_block_owner_netif_keep_dst(block, item->q,
                                               item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);
1012
1013 static int tcf_block_owner_add(struct tcf_block *block,
1014                                struct Qdisc *q,
1015                                enum tcf_block_binder_type binder_type)
1016 {
1017         struct tcf_block_owner_item *item;
1018
1019         item = kmalloc(sizeof(*item), GFP_KERNEL);
1020         if (!item)
1021                 return -ENOMEM;
1022         item->q = q;
1023         item->binder_type = binder_type;
1024         list_add(&item->list, &block->owner_list);
1025         return 0;
1026 }
1027
1028 static void tcf_block_owner_del(struct tcf_block *block,
1029                                 struct Qdisc *q,
1030                                 enum tcf_block_binder_type binder_type)
1031 {
1032         struct tcf_block_owner_item *item;
1033
1034         list_for_each_entry(item, &block->owner_list, list) {
1035                 if (item->q == q && item->binder_type == binder_type) {
1036                         list_del(&item->list);
1037                         kfree(item);
1038                         return;
1039                 }
1040         }
1041         WARN_ON(1);
1042 }
1043
/* Bind a block to qdisc @q, creating it if needed.
 * If ei->block_index is nonzero, an existing shared block with that index is
 * reused (with a new reference); otherwise a fresh block is created and, when
 * shared, inserted into the per-netns index.  On success *p_block is set and
 * the caller owns one block reference.  Errors unwind via the goto chain in
 * strict reverse order of setup.
 */
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
                      struct tcf_block_ext_info *ei,
                      struct netlink_ext_ack *extack)
{
        struct net *net = qdisc_net(q);
        struct tcf_block *block = NULL;
        int err;

        if (ei->block_index)
                /* block_index not 0 means the shared block is requested */
                block = tcf_block_refcnt_get(net, ei->block_index);

        if (!block) {
                /* Not shared, or shared but not created yet: make a new one.
                 * tcf_block_create() returns it with refcount held.
                 */
                block = tcf_block_create(net, q, ei->block_index, extack);
                if (IS_ERR(block))
                        return PTR_ERR(block);
                if (tcf_block_shared(block)) {
                        err = tcf_block_insert(block, net, extack);
                        if (err)
                                goto err_block_insert;
                }
        }

        err = tcf_block_owner_add(block, q, ei->binder_type);
        if (err)
                goto err_block_owner_add;

        /* Apply keep_dst to this qdisc if the block already requires it. */
        tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

        err = tcf_chain0_head_change_cb_add(block, ei, extack);
        if (err)
                goto err_chain0_head_change_cb_add;

        err = tcf_block_offload_bind(block, q, ei, extack);
        if (err)
                goto err_block_offload_bind;

        *p_block = block;
        return 0;

err_block_offload_bind:
        tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
        /* Drops our reference; frees the block if it was newly created. */
        tcf_block_refcnt_put(block);
        return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
1094
/* Default chain-head-change callback: publish the new chain-0 head into the
 * caller-provided RCU pointer (priv points at the qdisc's filter list head).
 */
static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
        struct tcf_proto __rcu **p_filter_chain = priv;

        rcu_assign_pointer(*p_filter_chain, tp_head);
}
1101
1102 int tcf_block_get(struct tcf_block **p_block,
1103                   struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
1104                   struct netlink_ext_ack *extack)
1105 {
1106         struct tcf_block_ext_info ei = {
1107                 .chain_head_change = tcf_chain_head_change_dflt,
1108                 .chain_head_change_priv = p_filter_chain,
1109         };
1110
1111         WARN_ON(!p_filter_chain);
1112         return tcf_block_get_ext(p_block, q, &ei, extack);
1113 }
1114 EXPORT_SYMBOL(tcf_block_get);
1115
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
/* Counterpart of tcf_block_get_ext(): unregisters the chain0 callback and the
 * owner entry, then drops the block reference (which may free the block).
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
                       struct tcf_block_ext_info *ei)
{
        if (!block)
                return;
        tcf_chain0_head_change_cb_del(block, ei);
        tcf_block_owner_del(block, q, ei->binder_type);

        __tcf_block_put(block, q, ei);
}
EXPORT_SYMBOL(tcf_block_put_ext);
1130
/* Counterpart of tcf_block_get(): release using default (empty) ext info and
 * the qdisc the block remembers as its creator.
 */
void tcf_block_put(struct tcf_block *block)
{
        struct tcf_block_ext_info ei = {0, };

        if (!block)
                return;
        tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);
1141
/* One hardware-offload callback registered on a block (typically per driver
 * binding).  Identified by the (cb, cb_ident) pair; refcnt counts how many
 * binder points share this registration.
 */
struct tcf_block_cb {
        struct list_head list;  /* linkage in block->cb_list */
        tc_setup_cb_t *cb;      /* driver callback invoked per filter */
        void *cb_ident;         /* identity cookie, distinguishes registrants */
        void *cb_priv;          /* opaque driver data passed to cb */
        unsigned int refcnt;    /* shared-registration count */
};
1149
/* Accessor for the driver-private pointer stored at registration time. */
void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
{
        return block_cb->cb_priv;
}
EXPORT_SYMBOL(tcf_block_cb_priv);
1155
1156 struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
1157                                          tc_setup_cb_t *cb, void *cb_ident)
1158 {       struct tcf_block_cb *block_cb;
1159
1160         list_for_each_entry(block_cb, &block->cb_list, list)
1161                 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
1162                         return block_cb;
1163         return NULL;
1164 }
1165 EXPORT_SYMBOL(tcf_block_cb_lookup);
1166
/* Take an additional share of an existing callback registration.
 * Not atomic — callers rely on RTNL serialization.
 */
void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
{
        block_cb->refcnt++;
}
EXPORT_SYMBOL(tcf_block_cb_incref);
1172
/* Drop one share of a callback registration; returns the remaining count so
 * the caller can unregister when it reaches zero.  RTNL-serialized.
 */
unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
{
        return --block_cb->refcnt;
}
EXPORT_SYMBOL(tcf_block_cb_decref);
1178
/* Replay all filters on @block to a single callback, either adding (@add true)
 * or removing them.  Used when a callback registers/unregisters after filters
 * already exist.  If an add fails midway, already-added filters are rolled
 * back by recursing with add == false.
 */
static int
tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
                            void *cb_priv, bool add, bool offload_in_use,
                            struct netlink_ext_ack *extack)
{
        struct tcf_chain *chain;
        struct tcf_proto *tp;
        int err;

        /* Walk every proto on every chain; rtnl_dereference() — RTNL held. */
        list_for_each_entry(chain, &block->chain_list, list) {
                for (tp = rtnl_dereference(chain->filter_chain); tp;
                     tp = rtnl_dereference(tp->next)) {
                        if (tp->ops->reoffload) {
                                err = tp->ops->reoffload(tp, add, cb, cb_priv,
                                                         extack);
                                if (err && add)
                                        goto err_playback_remove;
                        } else if (add && offload_in_use) {
                                /* Classifier cannot replay its rules but HW
                                 * offload is in use — refuse the registration.
                                 */
                                err = -EOPNOTSUPP;
                                NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
                                goto err_playback_remove;
                        }
                }
        }

        return 0;

err_playback_remove:
        /* Roll back whatever was added before the failure. */
        tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
                                    extack);
        return err;
}
1211
/* Register a hardware-offload callback on @block and replay every existing
 * filter to it first.  Returns the new registration or an ERR_PTR; the replay
 * happens before allocation so a failed replay leaves no state behind.
 */
struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
                                             tc_setup_cb_t *cb, void *cb_ident,
                                             void *cb_priv,
                                             struct netlink_ext_ack *extack)
{
        struct tcf_block_cb *block_cb;
        int err;

        /* Replay any already present rules */
        err = tcf_block_playback_offloads(block, cb, cb_priv, true,
                                          tcf_block_offload_in_use(block),
                                          extack);
        if (err)
                return ERR_PTR(err);

        block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
        if (!block_cb)
                return ERR_PTR(-ENOMEM);
        block_cb->cb = cb;
        block_cb->cb_ident = cb_ident;
        block_cb->cb_priv = cb_priv;
        list_add(&block_cb->list, &block->cb_list);
        return block_cb;
}
EXPORT_SYMBOL(__tcf_block_cb_register);
1237
1238 int tcf_block_cb_register(struct tcf_block *block,
1239                           tc_setup_cb_t *cb, void *cb_ident,
1240                           void *cb_priv, struct netlink_ext_ack *extack)
1241 {
1242         struct tcf_block_cb *block_cb;
1243
1244         block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
1245                                            extack);
1246         return PTR_ERR_OR_ZERO(block_cb);
1247 }
1248 EXPORT_SYMBOL(tcf_block_cb_register);
1249
/* Tear down one callback registration: replay-remove all filters from the
 * callback (extack NULL — removal errors are not reported), then unlink and
 * free the registration.
 */
void __tcf_block_cb_unregister(struct tcf_block *block,
                               struct tcf_block_cb *block_cb)
{
        tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
                                    false, tcf_block_offload_in_use(block),
                                    NULL);
        list_del(&block_cb->list);
        kfree(block_cb);
}
EXPORT_SYMBOL(__tcf_block_cb_unregister);
1260
1261 void tcf_block_cb_unregister(struct tcf_block *block,
1262                              tc_setup_cb_t *cb, void *cb_ident)
1263 {
1264         struct tcf_block_cb *block_cb;
1265
1266         block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
1267         if (!block_cb)
1268                 return;
1269         __tcf_block_cb_unregister(block, block_cb);
1270 }
1271 EXPORT_SYMBOL(tcf_block_cb_unregister);
1272
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Returns a TC_ACT_* verdict (>= 0) from the first matching classifier, or
 * TC_ACT_UNSPEC when nothing matched.  With CONFIG_NET_CLS_ACT, RECLASSIFY
 * and GOTO_CHAIN verdicts restart the walk (from the original head or the
 * target chain respectively), bounded by max_reclassify_loop to stop loops.
 * Runs under rcu_read_lock_bh (datapath).
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                 struct tcf_result *res, bool compat_mode)
{
#ifdef CONFIG_NET_CLS_ACT
        const int max_reclassify_loop = 4;
        const struct tcf_proto *orig_tp = tp;
        const struct tcf_proto *first_tp;
        int limit = 0;

reclassify:
#endif
        for (; tp; tp = rcu_dereference_bh(tp->next)) {
                __be16 protocol = tc_skb_protocol(skb);
                int err;

                /* Skip protos bound to a different protocol (ETH_P_ALL
                 * matches everything).
                 */
                if (tp->protocol != protocol &&
                    tp->protocol != htons(ETH_P_ALL))
                        continue;

                err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
                if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
                        first_tp = orig_tp;
                        goto reset;
                } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
                        /* Classifier stashed the target chain's head. */
                        first_tp = res->goto_tp;
                        goto reset;
                }
#endif
                /* Negative err means "no match here, keep walking". */
                if (err >= 0)
                        return err;
        }

        return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
        if (unlikely(limit++ >= max_reclassify_loop)) {
                net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
                                       tp->chain->block->index,
                                       tp->prio & 0xffff,
                                       ntohs(tp->protocol));
                return TC_ACT_SHOT;
        }

        tp = first_tp;
        goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
1326
/* Cursor into a chain's singly linked proto list, filled by
 * tcf_chain_tp_find(): pprev points at the link to the found position,
 * next at the element after it — enough to insert or unlink in O(1).
 */
struct tcf_chain_info {
        struct tcf_proto __rcu **pprev;
        struct tcf_proto __rcu *next;
};
1331
/* Dereference the proto currently at the cursor position (RTNL held). */
static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
        return rtnl_dereference(*chain_info->pprev);
}
1336
/* Splice @tp into the chain at the cursor position.  tp->next is set before
 * the rcu_assign_pointer publication so concurrent readers always see a
 * consistent list.  Inserting at the head notifies chain-0 head watchers,
 * and the chain gains a reference for the new proto.
 */
static void tcf_chain_tp_insert(struct tcf_chain *chain,
                                struct tcf_chain_info *chain_info,
                                struct tcf_proto *tp)
{
        if (*chain_info->pprev == chain->filter_chain)
                tcf_chain0_head_change(chain, tp);
        RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
        rcu_assign_pointer(*chain_info->pprev, tp);
        tcf_chain_hold(chain);
}
1347
/* Unlink @tp (at the cursor position) from the chain.  Removing the head
 * notifies chain-0 head watchers with the new head, and the proto's chain
 * reference is dropped.  Freeing @tp itself is the caller's job.
 */
static void tcf_chain_tp_remove(struct tcf_chain *chain,
                                struct tcf_chain_info *chain_info,
                                struct tcf_proto *tp)
{
        struct tcf_proto *next = rtnl_dereference(chain_info->next);

        if (tp == chain->filter_chain)
                tcf_chain0_head_change(chain, next);
        RCU_INIT_POINTER(*chain_info->pprev, next);
        tcf_chain_put(chain);
}
1359
/* Locate the proto with the given priority on @chain.  The list is kept
 * sorted by ascending prio, so the walk stops at the first prio >= @prio.
 * Returns the matching proto, NULL if that prio slot is free, or
 * ERR_PTR(-EINVAL) when the slot is occupied incompatibly (auto-prio
 * requested, or a different non-wildcard protocol).  In all cases chain_info
 * is left pointing at the insertion/removal position.
 */
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
                                           struct tcf_chain_info *chain_info,
                                           u32 protocol, u32 prio,
                                           bool prio_allocate)
{
        struct tcf_proto **pprev;
        struct tcf_proto *tp;

        /* Check the chain for existence of proto-tcf with this priority */
        for (pprev = &chain->filter_chain;
             (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
                if (tp->prio >= prio) {
                        if (tp->prio == prio) {
                                if (prio_allocate ||
                                    (tp->protocol != protocol && protocol))
                                        return ERR_PTR(-EINVAL);
                        } else {
                                /* Passed the slot without a match. */
                                tp = NULL;
                        }
                        break;
                }
        }
        chain_info->pprev = pprev;
        chain_info->next = tp ? tp->next : NULL;
        return tp;
}
1386
/* Build one RTM_*TFILTER netlink message for filter @fh of proto @tp into
 * @skb.  Addressing is by qdisc (ifindex/parent) when @q is set, otherwise by
 * shared block index.  Returns skb->len on success or -1, trimming back any
 * partially written message.
 */
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
                         struct tcf_proto *tp, struct tcf_block *block,
                         struct Qdisc *q, u32 parent, void *fh,
                         u32 portid, u32 seq, u16 flags, int event)
{
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
        unsigned char *b = skb_tail_pointer(skb);   /* rollback point */

        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
        if (!nlh)
                goto out_nlmsg_trim;
        tcm = nlmsg_data(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm__pad1 = 0;
        tcm->tcm__pad2 = 0;
        if (q) {
                tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
                tcm->tcm_parent = parent;
        } else {
                /* Shared block: magic ifindex + block index instead. */
                tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
                tcm->tcm_block_index = block->index;
        }
        tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
        if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
                goto nla_put_failure;
        if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
                goto nla_put_failure;
        if (!fh) {
                /* No specific filter: chain/proto-level event. */
                tcm->tcm_handle = 0;
        } else {
                /* Let the classifier dump its own attributes. */
                if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
                        goto nla_put_failure;
        }
        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
        return skb->len;

out_nlmsg_trim:
nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}
1429
/* Send a filter event to userspace: unicast back to the requester when
 * @unicast is set, otherwise multicast to the RTNLGRP_TC group (echoing to
 * the sender if NLM_F_ECHO was requested).
 */
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
                          struct nlmsghdr *n, struct tcf_proto *tp,
                          struct tcf_block *block, struct Qdisc *q,
                          u32 parent, void *fh, int event, bool unicast)
{
        struct sk_buff *skb;
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
                          n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
                kfree_skb(skb);
                return -EINVAL;
        }

        if (unicast)
                return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

        return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                              n->nlmsg_flags & NLM_F_ECHO);
}
1454
/* Delete filter @fh and notify userspace.  The notification skb is built
 * BEFORE the delete, since the filter's attributes are gone afterwards;
 * *last is set by the classifier when this was its final filter, telling the
 * caller to destroy the whole proto.
 */
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
                              struct nlmsghdr *n, struct tcf_proto *tp,
                              struct tcf_block *block, struct Qdisc *q,
                              u32 parent, void *fh, bool unicast, bool *last,
                              struct netlink_ext_ack *extack)
{
        struct sk_buff *skb;
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
        int err;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
                          n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
                NL_SET_ERR_MSG(extack, "Failed to build del event notification");
                kfree_skb(skb);
                return -EINVAL;
        }

        /* Actual removal; on failure the prebuilt event is discarded. */
        err = tp->ops->delete(tp, fh, last, extack);
        if (err) {
                kfree_skb(skb);
                return err;
        }

        if (unicast)
                return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

        err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                             n->nlmsg_flags & NLM_F_ECHO);
        if (err < 0)
                NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
        return err;
}
1491
/* Emit @event for every proto on @chain — used when flushing a whole chain
 * so userspace sees one notification per filter proto.  RTNL held
 * (rtnl_dereference walk); notification failures are intentionally ignored.
 */
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
                                 struct tcf_block *block, struct Qdisc *q,
                                 u32 parent, struct nlmsghdr *n,
                                 struct tcf_chain *chain, int event)
{
        struct tcf_proto *tp;

        for (tp = rtnl_dereference(chain->filter_chain);
             tp; tp = rtnl_dereference(tp->next))
                tfilter_notify(net, oskb, n, tp, block,
                               q, parent, NULL, event, false);
}
1504
/* RTM_NEWTFILTER handler: create or update a filter.
 * Resolves block/chain/proto from the tcmsg addressing, creating the chain
 * and proto on demand (NLM_F_CREATE), then delegates attribute parsing to the
 * classifier's ->change().  A classifier returning -EAGAIN (e.g. after
 * loading a module) restarts the whole request at 'replay'.
 */
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
                          struct netlink_ext_ack *extack)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
        struct tcmsg *t;
        u32 protocol;
        u32 prio;
        bool prio_allocate;
        u32 parent;
        u32 chain_index;
        struct Qdisc *q = NULL;
        struct tcf_chain_info chain_info;
        struct tcf_chain *chain = NULL;
        struct tcf_block *block;
        struct tcf_proto *tp;
        unsigned long cl;
        void *fh;
        int err;
        int tp_created;

        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                return -EPERM;

replay:
        tp_created = 0;

        err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
        if (err < 0)
                return err;

        t = nlmsg_data(n);
        /* tcm_info packs protocol (minor) and priority (major). */
        protocol = TC_H_MIN(t->tcm_info);
        prio = TC_H_MAJ(t->tcm_info);
        prio_allocate = false;
        parent = t->tcm_parent;
        cl = 0;

        if (prio == 0) {
                /* If no priority is provided by the user,
                 * we allocate one.
                 */
                if (n->nlmsg_flags & NLM_F_CREATE) {
                        prio = TC_H_MAKE(0x80000000U, 0U);
                        prio_allocate = true;
                } else {
                        NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
                        return -ENOENT;
                }
        }

        /* Find head of filter chain. */

        block = tcf_block_find(net, &q, &parent, &cl,
                               t->tcm_ifindex, t->tcm_block_index, extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout;
        }

        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
        if (chain_index > TC_ACT_EXT_VAL_MASK) {
                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
                err = -EINVAL;
                goto errout;
        }
        /* create == true: chain is created if it does not exist yet. */
        chain = tcf_chain_get(block, chain_index, true);
        if (!chain) {
                NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
                err = -ENOMEM;
                goto errout;
        }

        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
                               prio, prio_allocate);
        if (IS_ERR(tp)) {
                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
                err = PTR_ERR(tp);
                goto errout;
        }

        if (tp == NULL) {
                /* Proto-tcf does not exist, create new one */

                if (tca[TCA_KIND] == NULL || !protocol) {
                        NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
                        err = -EINVAL;
                        goto errout;
                }

                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
                        err = -ENOENT;
                        goto errout;
                }

                /* Auto-priority: just below the current lowest prio. */
                if (prio_allocate)
                        prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));

                tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
                                      protocol, prio, chain, extack);
                if (IS_ERR(tp)) {
                        err = PTR_ERR(tp);
                        goto errout;
                }
                tp_created = 1;
        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
                err = -EINVAL;
                goto errout;
        }

        fh = tp->ops->get(tp, t->tcm_handle);

        if (!fh) {
                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
                        err = -ENOENT;
                        goto errout;
                }
        } else if (n->nlmsg_flags & NLM_F_EXCL) {
                NL_SET_ERR_MSG(extack, "Filter already exists");
                err = -EEXIST;
                goto errout;
        }

        /* A chain template pins the filter kind for the whole chain. */
        if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
                NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
                err = -EINVAL;
                goto errout;
        }

        /* Classifier parses its attributes and installs/updates the filter. */
        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
                              n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
                              extack);
        if (err == 0) {
                /* Publish a newly created proto only after ->change succeeds. */
                if (tp_created)
                        tcf_chain_tp_insert(chain, &chain_info, tp);
                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
                               RTM_NEWTFILTER, false);
        } else {
                if (tp_created)
                        tcf_proto_destroy(tp, NULL);
        }

errout:
        if (chain)
                tcf_chain_put(chain);
        tcf_block_release(q, block);
        if (err == -EAGAIN)
                /* Replay the request. */
                goto replay;
        return err;
}
1659
1660 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1661                           struct netlink_ext_ack *extack)
1662 {
1663         struct net *net = sock_net(skb->sk);
1664         struct nlattr *tca[TCA_MAX + 1];
1665         struct tcmsg *t;
1666         u32 protocol;
1667         u32 prio;
1668         u32 parent;
1669         u32 chain_index;
1670         struct Qdisc *q = NULL;
1671         struct tcf_chain_info chain_info;
1672         struct tcf_chain *chain = NULL;
1673         struct tcf_block *block;
1674         struct tcf_proto *tp = NULL;
1675         unsigned long cl = 0;
1676         void *fh = NULL;
1677         int err;
1678
1679         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1680                 return -EPERM;
1681
1682         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1683         if (err < 0)
1684                 return err;
1685
1686         t = nlmsg_data(n);
1687         protocol = TC_H_MIN(t->tcm_info);
1688         prio = TC_H_MAJ(t->tcm_info);
1689         parent = t->tcm_parent;
1690
1691         if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
1692                 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
1693                 return -ENOENT;
1694         }
1695
1696         /* Find head of filter chain. */
1697
1698         block = tcf_block_find(net, &q, &parent, &cl,
1699                                t->tcm_ifindex, t->tcm_block_index, extack);
1700         if (IS_ERR(block)) {
1701                 err = PTR_ERR(block);
1702                 goto errout;
1703         }
1704
1705         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1706         if (chain_index > TC_ACT_EXT_VAL_MASK) {
1707                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1708                 err = -EINVAL;
1709                 goto errout;
1710         }
1711         chain = tcf_chain_get(block, chain_index, false);
1712         if (!chain) {
1713                 /* User requested flush on non-existent chain. Nothing to do,
1714                  * so just return success.
1715                  */
1716                 if (prio == 0) {
1717                         err = 0;
1718                         goto errout;
1719                 }
1720                 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
1721                 err = -ENOENT;
1722                 goto errout;
1723         }
1724
1725         if (prio == 0) {
1726                 tfilter_notify_chain(net, skb, block, q, parent, n,
1727                                      chain, RTM_DELTFILTER);
1728                 tcf_chain_flush(chain);
1729                 err = 0;
1730                 goto errout;
1731         }
1732
1733         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1734                                prio, false);
1735         if (!tp || IS_ERR(tp)) {
1736                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1737                 err = tp ? PTR_ERR(tp) : -ENOENT;
1738                 goto errout;
1739         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1740                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1741                 err = -EINVAL;
1742                 goto errout;
1743         }
1744
1745         fh = tp->ops->get(tp, t->tcm_handle);
1746
1747         if (!fh) {
1748                 if (t->tcm_handle == 0) {
1749                         tcf_chain_tp_remove(chain, &chain_info, tp);
1750                         tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1751                                        RTM_DELTFILTER, false);
1752                         tcf_proto_destroy(tp, extack);
1753                         err = 0;
1754                 } else {
1755                         NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1756                         err = -ENOENT;
1757                 }
1758         } else {
1759                 bool last;
1760
1761                 err = tfilter_del_notify(net, skb, n, tp, block,
1762                                          q, parent, fh, false, &last,
1763                                          extack);
1764                 if (err)
1765                         goto errout;
1766                 if (last) {
1767                         tcf_chain_tp_remove(chain, &chain_info, tp);
1768                         tcf_proto_destroy(tp, extack);
1769                 }
1770         }
1771
1772 errout:
1773         if (chain)
1774                 tcf_chain_put(chain);
1775         tcf_block_release(q, block);
1776         return err;
1777 }
1778
/* tc_get_tfilter - handle RTM_GETTFILTER: look up a single filter and
 * send a RTM_NEWTFILTER message describing it back to the requester.
 *
 * Note: unlike the new/del handlers there is no CAP_NET_ADMIN check
 * here — this is a read-only operation, matching how RTM_GETCHAIN is
 * exempted in tc_ctl_chain(). Returns 0 or a negative errno.
 */
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	/* tcm_info packs the filter priority into the major part and the
	 * protocol into the minor part of one 32-bit field.
	 */
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	/* Priority 0 only makes sense for a flush (delete); a get must
	 * identify exactly one filter.
	 */
	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	/* create=false: a get must never instantiate a new chain */
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		/* Final 'true' presumably selects unicast delivery back to
		 * the requester (cf. tc_chain_notify()'s 'unicast' flag) —
		 * TODO confirm against tfilter_notify().
		 */
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	tcf_block_release(q, block);
	return err;
}
1864
/* Argument bundle threaded through tp->ops->walk() while dumping the
 * filters of a chain. 'w' must remain the first member so that
 * tcf_node_dump() can recover this struct from the tcf_walker pointer.
 */
struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
};
1873
1874 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1875 {
1876         struct tcf_dump_args *a = (void *)arg;
1877         struct net *net = sock_net(a->skb->sk);
1878
1879         return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
1880                              n, NETLINK_CB(a->cb->skb).portid,
1881                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1882                              RTM_NEWTFILTER);
1883 }
1884
/* tcf_chain_dump - dump the filters of one chain into @skb.
 *
 * @index_start is the flat proto index (counted across all chains of
 * the block) at which a resumed netlink dump should continue; *p_index
 * counts the protos visited so far. cb->args[1] tracks progress within
 * one proto (0 = proto header not yet emitted, otherwise one more than
 * the number of nodes already walked) and cb->args[2] carries the
 * classifier walker's resume cookie.
 *
 * Returns true when the chain was fully dumped, false when the skb
 * filled up and the dump must be resumed later.
 */
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		/* Optional prio/protocol filtering requested in tcm_info */
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		/* Once past the resume point, clear per-proto resume state
		 * (args[1] and onward) for each fresh proto.
		 */
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				return false;

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		/* args[1] is walked-node count + 1, so skip = args[1] - 1 */
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		tp->ops->walk(tp, &arg.w);
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}
	return true;
}
1937
1938 /* called with RTNL */
1939 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
1940 {
1941         struct net *net = sock_net(skb->sk);
1942         struct nlattr *tca[TCA_MAX + 1];
1943         struct Qdisc *q = NULL;
1944         struct tcf_block *block;
1945         struct tcf_chain *chain;
1946         struct tcmsg *tcm = nlmsg_data(cb->nlh);
1947         long index_start;
1948         long index;
1949         u32 parent;
1950         int err;
1951
1952         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1953                 return skb->len;
1954
1955         err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
1956                           cb->extack);
1957         if (err)
1958                 return err;
1959
1960         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1961                 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
1962                 if (!block)
1963                         goto out;
1964                 /* If we work with block index, q is NULL and parent value
1965                  * will never be used in the following code. The check
1966                  * in tcf_fill_node prevents it. However, compiler does not
1967                  * see that far, so set parent to zero to silence the warning
1968                  * about parent being uninitialized.
1969                  */
1970                 parent = 0;
1971         } else {
1972                 const struct Qdisc_class_ops *cops;
1973                 struct net_device *dev;
1974                 unsigned long cl = 0;
1975
1976                 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1977                 if (!dev)
1978                         return skb->len;
1979
1980                 parent = tcm->tcm_parent;
1981                 if (!parent) {
1982                         q = dev->qdisc;
1983                         parent = q->handle;
1984                 } else {
1985                         q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1986                 }
1987                 if (!q)
1988                         goto out;
1989                 cops = q->ops->cl_ops;
1990                 if (!cops)
1991                         goto out;
1992                 if (!cops->tcf_block)
1993                         goto out;
1994                 if (TC_H_MIN(tcm->tcm_parent)) {
1995                         cl = cops->find(q, tcm->tcm_parent);
1996                         if (cl == 0)
1997                                 goto out;
1998                 }
1999                 block = cops->tcf_block(q, cl, NULL);
2000                 if (!block)
2001                         goto out;
2002                 if (tcf_block_shared(block))
2003                         q = NULL;
2004         }
2005
2006         index_start = cb->args[0];
2007         index = 0;
2008
2009         list_for_each_entry(chain, &block->chain_list, list) {
2010                 if (tca[TCA_CHAIN] &&
2011                     nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2012                         continue;
2013                 if (!tcf_chain_dump(chain, q, parent, skb, cb,
2014                                     index_start, &index)) {
2015                         err = -EMSGSIZE;
2016                         break;
2017                 }
2018         }
2019
2020         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2021                 tcf_block_refcnt_put(block);
2022         cb->args[0] = index;
2023
2024 out:
2025         /* If we did no progress, the error (EMSGSIZE) is real */
2026         if (skb->len == 0 && err)
2027                 return err;
2028         return skb->len;
2029 }
2030
/* tc_chain_fill_node - fill one RTM_*CHAIN netlink message for @chain.
 *
 * Emits the tcmsg header, the TCA_CHAIN index attribute and, when the
 * chain carries a template, TCA_KIND plus the classifier's template
 * dump. Returns skb->len on success or -EMSGSIZE after trimming the
 * partially built message.
 */
static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
			      struct sk_buff *skb, struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = chain->tmplt_ops;
	priv = chain->tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	if (block->q) {
		/* Block bound to one qdisc: identify it by dev + parent */
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		/* Shared block: identify it by its block index */
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain->index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
2078
/* tc_chain_notify - build and deliver a chain add/del/get notification.
 *
 * When @unicast is set the message is sent only to @oskb's originator
 * (the RTM_GETCHAIN reply path); otherwise it is broadcast to
 * RTNLGRP_TC listeners, echoing to the sender when NLM_F_ECHO is set.
 */
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(chain, net, skb, block, portid,
			       seq, flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
2102
2103 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2104                               struct nlattr **tca,
2105                               struct netlink_ext_ack *extack)
2106 {
2107         const struct tcf_proto_ops *ops;
2108         void *tmplt_priv;
2109
2110         /* If kind is not set, user did not specify template. */
2111         if (!tca[TCA_KIND])
2112                 return 0;
2113
2114         ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
2115         if (IS_ERR(ops))
2116                 return PTR_ERR(ops);
2117         if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2118                 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2119                 return -EOPNOTSUPP;
2120         }
2121
2122         tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2123         if (IS_ERR(tmplt_priv)) {
2124                 module_put(ops->owner);
2125                 return PTR_ERR(tmplt_priv);
2126         }
2127         chain->tmplt_ops = ops;
2128         chain->tmplt_priv = tmplt_priv;
2129         return 0;
2130 }
2131
/* tc_chain_tmplt_del - release the chain's template, if any.
 *
 * Destroys the classifier's private template state and drops the module
 * reference held since tc_chain_tmplt_add().
 */
static void tc_chain_tmplt_del(struct tcf_chain *chain)
{
	const struct tcf_proto_ops *ops = chain->tmplt_ops;

	/* If template ops are NOT set, the chain has no template and
	 * there is nothing to release. (The previous comment had this
	 * condition inverted.)
	 */
	if (!ops)
		return;

	ops->tmplt_destroy(chain->tmplt_priv);
	module_put(ops->owner);
}
2143
/* Add/delete/get a filter chain (RTM_NEWCHAIN/RTM_DELCHAIN/RTM_GETCHAIN).
 *
 * Only RTM_GETCHAIN is permitted without CAP_NET_ADMIN. An -EAGAIN
 * result causes the whole request to be replayed from the parse step.
 */
static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	unsigned long cl;
	int err;

	if (n->nlmsg_type != RTM_GETCHAIN &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	parent = t->tcm_parent;
	cl = 0;

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block))
		return PTR_ERR(block);

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout_block;
	}
	chain = tcf_chain_lookup(block, chain_index);
	if (n->nlmsg_type == RTM_NEWCHAIN) {
		if (chain) {
			if (tcf_chain_held_by_acts_only(chain)) {
				/* The chain exists only because there is
				 * some action referencing it.
				 */
				tcf_chain_hold(chain);
			} else {
				NL_SET_ERR_MSG(extack, "Filter chain already exists");
				err = -EEXIST;
				goto errout_block;
			}
		} else {
			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
				err = -ENOENT;
				goto errout_block;
			}
			chain = tcf_chain_create(block, chain_index);
			if (!chain) {
				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
				err = -ENOMEM;
				goto errout_block;
			}
		}
	} else {
		/* GET/DEL require a chain that exists beyond action-only
		 * references.
		 */
		if (!chain || tcf_chain_held_by_acts_only(chain)) {
			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
			err = -EINVAL;
			goto errout_block;
		}
		/* Held across the switch below; dropped at errout */
		tcf_chain_hold(chain);
	}

	switch (n->nlmsg_type) {
	case RTM_NEWCHAIN:
		err = tc_chain_tmplt_add(chain, net, tca, extack);
		if (err)
			goto errout;
		/* In case the chain was successfully added, take a reference
		 * to the chain. This ensures that an empty chain
		 * does not disappear at the end of this function.
		 */
		tcf_chain_hold(chain);
		chain->explicitly_created = true;
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);
		break;
	case RTM_DELCHAIN:
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		/* Flush the chain first as the user requested chain removal. */
		tcf_chain_flush(chain);
		/* In case the chain was successfully deleted, put a reference
		 * to the chain previously taken during addition.
		 */
		tcf_chain_put_explicitly_created(chain);
		chain->explicitly_created = false;
		break;
	case RTM_GETCHAIN:
		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
				      n->nlmsg_seq, n->nlmsg_type, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
		break;
	default:
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(extack, "Unsupported message type");
		goto errout;
	}

errout:
	tcf_chain_put(chain);
errout_block:
	tcf_block_release(q, block);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}
2265
/* tc_dump_chain - dump all explicitly created chains of one block
 * (RTM_GETCHAIN with NLM_F_DUMP). Called with RTNL held.
 * cb->args[0] is the flat position in the block's chain list at which
 * a resumed dump continues.
 */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcf_chain *chain;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		/* Shared block addressed by index; reference dropped below */
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	list_for_each_entry(chain, &block->chain_list, list) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		if (index < index_start) {
			index++;
			continue;
		}
		/* Skip chains that exist only via action references */
		if (tcf_chain_held_by_acts_only(chain))
			continue;
		err = tc_chain_fill_node(chain, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		if (err <= 0)
			break;
		index++;
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}
2366
/* tcf_exts_destroy - release all actions attached to @exts.
 *
 * Unbinds and frees the action array (CONFIG_NET_CLS_ACT only) and
 * resets the pointer and count so a repeated destroy of the same
 * tcf_exts — or a destroy of one whose actions were never allocated —
 * is a safe no-op instead of a use-after-free/NULL dereference.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	if (exts->actions) {
		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
		kfree(exts->actions);
		exts->actions = NULL;	/* guard against double destroy */
	}
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
2376
/* tcf_exts_validate - parse and bind actions from netlink attributes.
 *
 * @tb[exts->police] carries an old-compat single police action;
 * @tb[exts->action] carries a new-style nested action list. On success
 * the created actions are stored in exts->actions and exts->nr_actions
 * is updated. Without CONFIG_NET_CLS_ACT any action/police attribute
 * is rejected with -EOPNOTSUPP. Returns 0 or a negative errno.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;
		size_t attr_size = 0;

		if (exts->police && tb[exts->police]) {
			/* Old-compat: exactly one "police" action */
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND, true, extack);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			int err;

			/* New-style: tcf_action_init() returns the number
			 * of actions created on success.
			 */
			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      exts->actions, &attr_size, true,
					      extack);
			if (err < 0)
				return err;
			exts->nr_actions = err;
		}
		exts->net = net;
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);
2420
/* tcf_exts_change - replace @dst's actions with @src's.
 *
 * The old contents of @dst are snapshotted before the copy so that the
 * subsequent destroy releases the previously held actions without
 * touching the newly installed ones.
 */
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
2431
#ifdef CONFIG_NET_CLS_ACT
/* Return the first attached action, or NULL when there are none. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
2441
/* tcf_exts_dump - emit the actions of @exts as netlink attributes.
 *
 * New-style actions are dumped as a nested list under exts->action;
 * old-compat (single police) actions under exts->police. Returns 0 on
 * success or -1 when the skb ran out of room (the partial nest is
 * cancelled). Always succeeds without CONFIG_NET_CLS_ACT.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2  was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	/* nest may be NULL here only on the nla_nest_start failure paths */
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
2481
2482
/* Append the statistics of the first attached action to @skb
 * (old-style single-action compatibility). Returns 0 on success,
 * -1 when the stats could not be copied.
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *act = tcf_exts_first_act(exts);

	if (act && tcf_action_copy_stats(skb, act, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
2493
2494 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
2495                      void *type_data, bool err_stop)
2496 {
2497         struct tcf_block_cb *block_cb;
2498         int ok_count = 0;
2499         int err;
2500
2501         /* Make sure all netdevs sharing this block are offload-capable. */
2502         if (block->nooffloaddevcnt && err_stop)
2503                 return -EOPNOTSUPP;
2504
2505         list_for_each_entry(block_cb, &block->cb_list, list) {
2506                 err = block_cb->cb(type, type_data, block_cb->cb_priv);
2507                 if (err) {
2508                         if (err_stop)
2509                                 return err;
2510                 } else {
2511                         ok_count++;
2512                 }
2513         }
2514         return ok_count;
2515 }
2516 EXPORT_SYMBOL(tc_setup_cb_call);
2517
2518 static __net_init int tcf_net_init(struct net *net)
2519 {
2520         struct tcf_net *tn = net_generic(net, tcf_net_id);
2521
2522         spin_lock_init(&tn->idr_lock);
2523         idr_init(&tn->idr);
2524         return 0;
2525 }
2526
2527 static void __net_exit tcf_net_exit(struct net *net)
2528 {
2529         struct tcf_net *tn = net_generic(net, tcf_net_id);
2530
2531         idr_destroy(&tn->idr);
2532 }
2533
/* Per-network-namespace state registration: each netns gets a
 * struct tcf_net (at the tcf_net_id offset) holding the block IDR.
 */
static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};
2540
/* Subsystem init: allocate the ordered filter workqueue, register the
 * per-netns state, the indirect block-setup hashtable and the
 * rtnetlink handlers for filters and chains. Resources acquired before
 * a failure are unwound in reverse order; rtnl_register() itself has
 * no failure handling here (its return value is not checked).
 */
static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

	err = rhashtable_init(&indr_setup_block_ht,
			      &tc_indr_setup_block_ht_params);
	if (err)
		goto err_rhash_setup_block_ht;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, 0);
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;

err_rhash_setup_block_ht:
	unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);