// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c  Packet classifier API.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
#include <net/flow_offload.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
        return jhash_3words(tp->chain->index, tp->prio,
                            (__force __u32)tp->protocol, 0);
}

static void tcf_proto_signal_destroying(struct tcf_chain *chain,
                                        struct tcf_proto *tp)
{
        struct tcf_block *block = chain->block;

        mutex_lock(&block->proto_destroy_lock);
        hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
                     destroy_obj_hashfn(tp));
        mutex_unlock(&block->proto_destroy_lock);
}

static bool tcf_proto_cmp(const struct tcf_proto *tp1,
                          const struct tcf_proto *tp2)
{
        return tp1->chain->index == tp2->chain->index &&
               tp1->prio == tp2->prio &&
               tp1->protocol == tp2->protocol;
}

static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
                                        struct tcf_proto *tp)
{
        u32 hash = destroy_obj_hashfn(tp);
        struct tcf_proto *iter;
        bool found = false;

        rcu_read_lock();
        hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
                                   destroy_ht_node, hash) {
                if (tcf_proto_cmp(tp, iter)) {
                        found = true;
                        break;
                }
        }
        rcu_read_unlock();

        return found;
}

static void
tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
{
        struct tcf_block *block = chain->block;

        mutex_lock(&block->proto_destroy_lock);
        if (hash_hashed(&tp->destroy_ht_node))
                hash_del_rcu(&tp->destroy_ht_node);
        mutex_unlock(&block->proto_destroy_lock);
}

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
        const struct tcf_proto_ops *t, *res = NULL;

        if (kind) {
                read_lock(&cls_mod_lock);
                list_for_each_entry(t, &tcf_proto_base, head) {
                        if (strcmp(kind, t->kind) == 0) {
                                if (try_module_get(t->owner))
                                        res = t;
                                break;
                        }
                }
                read_unlock(&cls_mod_lock);
        }
        return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
                     struct netlink_ext_ack *extack)
{
        const struct tcf_proto_ops *ops;

        ops = __tcf_proto_lookup_ops(kind);
        if (ops)
                return ops;
#ifdef CONFIG_MODULES
        if (rtnl_held)
                rtnl_unlock();
        request_module("cls_%s", kind);
        if (rtnl_held)
                rtnl_lock();
        ops = __tcf_proto_lookup_ops(kind);
        /* We dropped the RTNL semaphore in order to perform
         * the module load. So, even if we succeeded in loading
         * the module we have to replay the request. We indicate
         * this using -EAGAIN.
         */
        if (ops) {
                module_put(ops->owner);
                return ERR_PTR(-EAGAIN);
        }
#endif
        NL_SET_ERR_MSG(extack, "TC classifier not found");
        return ERR_PTR(-ENOENT);
}

/* Register (unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
        struct tcf_proto_ops *t;
        int rc = -EEXIST;

        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head)
                if (!strcmp(ops->kind, t->kind))
                        goto out;

        list_add_tail(&ops->head, &tcf_proto_base);
        rc = 0;
out:
        write_unlock(&cls_mod_lock);
        return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
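
/* Example (illustrative sketch, not part of this file): a classifier
 * module typically registers its ops from module init and unregisters
 * them on exit. The "foo" names below are hypothetical.
 *
 *      static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *              .kind           = "foo",
 *              .classify       = foo_classify,
 *              .init           = foo_init,
 *              .destroy        = foo_destroy,
 *              .get            = foo_get,
 *              .change         = foo_change,
 *              .delete         = foo_delete,
 *              .walk           = foo_walk,
 *              .dump           = foo_dump,
 *              .owner          = THIS_MODULE,
 *      };
 *
 *      static int __init cls_foo_init(void)
 *      {
 *              return register_tcf_proto_ops(&cls_foo_ops);
 *      }
 *
 *      static void __exit cls_foo_exit(void)
 *      {
 *              unregister_tcf_proto_ops(&cls_foo_ops);
 *      }
 *      module_init(cls_foo_init);
 *      module_exit(cls_foo_exit);
 */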

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
        struct tcf_proto_ops *t;
        int rc = -ENOENT;

        /* Wait for outstanding call_rcu()s, if any, from a
         * tcf_proto_ops's destroy() handler.
         */
        rcu_barrier();
        flush_workqueue(tc_filter_wq);

        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head) {
                if (t == ops) {
                        list_del(&t->head);
                        rc = 0;
                        break;
                }
        }
        write_unlock(&cls_mod_lock);
        return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
        INIT_RCU_WORK(rwork, func);
        return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);

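/* Example (illustrative sketch, not part of this file): classifiers use
 * tcf_queue_work() to defer filter freeing until both an RCU grace
 * period has elapsed and the queued work has run. The cls_foo names are
 * hypothetical; the pattern mirrors in-tree classifiers.
 *
 *      struct cls_foo_filter {
 *              ...
 *              struct rcu_work rwork;
 *      };
 *
 *      static void cls_foo_destroy_work(struct work_struct *work)
 *      {
 *              struct cls_foo_filter *f = container_of(to_rcu_work(work),
 *                                                      struct cls_foo_filter,
 *                                                      rwork);
 *              kfree(f);
 *      }
 *
 *      // in the delete/destroy path:
 *      tcf_queue_work(&f->rwork, cls_foo_destroy_work);
 */
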
/* Select a new prio value from the range managed by the kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
        u32 first = TC_H_MAKE(0xC0000000U, 0U);

        if (tp)
                first = tp->prio - 1;

        return TC_H_MAJ(first);
}
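
/* Worked example (added for clarity): prio lives in the upper 16 bits.
 * With no existing filter, first = 0xC0000000, so the auto-allocated
 * prio is TC_H_MAJ(0xC0000000) = 0xC0000000 (user-visible prio 0xC000).
 * If the current head has prio 0x000C0000, then first = 0x000BFFFF and
 * TC_H_MAJ(first) = 0x000B0000, i.e. one prio slot below the head.
 */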

static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
{
        if (kind)
                return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
        memset(name, 0, IFNAMSIZ);
        return false;
}

static bool tcf_proto_is_unlocked(const char *kind)
{
        const struct tcf_proto_ops *ops;
        bool ret;

        if (strlen(kind) == 0)
                return false;

        ops = tcf_proto_lookup_ops(kind, false, NULL);
        /* On error return false to take rtnl lock. Proto lookup/create
         * functions will perform lookup again and properly handle errors.
         */
        if (IS_ERR(ops))
                return false;

        ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
        module_put(ops->owner);
        return ret;
}

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                                          u32 prio, struct tcf_chain *chain,
                                          bool rtnl_held,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_proto *tp;
        int err;

        tp = kzalloc(sizeof(*tp), GFP_KERNEL);
        if (!tp)
                return ERR_PTR(-ENOBUFS);

        tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
        if (IS_ERR(tp->ops)) {
                err = PTR_ERR(tp->ops);
                goto errout;
        }
        tp->classify = tp->ops->classify;
        tp->protocol = protocol;
        tp->prio = prio;
        tp->chain = chain;
        spin_lock_init(&tp->lock);
        refcount_set(&tp->refcnt, 1);

        err = tp->ops->init(tp);
        if (err) {
                module_put(tp->ops->owner);
                goto errout;
        }
        return tp;

errout:
        kfree(tp);
        return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
        refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
                              bool sig_destroy, struct netlink_ext_ack *extack)
{
        tp->ops->destroy(tp, rtnl_held, extack);
        if (sig_destroy)
                tcf_proto_signal_destroyed(tp->chain, tp);
        tcf_chain_put(tp->chain);
        module_put(tp->ops->owner);
        kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
                          struct netlink_ext_ack *extack)
{
        if (refcount_dec_and_test(&tp->refcnt))
                tcf_proto_destroy(tp, rtnl_held, true, extack);
}

static int walker_check_empty(struct tcf_proto *tp, void *fh,
                              struct tcf_walker *arg)
{
        if (fh) {
                arg->nonempty = true;
                return -1;
        }
        return 0;
}

static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
{
        struct tcf_walker walker = { .fn = walker_check_empty, };

        if (tp->ops->walk) {
                tp->ops->walk(tp, &walker, rtnl_held);
                return !walker.nonempty;
        }
        return true;
}

static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
{
        spin_lock(&tp->lock);
        if (tcf_proto_is_empty(tp, rtnl_held))
                tp->deleting = true;
        spin_unlock(&tp->lock);
        return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
        spin_lock(&tp->lock);
        tp->deleting = true;
        spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
        bool deleting;

        spin_lock(&tp->lock);
        deleting = tp->deleting;
        spin_unlock(&tp->lock);

        return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)                                      \
        lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
        struct list_head list;
        tcf_chain_head_change_t *chain_head_change;
        void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
                                          u32 chain_index)
{
        struct tcf_chain *chain;

        ASSERT_BLOCK_LOCKED(block);

        chain = kzalloc(sizeof(*chain), GFP_KERNEL);
        if (!chain)
                return NULL;
        list_add_tail(&chain->list, &block->chain_list);
        mutex_init(&chain->filter_chain_lock);
        chain->block = block;
        chain->index = chain_index;
        chain->refcnt = 1;
        if (!chain->index)
                block->chain0.chain = chain;
        return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
                                       struct tcf_proto *tp_head)
{
        if (item->chain_head_change)
                item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
                                   struct tcf_proto *tp_head)
{
        struct tcf_filter_chain_list_item *item;
        struct tcf_block *block = chain->block;

        if (chain->index)
                return;

        mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list)
                tcf_chain_head_change_item(item, tp_head);
        mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
        struct tcf_block *block = chain->block;

        ASSERT_BLOCK_LOCKED(block);

        list_del(&chain->list);
        if (!chain->index)
                block->chain0.chain = NULL;

        if (list_empty(&block->chain_list) &&
            refcount_read(&block->refcnt) == 0)
                return true;

        return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
        mutex_destroy(&block->lock);
        mutex_destroy(&block->proto_destroy_lock);
        kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
        struct tcf_block *block = chain->block;

        mutex_destroy(&chain->filter_chain_lock);
        kfree_rcu(chain, rcu);
        if (free_block)
                tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
        ASSERT_BLOCK_LOCKED(chain->block);

        ++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
        ASSERT_BLOCK_LOCKED(chain->block);

        /* In case all the references are action references, this
         * chain should not be shown to the user.
         */
        return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
                                          u32 chain_index)
{
        struct tcf_chain *chain;

        ASSERT_BLOCK_LOCKED(block);

        list_for_each_entry(chain, &block->chain_list, list) {
                if (chain->index == chain_index)
                        return chain;
        }
        return NULL;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
                           u32 seq, u16 flags, int event, bool unicast);

static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
                                         u32 chain_index, bool create,
                                         bool by_act)
{
        struct tcf_chain *chain = NULL;
        bool is_first_reference;

        mutex_lock(&block->lock);
        chain = tcf_chain_lookup(block, chain_index);
        if (chain) {
                tcf_chain_hold(chain);
        } else {
                if (!create)
                        goto errout;
                chain = tcf_chain_create(block, chain_index);
                if (!chain)
                        goto errout;
        }

        if (by_act)
                ++chain->action_refcnt;
        is_first_reference = chain->refcnt - chain->action_refcnt == 1;
        mutex_unlock(&block->lock);

        /* Send a notification only when we get the first
         * non-action reference. Until then, the chain acts only as
         * a placeholder for actions pointing to it, and the user
         * ought not to know about them.
         */
        if (is_first_reference && !by_act)
                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
                                RTM_NEWCHAIN, false);

        return chain;

errout:
        mutex_unlock(&block->lock);
        return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
                                       bool create)
{
        return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
        return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
                               void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
                                  void *tmplt_priv, u32 chain_index,
                                  struct tcf_block *block, struct sk_buff *oskb,
                                  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
                            bool explicitly_created)
{
        struct tcf_block *block = chain->block;
        const struct tcf_proto_ops *tmplt_ops;
        bool free_block = false;
        unsigned int refcnt;
        void *tmplt_priv;

        mutex_lock(&block->lock);
        if (explicitly_created) {
                if (!chain->explicitly_created) {
                        mutex_unlock(&block->lock);
                        return;
                }
                chain->explicitly_created = false;
        }

        if (by_act)
                chain->action_refcnt--;

        /* tc_chain_notify_delete can't be called while holding block lock.
         * However, when block is unlocked chain can be changed concurrently, so
         * save these to temporary variables.
         */
        refcnt = --chain->refcnt;
        tmplt_ops = chain->tmplt_ops;
        tmplt_priv = chain->tmplt_priv;

        /* The last dropped non-action reference will trigger notification. */
        if (refcnt - chain->action_refcnt == 0 && !by_act) {
                tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
                                       block, NULL, 0, 0, false);
                /* Last reference to chain, no need to lock. */
                chain->flushing = false;
        }

        if (refcnt == 0)
                free_block = tcf_chain_detach(chain);
        mutex_unlock(&block->lock);

        if (refcnt == 0) {
                tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
                tcf_chain_destroy(chain, free_block);
        }
}

static void tcf_chain_put(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);
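
/* Example (illustrative sketch, not part of this file): an action with
 * a goto_chain attribute pins its target chain for the lifetime of the
 * action, roughly as the act API does:
 *
 *      chain = tcf_chain_get_by_act(block, chain_index);
 *      if (!chain)
 *              return -ENOMEM;
 *      ...     // install the action pointing at 'chain'
 *      tcf_chain_put_by_act(chain);    // on action removal
 */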

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false, true);
}

static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
        struct tcf_proto *tp, *tp_next;

        mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        while (tp) {
                tp_next = rcu_dereference_protected(tp->next, 1);
                tcf_proto_signal_destroying(chain, tp);
                tp = tp_next;
        }
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        RCU_INIT_POINTER(chain->filter_chain, NULL);
        tcf_chain0_head_change(chain, NULL);
        chain->flushing = true;
        mutex_unlock(&chain->filter_chain_lock);

        while (tp) {
                tp_next = rcu_dereference_protected(tp->next, 1);
                tcf_proto_put(tp, rtnl_held, NULL);
                tp = tp_next;
        }
}

static int tcf_block_setup(struct tcf_block *block,
                           struct flow_block_offload *bo);

static void tc_indr_block_ing_cmd(struct net_device *dev,
                                  struct tcf_block *block,
                                  flow_indr_block_bind_cb_t *cb,
                                  void *cb_priv,
                                  enum flow_block_command command)
{
        struct flow_block_offload bo = {
                .command        = command,
                .binder_type    = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
                .net            = dev_net(dev),
                .block_shared   = tcf_block_non_null_shared(block),
        };
        INIT_LIST_HEAD(&bo.cb_list);

        if (!block)
                return;

        bo.block = &block->flow_block;

        down_write(&block->cb_lock);
        cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);

        tcf_block_setup(block, &bo);
        up_write(&block->cb_lock);
}

static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
        const struct Qdisc_class_ops *cops;
        struct Qdisc *qdisc;

        if (!dev_ingress_queue(dev))
                return NULL;

        qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
        if (!qdisc)
                return NULL;

        cops = qdisc->ops->cl_ops;
        if (!cops)
                return NULL;

        if (!cops->tcf_block)
                return NULL;

        return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}

static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
                                          flow_indr_block_bind_cb_t *cb,
                                          void *cb_priv,
                                          enum flow_block_command command)
{
        struct tcf_block *block = tc_dev_ingress_block(dev);

        tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
}

static void tc_indr_block_call(struct tcf_block *block,
                               struct net_device *dev,
                               struct tcf_block_ext_info *ei,
                               enum flow_block_command command,
                               struct netlink_ext_ack *extack)
{
        struct flow_block_offload bo = {
                .command        = command,
                .binder_type    = ei->binder_type,
                .net            = dev_net(dev),
                .block          = &block->flow_block,
                .block_shared   = tcf_block_shared(block),
                .extack         = extack,
        };
        INIT_LIST_HEAD(&bo.cb_list);

        flow_indr_block_call(dev, &bo, command);
        tcf_block_setup(block, &bo);
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
        return atomic_read(&block->offloadcnt);
}

static int tcf_block_offload_cmd(struct tcf_block *block,
                                 struct net_device *dev,
                                 struct tcf_block_ext_info *ei,
                                 enum flow_block_command command,
                                 struct netlink_ext_ack *extack)
{
        struct flow_block_offload bo = {};
        int err;

        bo.net = dev_net(dev);
        bo.command = command;
        bo.binder_type = ei->binder_type;
        bo.block = &block->flow_block;
        bo.block_shared = tcf_block_shared(block);
        bo.extack = extack;
        INIT_LIST_HEAD(&bo.cb_list);

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
        if (err < 0)
                return err;

        return tcf_block_setup(block, &bo);
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
                                  struct tcf_block_ext_info *ei,
                                  struct netlink_ext_ack *extack)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        down_write(&block->cb_lock);
        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_inc;

        /* If the tc offload feature is disabled and the block we try to bind
         * to already has some offloaded filters, forbid the bind.
         */
        if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
                NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
                err = -EOPNOTSUPP;
                goto err_unlock;
        }

        err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_inc;
        if (err)
                goto err_unlock;

        tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
        up_write(&block->cb_lock);
        return 0;

no_offload_dev_inc:
        if (tcf_block_offload_in_use(block)) {
                err = -EOPNOTSUPP;
                goto err_unlock;
        }
        err = 0;
        block->nooffloaddevcnt++;
        tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
err_unlock:
        up_write(&block->cb_lock);
        return err;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
                                     struct tcf_block_ext_info *ei)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        down_write(&block->cb_lock);
        tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);

        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_dec;
        err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_dec;
        up_write(&block->cb_lock);
        return;

no_offload_dev_dec:
        WARN_ON(block->nooffloaddevcnt-- == 0);
        up_write(&block->cb_lock);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
                              struct tcf_block_ext_info *ei,
                              struct netlink_ext_ack *extack)
{
        struct tcf_filter_chain_list_item *item;
        struct tcf_chain *chain0;

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item) {
                NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
                return -ENOMEM;
        }
        item->chain_head_change = ei->chain_head_change;
        item->chain_head_change_priv = ei->chain_head_change_priv;

        mutex_lock(&block->lock);
        chain0 = block->chain0.chain;
        if (chain0)
                tcf_chain_hold(chain0);
        else
                list_add(&item->list, &block->chain0.filter_chain_list);
        mutex_unlock(&block->lock);

        if (chain0) {
                struct tcf_proto *tp_head;

                mutex_lock(&chain0->filter_chain_lock);

                tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
                if (tp_head)
                        tcf_chain_head_change_item(item, tp_head);

                mutex_lock(&block->lock);
                list_add(&item->list, &block->chain0.filter_chain_list);
                mutex_unlock(&block->lock);

                mutex_unlock(&chain0->filter_chain_lock);
                tcf_chain_put(chain0);
        }

        return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
                              struct tcf_block_ext_info *ei)
{
        struct tcf_filter_chain_list_item *item;

        mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
                if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
                    (item->chain_head_change == ei->chain_head_change &&
                     item->chain_head_change_priv == ei->chain_head_change_priv)) {
                        if (block->chain0.chain)
                                tcf_chain_head_change_item(item, NULL);
                        list_del(&item->list);
                        mutex_unlock(&block->lock);

                        kfree(item);
                        return;
                }
        }
        mutex_unlock(&block->lock);
        WARN_ON(1);
}

struct tcf_net {
        spinlock_t idr_lock; /* Protects idr */
        struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
                            struct netlink_ext_ack *extack)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);
        int err;

        idr_preload(GFP_KERNEL);
        spin_lock(&tn->idr_lock);
        err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
                            GFP_NOWAIT);
        spin_unlock(&tn->idr_lock);
        idr_preload_end();

        return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        spin_lock(&tn->idr_lock);
        idr_remove(&tn->idr, block->index);
        spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
                                          u32 block_index,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_block *block;

        block = kzalloc(sizeof(*block), GFP_KERNEL);
        if (!block) {
                NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
                return ERR_PTR(-ENOMEM);
        }
        mutex_init(&block->lock);
        mutex_init(&block->proto_destroy_lock);
        init_rwsem(&block->cb_lock);
        flow_block_init(&block->flow_block);
        INIT_LIST_HEAD(&block->chain_list);
        INIT_LIST_HEAD(&block->owner_list);
        INIT_LIST_HEAD(&block->chain0.filter_chain_list);

        refcount_set(&block->refcnt, 1);
        block->net = net;
        block->index = block_index;

        /* Don't store q pointer for blocks which are shared */
        if (!tcf_block_shared(block))
                block->q = q;
        return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        return idr_find(&tn->idr, block_index);
}

static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
        struct tcf_block *block;

        rcu_read_lock();
        block = tcf_block_lookup(net, block_index);
        if (block && !refcount_inc_not_zero(&block->refcnt))
                block = NULL;
        rcu_read_unlock();

        return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
        mutex_lock(&block->lock);
        if (chain)
                chain = list_is_last(&chain->list, &block->chain_list) ?
                        NULL : list_next_entry(chain, list);
        else
                chain = list_first_entry_or_null(&block->chain_list,
                                                 struct tcf_chain, list);

        /* skip all action-only chains */
        while (chain && tcf_chain_held_by_acts_only(chain))
                chain = list_is_last(&chain->list, &block->chain_list) ?
                        NULL : list_next_entry(chain, list);

        if (chain)
                tcf_chain_hold(chain);
        mutex_unlock(&block->lock);

        return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * a block. It properly obtains block->lock and takes a reference to the chain
 * before returning it. Users of this function must be tolerant of concurrent
 * chain insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time the skb is filled with
 * data and sent to user space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
        struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

        if (chain)
                tcf_chain_put(chain);

        return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);
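
/* Example (illustrative sketch, not part of this file): start with a
 * NULL cursor; each call drops the reference on the previous chain and
 * holds one on the next, so iterating to NULL leaves nothing pinned:
 *
 *      struct tcf_chain *chain;
 *
 *      for (chain = tcf_get_next_chain(block, NULL);
 *           chain;
 *           chain = tcf_get_next_chain(block, chain)) {
 *              ...     // a reference to 'chain' is held here
 *      }
 */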

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
        u32 prio = 0;

        ASSERT_RTNL();
        mutex_lock(&chain->filter_chain_lock);

        if (!tp) {
                tp = tcf_chain_dereference(chain->filter_chain, chain);
        } else if (tcf_proto_is_deleting(tp)) {
                /* 'deleting' flag is set and chain->filter_chain_lock was
                 * unlocked, which means next pointer could be invalid. Restart
                 * search.
                 */
                prio = tp->prio + 1;
                tp = tcf_chain_dereference(chain->filter_chain, chain);

                for (; tp; tp = tcf_chain_dereference(tp->next, chain))
                        if (!tp->deleting && tp->prio >= prio)
                                break;
        } else {
                tp = tcf_chain_dereference(tp->next, chain);
        }

        if (tp)
                tcf_proto_get(tp);

        mutex_unlock(&chain->filter_chain_lock);

        return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * a chain. Users of this function must be tolerant of concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time the skb is filled with
 * data and sent to user space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
                   bool rtnl_held)
{
        struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

        if (tp)
                tcf_proto_put(tp, rtnl_held, NULL);

        return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);
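
/* Example (illustrative sketch, not part of this file): walking every
 * filter on a block combines both iterators; references are managed by
 * the iterators themselves:
 *
 *      for (chain = tcf_get_next_chain(block, NULL);
 *           chain;
 *           chain = tcf_get_next_chain(block, chain)) {
 *              for (tp = tcf_get_next_proto(chain, NULL, true);
 *                   tp;
 *                   tp = tcf_get_next_proto(chain, tp, true)) {
 *                      ...     // inspect tp (rtnl held in this sketch)
 *              }
 *      }
 */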

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
        struct tcf_chain *chain;

        /* Last reference to block. At this point chains cannot be added or
         * removed concurrently.
         */
        for (chain = tcf_get_next_chain(block, NULL);
             chain;
             chain = tcf_get_next_chain(block, chain)) {
                tcf_chain_put_explicitly_created(chain);
                tcf_chain_flush(chain, rtnl_held);
        }
}

/* Look up the Qdisc and increment its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
                            u32 *parent, int ifindex, bool rtnl_held,
                            struct netlink_ext_ack *extack)
{
        const struct Qdisc_class_ops *cops;
        struct net_device *dev;
        int err = 0;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
                return 0;

        rcu_read_lock();

        /* Find link */
        dev = dev_get_by_index_rcu(net, ifindex);
        if (!dev) {
                rcu_read_unlock();
                return -ENODEV;
        }

        /* Find qdisc */
        if (!*parent) {
                *q = dev->qdisc;
                *parent = (*q)->handle;
        } else {
                *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
                if (!*q) {
                        NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
                        err = -EINVAL;
                        goto errout_rcu;
                }
        }

        *q = qdisc_refcount_inc_nz(*q);
        if (!*q) {
                NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
                err = -EINVAL;
                goto errout_rcu;
        }

        /* Is it classful? */
        cops = (*q)->ops->cl_ops;
        if (!cops) {
                NL_SET_ERR_MSG(extack, "Qdisc not classful");
                err = -EINVAL;
                goto errout_qdisc;
        }

        if (!cops->tcf_block) {
                NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
                err = -EOPNOTSUPP;
                goto errout_qdisc;
        }

errout_rcu:
        /* At this point we know that qdisc is not noop_qdisc,
         * which means that qdisc holds a reference to net_device
         * and we hold a reference to qdisc, so it is safe to release
         * rcu read lock.
         */
        rcu_read_unlock();
        return err;

errout_qdisc:
        rcu_read_unlock();

        if (rtnl_held)
                qdisc_put(*q);
        else
                qdisc_put_unlocked(*q);
        *q = NULL;

        return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
                               int ifindex, struct netlink_ext_ack *extack)
{
        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
                return 0;

        /* Are we searching for a filter attached to a class? */
        if (TC_H_MIN(parent)) {
                const struct Qdisc_class_ops *cops = q->ops->cl_ops;

                *cl = cops->find(q, parent);
                if (*cl == 0) {
                        NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
                        return -ENOENT;
                }
        }

        return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
                                          unsigned long cl, int ifindex,
                                          u32 block_index,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_block *block;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
                block = tcf_block_refcnt_get(net, block_index);
                if (!block) {
                        NL_SET_ERR_MSG(extack, "Block of given index was not found");
                        return ERR_PTR(-EINVAL);
                }
        } else {
                const struct Qdisc_class_ops *cops = q->ops->cl_ops;

                block = cops->tcf_block(q, cl, extack);
                if (!block)
                        return ERR_PTR(-EINVAL);

                if (tcf_block_shared(block)) {
                        NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
                        return ERR_PTR(-EOPNOTSUPP);
                }

                /* Always take a reference to the block in order to support
                 * execution of the cls API rules update path without the rtnl
                 * lock. The caller must release the block when finished using
                 * it. The 'if' branch of this conditional obtains its
                 * reference to the block by calling tcf_block_refcnt_get().
                 */
                refcount_inc(&block->refcnt);
        }

        return block;
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
                            struct tcf_block_ext_info *ei, bool rtnl_held)
{
        if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
                /* Flushing/putting all chains will cause the block to be
                 * deallocated when the last chain is freed. However, if
                 * chain_list is empty, the block has to be deallocated
                 * manually. Once the block's reference counter has reached 0,
                 * it is no longer possible to increment it or add new chains
                 * to the block.
                 */
                bool free_block = list_empty(&block->chain_list);

                mutex_unlock(&block->lock);
                if (tcf_block_shared(block))
                        tcf_block_remove(block, block->net);

                if (q)
                        tcf_block_offload_unbind(block, q, ei);

                if (free_block)
                        tcf_block_destroy(block);
                else
                        tcf_block_flush_all_chains(block, rtnl_held);
        } else if (q) {
                tcf_block_offload_unbind(block, q, ei);
        }
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
        __tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
                                        u32 *parent, unsigned long *cl,
                                        int ifindex, u32 block_index,
                                        struct netlink_ext_ack *extack)
{
        struct tcf_block *block;
        int err = 0;

        ASSERT_RTNL();

        err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
        if (err)
                goto errout;

        err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
        if (err)
                goto errout_qdisc;

        block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout_qdisc;
        }

        return block;

errout_qdisc:
        if (*q)
                qdisc_put(*q);
errout:
        *q = NULL;
        return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
                              bool rtnl_held)
{
        if (!IS_ERR_OR_NULL(block))
                tcf_block_refcnt_put(block, rtnl_held);

        if (q) {
                if (rtnl_held)
                        qdisc_put(q);
                else
                        qdisc_put_unlocked(q);
        }
}

struct tcf_block_owner_item {
        struct list_head list;
        struct Qdisc *q;
        enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
                               struct Qdisc *q,
                               enum flow_block_binder_type binder_type)
{
        if (block->keep_dst &&
            binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
            binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
                netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
        struct tcf_block_owner_item *item;

        block->keep_dst = true;
        list_for_each_entry(item, &block->owner_list, list)
                tcf_block_owner_netif_keep_dst(block, item->q,
                                               item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
                               struct Qdisc *q,
                               enum flow_block_binder_type binder_type)
{
        struct tcf_block_owner_item *item;

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item)
                return -ENOMEM;
        item->q = q;
        item->binder_type = binder_type;
        list_add(&item->list, &block->owner_list);
        return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
                                struct Qdisc *q,
                                enum flow_block_binder_type binder_type)
{
        struct tcf_block_owner_item *item;

        list_for_each_entry(item, &block->owner_list, list) {
                if (item->q == q && item->binder_type == binder_type) {
                        list_del(&item->list);
                        kfree(item);
                        return;
                }
        }
        WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
                      struct tcf_block_ext_info *ei,
                      struct netlink_ext_ack *extack)
{
        struct net *net = qdisc_net(q);
        struct tcf_block *block = NULL;
        int err;

        if (ei->block_index)
                /* block_index not 0 means the shared block is requested */
                block = tcf_block_refcnt_get(net, ei->block_index);

        if (!block) {
                block = tcf_block_create(net, q, ei->block_index, extack);
                if (IS_ERR(block))
                        return PTR_ERR(block);
                if (tcf_block_shared(block)) {
                        err = tcf_block_insert(block, net, extack);
                        if (err)
                                goto err_block_insert;
                }
        }

        err = tcf_block_owner_add(block, q, ei->binder_type);
        if (err)
                goto err_block_owner_add;

        tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

        err = tcf_chain0_head_change_cb_add(block, ei, extack);
        if (err)
                goto err_chain0_head_change_cb_add;

        err = tcf_block_offload_bind(block, q, ei, extack);
        if (err)
                goto err_block_offload_bind;

        *p_block = block;
        return 0;

err_block_offload_bind:
        tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
        tcf_block_refcnt_put(block, true);
        return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
        struct tcf_proto __rcu **p_filter_chain = priv;

        rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
                  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
                  struct netlink_ext_ack *extack)
{
        struct tcf_block_ext_info ei = {
                .chain_head_change = tcf_chain_head_change_dflt,
                .chain_head_change_priv = p_filter_chain,
        };

        WARN_ON(!p_filter_chain);
        return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);

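/* Example (illustrative sketch, not part of this file): a qdisc that
 * classifies typically creates its block in ->init() and releases it in
 * ->destroy(). The foo_* names are hypothetical.
 *
 *      struct foo_sched_data {
 *              struct tcf_block *block;
 *              struct tcf_proto __rcu *filter_list;
 *      };
 *
 *      static int foo_init(struct Qdisc *sch, struct nlattr *opt,
 *                          struct netlink_ext_ack *extack)
 *      {
 *              struct foo_sched_data *q = qdisc_priv(sch);
 *
 *              return tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *      }
 *
 *      static void foo_destroy(struct Qdisc *sch)
 *      {
 *              struct foo_sched_data *q = qdisc_priv(sch);
 *
 *              tcf_block_put(q->block);
 *      }
 */
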
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should all be removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
                       struct tcf_block_ext_info *ei)
{
        if (!block)
                return;
        tcf_chain0_head_change_cb_del(block, ei);
        tcf_block_owner_del(block, q, ei->binder_type);

        __tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
        struct tcf_block_ext_info ei = {0, };

        if (!block)
                return;
        tcf_block_put_ext(block, block->q, &ei);
}
EXPORT_SYMBOL(tcf_block_put);

static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
                            void *cb_priv, bool add, bool offload_in_use,
                            struct netlink_ext_ack *extack)
{
        struct tcf_chain *chain, *chain_prev;
        struct tcf_proto *tp, *tp_prev;
        int err;

        lockdep_assert_held(&block->cb_lock);

        for (chain = __tcf_get_next_chain(block, NULL);
             chain;
             chain_prev = chain,
                     chain = __tcf_get_next_chain(block, chain),
                     tcf_chain_put(chain_prev)) {
                for (tp = __tcf_get_next_proto(chain, NULL); tp;
                     tp_prev = tp,
                             tp = __tcf_get_next_proto(chain, tp),
                             tcf_proto_put(tp_prev, true, NULL)) {
                        if (tp->ops->reoffload) {
                                err = tp->ops->reoffload(tp, add, cb, cb_priv,
                                                         extack);
                                if (err && add)
                                        goto err_playback_remove;
                        } else if (add && offload_in_use) {
                                err = -EOPNOTSUPP;
                                NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
                                goto err_playback_remove;
                        }
                }
        }

        return 0;

err_playback_remove:
        tcf_proto_put(tp, true, NULL);
        tcf_chain_put(chain);
        tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
                                    extack);
        return err;
}

static int tcf_block_bind(struct tcf_block *block,
                          struct flow_block_offload *bo)
{
        struct flow_block_cb *block_cb, *next;
        int err, i = 0;

        lockdep_assert_held(&block->cb_lock);

        list_for_each_entry(block_cb, &bo->cb_list, list) {
                err = tcf_block_playback_offloads(block, block_cb->cb,
                                                  block_cb->cb_priv, true,
                                                  tcf_block_offload_in_use(block),
                                                  bo->extack);
                if (err)
                        goto err_unroll;
                if (!bo->unlocked_driver_cb)
                        block->lockeddevcnt++;

                i++;
        }
        list_splice(&bo->cb_list, &block->flow_block.cb_list);

        return 0;

err_unroll:
        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                if (i-- > 0) {
                        list_del(&block_cb->list);
                        tcf_block_playback_offloads(block, block_cb->cb,
                                                    block_cb->cb_priv, false,
                                                    tcf_block_offload_in_use(block),
                                                    NULL);
                        if (!bo->unlocked_driver_cb)
                                block->lockeddevcnt--;
                }
                flow_block_cb_free(block_cb);
        }

        return err;
}

static void tcf_block_unbind(struct tcf_block *block,
                             struct flow_block_offload *bo)
{
        struct flow_block_cb *block_cb, *next;

        lockdep_assert_held(&block->cb_lock);

        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                tcf_block_playback_offloads(block, block_cb->cb,
                                            block_cb->cb_priv, false,
                                            tcf_block_offload_in_use(block),
                                            NULL);
                list_del(&block_cb->list);
                flow_block_cb_free(block_cb);
                if (!bo->unlocked_driver_cb)
                        block->lockeddevcnt--;
        }
}

static int tcf_block_setup(struct tcf_block *block,
                           struct flow_block_offload *bo)
{
        int err;

        switch (bo->command) {
        case FLOW_BLOCK_BIND:
                err = tcf_block_bind(block, bo);
                break;
        case FLOW_BLOCK_UNBIND:
                err = 0;
                tcf_block_unbind(block, bo);
                break;
        default:
                WARN_ON_ONCE(1);
                err = -EOPNOTSUPP;
        }

        return err;
}

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                 struct tcf_result *res, bool compat_mode)
{
#ifdef CONFIG_NET_CLS_ACT
        const int max_reclassify_loop = 4;
        const struct tcf_proto *orig_tp = tp;
        const struct tcf_proto *first_tp;
        int limit = 0;

reclassify:
#endif
        for (; tp; tp = rcu_dereference_bh(tp->next)) {
                __be16 protocol = tc_skb_protocol(skb);
                int err;

                if (tp->protocol != protocol &&
                    tp->protocol != htons(ETH_P_ALL))
                        continue;

                err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
                if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
                        first_tp = orig_tp;
                        goto reset;
                } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
                        first_tp = res->goto_tp;

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
                        {
                                struct tc_skb_ext *ext;

                                ext = skb_ext_add(skb, TC_SKB_EXT);
                                if (WARN_ON_ONCE(!ext))
                                        return TC_ACT_SHOT;

                                ext->chain = err & TC_ACT_EXT_VAL_MASK;
                        }
#endif
                        goto reset;
                }
#endif
                if (err >= 0)
                        return err;
        }

        return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
        if (unlikely(limit++ >= max_reclassify_loop)) {
                net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
                                       tp->chain->block->index,
                                       tp->prio & 0xffff,
                                       ntohs(tp->protocol));
                return TC_ACT_SHOT;
        }

        tp = first_tp;
        goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
1630
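/* Illustrative caller-side sketch (not part of this file): qdiscs commonly
 * invoke tcf_classify() from their ->enqueue() path under RCU BH
 * protection. struct example_priv, its filter_list member, and
 * struct example_class are assumptions for the sketch.
 */
#if 0
static struct example_class *example_classify(struct sk_buff *skb,
					      struct Qdisc *sch, int *qerr)
{
	struct example_priv *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	result = tcf_classify(skb, rcu_dereference_bh(q->filter_list), &res,
			      false);
	if (result < 0)
		return NULL;	/* TC_ACT_UNSPEC: no filter matched */
#ifdef CONFIG_NET_CLS_ACT
	switch (result) {
	case TC_ACT_QUEUED:
	case TC_ACT_STOLEN:
	case TC_ACT_TRAP:
		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		/* fall through */
	case TC_ACT_SHOT:
		return NULL;	/* packet consumed or dropped */
	}
#endif
	return (struct example_class *)res.class;
}
#endif
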
1631 struct tcf_chain_info {
1632         struct tcf_proto __rcu **pprev;
1633         struct tcf_proto __rcu *next;
1634 };
1635
1636 static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
1637                                            struct tcf_chain_info *chain_info)
1638 {
1639         return tcf_chain_dereference(*chain_info->pprev, chain);
1640 }
1641
1642 static int tcf_chain_tp_insert(struct tcf_chain *chain,
1643                                struct tcf_chain_info *chain_info,
1644                                struct tcf_proto *tp)
1645 {
1646         if (chain->flushing)
1647                 return -EAGAIN;
1648
1649         if (*chain_info->pprev == chain->filter_chain)
1650                 tcf_chain0_head_change(chain, tp);
1651         tcf_proto_get(tp);
1652         RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
1653         rcu_assign_pointer(*chain_info->pprev, tp);
1654
1655         return 0;
1656 }
1657
1658 static void tcf_chain_tp_remove(struct tcf_chain *chain,
1659                                 struct tcf_chain_info *chain_info,
1660                                 struct tcf_proto *tp)
1661 {
1662         struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
1663
1664         tcf_proto_mark_delete(tp);
1665         if (tp == chain->filter_chain)
1666                 tcf_chain0_head_change(chain, next);
1667         RCU_INIT_POINTER(*chain_info->pprev, next);
1668 }
1669
1670 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1671                                            struct tcf_chain_info *chain_info,
1672                                            u32 protocol, u32 prio,
1673                                            bool prio_allocate);
1674
1675 /* Try to insert a new proto.
1676  * If a proto with the specified priority already exists, free the new
1677  * proto and return the existing one.
1678  */
1679
1680 static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
1681                                                     struct tcf_proto *tp_new,
1682                                                     u32 protocol, u32 prio,
1683                                                     bool rtnl_held)
1684 {
1685         struct tcf_chain_info chain_info;
1686         struct tcf_proto *tp;
1687         int err = 0;
1688
1689         mutex_lock(&chain->filter_chain_lock);
1690
1691         if (tcf_proto_exists_destroying(chain, tp_new)) {
1692                 mutex_unlock(&chain->filter_chain_lock);
1693                 tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1694                 return ERR_PTR(-EAGAIN);
1695         }
1696
1697         tp = tcf_chain_tp_find(chain, &chain_info,
1698                                protocol, prio, false);
1699         if (!tp)
1700                 err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
1701         mutex_unlock(&chain->filter_chain_lock);
1702
1703         if (tp) {
1704                 tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1705                 tp_new = tp;
1706         } else if (err) {
1707                 tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1708                 tp_new = ERR_PTR(err);
1709         }
1710
1711         return tp_new;
1712 }
1713
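/* Note on the function above: the find-or-insert runs under
 * chain->filter_chain_lock, but the losing tp_new is destroyed only after
 * the lock is dropped, presumably to keep the critical section short and
 * because tcf_proto_destroy() may take further locks via ops->destroy().
 */
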
1714 static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
1715                                       struct tcf_proto *tp, bool rtnl_held,
1716                                       struct netlink_ext_ack *extack)
1717 {
1718         struct tcf_chain_info chain_info;
1719         struct tcf_proto *tp_iter;
1720         struct tcf_proto **pprev;
1721         struct tcf_proto *next;
1722
1723         mutex_lock(&chain->filter_chain_lock);
1724
1725         /* Atomically find and remove tp from chain. */
1726         for (pprev = &chain->filter_chain;
1727              (tp_iter = tcf_chain_dereference(*pprev, chain));
1728              pprev = &tp_iter->next) {
1729                 if (tp_iter == tp) {
1730                         chain_info.pprev = pprev;
1731                         chain_info.next = tp_iter->next;
1732                         WARN_ON(tp_iter->deleting);
1733                         break;
1734                 }
1735         }
1736         /* Verify that tp still exists and no new filters were inserted
1737          * concurrently.
1738          * Mark tp for deletion if it is empty.
1739          */
1740         if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
1741                 mutex_unlock(&chain->filter_chain_lock);
1742                 return;
1743         }
1744
1745         tcf_proto_signal_destroying(chain, tp);
1746         next = tcf_chain_dereference(chain_info.next, chain);
1747         if (tp == chain->filter_chain)
1748                 tcf_chain0_head_change(chain, next);
1749         RCU_INIT_POINTER(*chain_info.pprev, next);
1750         mutex_unlock(&chain->filter_chain_lock);
1751
1752         tcf_proto_put(tp, rtnl_held, extack);
1753 }
1754
1755 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1756                                            struct tcf_chain_info *chain_info,
1757                                            u32 protocol, u32 prio,
1758                                            bool prio_allocate)
1759 {
1760         struct tcf_proto **pprev;
1761         struct tcf_proto *tp;
1762
1763         /* Check the chain for an existing proto-tcf with this priority */
1764         for (pprev = &chain->filter_chain;
1765              (tp = tcf_chain_dereference(*pprev, chain));
1766              pprev = &tp->next) {
1767                 if (tp->prio >= prio) {
1768                         if (tp->prio == prio) {
1769                                 if (prio_allocate ||
1770                                     (tp->protocol != protocol && protocol))
1771                                         return ERR_PTR(-EINVAL);
1772                         } else {
1773                                 tp = NULL;
1774                         }
1775                         break;
1776                 }
1777         }
1778         chain_info->pprev = pprev;
1779         if (tp) {
1780                 chain_info->next = tp->next;
1781                 tcf_proto_get(tp);
1782         } else {
1783                 chain_info->next = NULL;
1784         }
1785         return tp;
1786 }
1787
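/* Return contract of tcf_chain_tp_find() above (for reference):
 *   NULL             - no proto with this prio exists; chain_info->pprev
 *                      points at the insertion position
 *   ERR_PTR(-EINVAL) - prio is taken but prio_allocate was requested, or
 *                      the protocol does not match (protocol 0 is a
 *                      wildcard)
 *   valid pointer    - matching proto found; a reference is taken and
 *                      chain_info->next is set
 */
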
1788 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1789                          struct tcf_proto *tp, struct tcf_block *block,
1790                          struct Qdisc *q, u32 parent, void *fh,
1791                          u32 portid, u32 seq, u16 flags, int event,
1792                          bool rtnl_held)
1793 {
1794         struct tcmsg *tcm;
1795         struct nlmsghdr  *nlh;
1796         unsigned char *b = skb_tail_pointer(skb);
1797
1798         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1799         if (!nlh)
1800                 goto out_nlmsg_trim;
1801         tcm = nlmsg_data(nlh);
1802         tcm->tcm_family = AF_UNSPEC;
1803         tcm->tcm__pad1 = 0;
1804         tcm->tcm__pad2 = 0;
1805         if (q) {
1806                 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1807                 tcm->tcm_parent = parent;
1808         } else {
1809                 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1810                 tcm->tcm_block_index = block->index;
1811         }
1812         tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1813         if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1814                 goto nla_put_failure;
1815         if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1816                 goto nla_put_failure;
1817         if (!fh) {
1818                 tcm->tcm_handle = 0;
1819         } else {
1820                 if (tp->ops->dump &&
1821                     tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
1822                         goto nla_put_failure;
1823         }
1824         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1825         return skb->len;
1826
1827 out_nlmsg_trim:
1828 nla_put_failure:
1829         nlmsg_trim(skb, b);
1830         return -1;
1831 }
1832
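/* Layout of the message built by tcf_fill_node() above (sketch):
 *
 *   struct nlmsghdr        nlmsg_type = event (e.g. RTM_NEWTFILTER)
 *   struct tcmsg           tcm_info = TC_H_MAKE(prio, protocol)
 *   TCA_KIND   (string)    classifier name, e.g. "flower"
 *   TCA_CHAIN  (u32)       chain index
 *   ...                    classifier-specific attributes from
 *                          tp->ops->dump(), when a filter handle is given
 */
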
1833 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
1834                           struct nlmsghdr *n, struct tcf_proto *tp,
1835                           struct tcf_block *block, struct Qdisc *q,
1836                           u32 parent, void *fh, int event, bool unicast,
1837                           bool rtnl_held)
1838 {
1839         struct sk_buff *skb;
1840         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1841         int err = 0;
1842
1843         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1844         if (!skb)
1845                 return -ENOBUFS;
1846
1847         if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1848                           n->nlmsg_seq, n->nlmsg_flags, event,
1849                           rtnl_held) <= 0) {
1850                 kfree_skb(skb);
1851                 return -EINVAL;
1852         }
1853
1854         if (unicast)
1855                 err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1856         else
1857                 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1858                                      n->nlmsg_flags & NLM_F_ECHO);
1859
1860         if (err > 0)
1861                 err = 0;
1862         return err;
1863 }
1864
1865 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1866                               struct nlmsghdr *n, struct tcf_proto *tp,
1867                               struct tcf_block *block, struct Qdisc *q,
1868                               u32 parent, void *fh, bool unicast, bool *last,
1869                               bool rtnl_held, struct netlink_ext_ack *extack)
1870 {
1871         struct sk_buff *skb;
1872         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1873         int err;
1874
1875         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1876         if (!skb)
1877                 return -ENOBUFS;
1878
1879         if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1880                           n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1881                           rtnl_held) <= 0) {
1882                 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1883                 kfree_skb(skb);
1884                 return -EINVAL;
1885         }
1886
1887         err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
1888         if (err) {
1889                 kfree_skb(skb);
1890                 return err;
1891         }
1892
1893         if (unicast)
1894                 err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1895         else
1896                 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1897                                      n->nlmsg_flags & NLM_F_ECHO);
1898         if (err < 0)
1899                 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1900
1901         if (err > 0)
1902                 err = 0;
1903         return err;
1904 }
1905
1906 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1907                                  struct tcf_block *block, struct Qdisc *q,
1908                                  u32 parent, struct nlmsghdr *n,
1909                                  struct tcf_chain *chain, int event,
1910                                  bool rtnl_held)
1911 {
1912         struct tcf_proto *tp;
1913
1914         for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
1915              tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
1916                 tfilter_notify(net, oskb, n, tp, block,
1917                                q, parent, NULL, event, false, rtnl_held);
1918 }
1919
1920 static void tfilter_put(struct tcf_proto *tp, void *fh)
1921 {
1922         if (tp->ops->put && fh)
1923                 tp->ops->put(tp, fh);
1924 }
1925
1926 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1927                           struct netlink_ext_ack *extack)
1928 {
1929         struct net *net = sock_net(skb->sk);
1930         struct nlattr *tca[TCA_MAX + 1];
1931         char name[IFNAMSIZ];
1932         struct tcmsg *t;
1933         u32 protocol;
1934         u32 prio;
1935         bool prio_allocate;
1936         u32 parent;
1937         u32 chain_index;
1938         struct Qdisc *q = NULL;
1939         struct tcf_chain_info chain_info;
1940         struct tcf_chain *chain = NULL;
1941         struct tcf_block *block;
1942         struct tcf_proto *tp;
1943         unsigned long cl;
1944         void *fh;
1945         int err;
1946         int tp_created;
1947         bool rtnl_held = false;
1948
1949         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1950                 return -EPERM;
1951
1952 replay:
1953         tp_created = 0;
1954
1955         err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
1956                                      rtm_tca_policy, extack);
1957         if (err < 0)
1958                 return err;
1959
1960         t = nlmsg_data(n);
1961         protocol = TC_H_MIN(t->tcm_info);
1962         prio = TC_H_MAJ(t->tcm_info);
1963         prio_allocate = false;
1964         parent = t->tcm_parent;
1965         tp = NULL;
1966         cl = 0;
1967         block = NULL;
1968
1969         if (prio == 0) {
1970                 /* If no priority is provided by the user,
1971                  * we allocate one.
1972                  */
1973                 if (n->nlmsg_flags & NLM_F_CREATE) {
1974                         prio = TC_H_MAKE(0x80000000U, 0U);
1975                         prio_allocate = true;
1976                 } else {
1977                         NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
1978                         return -ENOENT;
1979                 }
1980         }
1981
1982         /* Find head of filter chain. */
1983
1984         err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
1985         if (err)
1986                 return err;
1987
1988         if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
1989                 NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
1990                 err = -EINVAL;
1991                 goto errout;
1992         }
1993
1994         /* Take the rtnl mutex if rtnl_held was set to true on a previous
1995          * iteration, the block is shared (no qdisc found), the qdisc is
1996          * not unlocked, the classifier type is not specified, or the
1997          * classifier is not unlocked. */
1998         if (rtnl_held ||
1999             (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2000             !tcf_proto_is_unlocked(name)) {
2001                 rtnl_held = true;
2002                 rtnl_lock();
2003         }
2004
2005         err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2006         if (err)
2007                 goto errout;
2008
2009         block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2010                                  extack);
2011         if (IS_ERR(block)) {
2012                 err = PTR_ERR(block);
2013                 goto errout;
2014         }
2015
2016         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2017         if (chain_index > TC_ACT_EXT_VAL_MASK) {
2018                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2019                 err = -EINVAL;
2020                 goto errout;
2021         }
2022         chain = tcf_chain_get(block, chain_index, true);
2023         if (!chain) {
2024                 NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
2025                 err = -ENOMEM;
2026                 goto errout;
2027         }
2028
2029         mutex_lock(&chain->filter_chain_lock);
2030         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2031                                prio, prio_allocate);
2032         if (IS_ERR(tp)) {
2033                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2034                 err = PTR_ERR(tp);
2035                 goto errout_locked;
2036         }
2037
2038         if (tp == NULL) {
2039                 struct tcf_proto *tp_new = NULL;
2040
2041                 if (chain->flushing) {
2042                         err = -EAGAIN;
2043                         goto errout_locked;
2044                 }
2045
2046                 /* Proto-tcf does not exist, create a new one */
2047
2048                 if (tca[TCA_KIND] == NULL || !protocol) {
2049                         NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
2050                         err = -EINVAL;
2051                         goto errout_locked;
2052                 }
2053
2054                 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2055                         NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2056                         err = -ENOENT;
2057                         goto errout_locked;
2058                 }
2059
2060                 if (prio_allocate)
2061                         prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2062                                                                &chain_info));
2063
2064                 mutex_unlock(&chain->filter_chain_lock);
2065                 tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
2066                                           protocol, prio, chain, rtnl_held,
2067                                           extack);
2068                 if (IS_ERR(tp_new)) {
2069                         err = PTR_ERR(tp_new);
2070                         goto errout_tp;
2071                 }
2072
2073                 tp_created = 1;
2074                 tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2075                                                 rtnl_held);
2076                 if (IS_ERR(tp)) {
2077                         err = PTR_ERR(tp);
2078                         goto errout_tp;
2079                 }
2080         } else {
2081                 mutex_unlock(&chain->filter_chain_lock);
2082         }
2083
2084         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2085                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2086                 err = -EINVAL;
2087                 goto errout;
2088         }
2089
2090         fh = tp->ops->get(tp, t->tcm_handle);
2091
2092         if (!fh) {
2093                 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2094                         NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2095                         err = -ENOENT;
2096                         goto errout;
2097                 }
2098         } else if (n->nlmsg_flags & NLM_F_EXCL) {
2099                 tfilter_put(tp, fh);
2100                 NL_SET_ERR_MSG(extack, "Filter already exists");
2101                 err = -EEXIST;
2102                 goto errout;
2103         }
2104
2105         if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2106                 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
2107                 err = -EINVAL;
2108                 goto errout;
2109         }
2110
2111         err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2112                               n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
2113                               rtnl_held, extack);
2114         if (err == 0) {
2115                 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2116                                RTM_NEWTFILTER, false, rtnl_held);
2117                 tfilter_put(tp, fh);
2118                 /* q pointer is NULL for shared blocks */
2119                 if (q)
2120                         q->flags &= ~TCQ_F_CAN_BYPASS;
2121         }
2122
2123 errout:
2124         if (err && tp_created)
2125                 tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2126 errout_tp:
2127         if (chain) {
2128                 if (tp && !IS_ERR(tp))
2129                         tcf_proto_put(tp, rtnl_held, NULL);
2130                 if (!tp_created)
2131                         tcf_chain_put(chain);
2132         }
2133         tcf_block_release(q, block, rtnl_held);
2134
2135         if (rtnl_held)
2136                 rtnl_unlock();
2137
2138         if (err == -EAGAIN) {
2139                 /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2140                  * of target chain.
2141                  */
2142                 rtnl_held = true;
2143                 /* Replay the request. */
2144                 goto replay;
2145         }
2146         return err;
2147
2148 errout_locked:
2149         mutex_unlock(&chain->filter_chain_lock);
2150         goto errout;
2151 }
2152
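/* For reference, tc_new_tfilter() above services RTM_NEWTFILTER requests
 * such as the one generated by this illustrative iproute2 invocation:
 *
 *   tc filter add dev eth0 ingress protocol ip prio 10 flower \
 *       dst_ip 192.0.2.1 action drop
 *
 * where "prio 10" selects the tcf_proto and "flower" its kind.
 */
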
2153 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2154                           struct netlink_ext_ack *extack)
2155 {
2156         struct net *net = sock_net(skb->sk);
2157         struct nlattr *tca[TCA_MAX + 1];
2158         char name[IFNAMSIZ];
2159         struct tcmsg *t;
2160         u32 protocol;
2161         u32 prio;
2162         u32 parent;
2163         u32 chain_index;
2164         struct Qdisc *q = NULL;
2165         struct tcf_chain_info chain_info;
2166         struct tcf_chain *chain = NULL;
2167         struct tcf_block *block = NULL;
2168         struct tcf_proto *tp = NULL;
2169         unsigned long cl = 0;
2170         void *fh = NULL;
2171         int err;
2172         bool rtnl_held = false;
2173
2174         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2175                 return -EPERM;
2176
2177         err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2178                                      rtm_tca_policy, extack);
2179         if (err < 0)
2180                 return err;
2181
2182         t = nlmsg_data(n);
2183         protocol = TC_H_MIN(t->tcm_info);
2184         prio = TC_H_MAJ(t->tcm_info);
2185         parent = t->tcm_parent;
2186
2187         if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
2188                 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
2189                 return -ENOENT;
2190         }
2191
2192         /* Find head of filter chain. */
2193
2194         err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2195         if (err)
2196                 return err;
2197
2198         if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2199                 NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2200                 err = -EINVAL;
2201                 goto errout;
2202         }
2203         /* Take the rtnl mutex if flushing the whole chain, the block is
2204          * shared (no qdisc found), the qdisc is not unlocked, the classifier
2205          * type is not specified, or the classifier is not unlocked.
2206          */
2207         if (!prio ||
2208             (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2209             !tcf_proto_is_unlocked(name)) {
2210                 rtnl_held = true;
2211                 rtnl_lock();
2212         }
2213
2214         err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2215         if (err)
2216                 goto errout;
2217
2218         block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2219                                  extack);
2220         if (IS_ERR(block)) {
2221                 err = PTR_ERR(block);
2222                 goto errout;
2223         }
2224
2225         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2226         if (chain_index > TC_ACT_EXT_VAL_MASK) {
2227                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2228                 err = -EINVAL;
2229                 goto errout;
2230         }
2231         chain = tcf_chain_get(block, chain_index, false);
2232         if (!chain) {
2233                 /* User requested flush on non-existent chain. Nothing to do,
2234                  * so just return success.
2235                  */
2236                 if (prio == 0) {
2237                         err = 0;
2238                         goto errout;
2239                 }
2240                 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2241                 err = -ENOENT;
2242                 goto errout;
2243         }
2244
2245         if (prio == 0) {
2246                 tfilter_notify_chain(net, skb, block, q, parent, n,
2247                                      chain, RTM_DELTFILTER, rtnl_held);
2248                 tcf_chain_flush(chain, rtnl_held);
2249                 err = 0;
2250                 goto errout;
2251         }
2252
2253         mutex_lock(&chain->filter_chain_lock);
2254         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2255                                prio, false);
2256         if (!tp || IS_ERR(tp)) {
2257                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2258                 err = tp ? PTR_ERR(tp) : -ENOENT;
2259                 goto errout_locked;
2260         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2261                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2262                 err = -EINVAL;
2263                 goto errout_locked;
2264         } else if (t->tcm_handle == 0) {
2265                 tcf_proto_signal_destroying(chain, tp);
2266                 tcf_chain_tp_remove(chain, &chain_info, tp);
2267                 mutex_unlock(&chain->filter_chain_lock);
2268
2269                 tcf_proto_put(tp, rtnl_held, NULL);
2270                 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2271                                RTM_DELTFILTER, false, rtnl_held);
2272                 err = 0;
2273                 goto errout;
2274         }
2275         mutex_unlock(&chain->filter_chain_lock);
2276
2277         fh = tp->ops->get(tp, t->tcm_handle);
2278
2279         if (!fh) {
2280                 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2281                 err = -ENOENT;
2282         } else {
2283                 bool last;
2284
2285                 err = tfilter_del_notify(net, skb, n, tp, block,
2286                                          q, parent, fh, false, &last,
2287                                          rtnl_held, extack);
2288
2289                 if (err)
2290                         goto errout;
2291                 if (last)
2292                         tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
2293         }
2294
2295 errout:
2296         if (chain) {
2297                 if (tp && !IS_ERR(tp))
2298                         tcf_proto_put(tp, rtnl_held, NULL);
2299                 tcf_chain_put(chain);
2300         }
2301         tcf_block_release(q, block, rtnl_held);
2302
2303         if (rtnl_held)
2304                 rtnl_unlock();
2305
2306         return err;
2307
2308 errout_locked:
2309         mutex_unlock(&chain->filter_chain_lock);
2310         goto errout;
2311 }
2312
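/* Deletion granularity implemented by tc_del_tfilter() above:
 *   prio == 0          flush the whole chain (protocol, handle and kind
 *                      must be unset)
 *   tcm_handle == 0    remove the whole tcf_proto for this prio/protocol
 *   tcm_handle != 0    delete a single filter via tp->ops->delete()
 */
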
2313 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2314                           struct netlink_ext_ack *extack)
2315 {
2316         struct net *net = sock_net(skb->sk);
2317         struct nlattr *tca[TCA_MAX + 1];
2318         char name[IFNAMSIZ];
2319         struct tcmsg *t;
2320         u32 protocol;
2321         u32 prio;
2322         u32 parent;
2323         u32 chain_index;
2324         struct Qdisc *q = NULL;
2325         struct tcf_chain_info chain_info;
2326         struct tcf_chain *chain = NULL;
2327         struct tcf_block *block = NULL;
2328         struct tcf_proto *tp = NULL;
2329         unsigned long cl = 0;
2330         void *fh = NULL;
2331         int err;
2332         bool rtnl_held = false;
2333
2334         err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2335                                      rtm_tca_policy, extack);
2336         if (err < 0)
2337                 return err;
2338
2339         t = nlmsg_data(n);
2340         protocol = TC_H_MIN(t->tcm_info);
2341         prio = TC_H_MAJ(t->tcm_info);
2342         parent = t->tcm_parent;
2343
2344         if (prio == 0) {
2345                 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2346                 return -ENOENT;
2347         }
2348
2349         /* Find head of filter chain. */
2350
2351         err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2352         if (err)
2353                 return err;
2354
2355         if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2356                 NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2357                 err = -EINVAL;
2358                 goto errout;
2359         }
2360         /* Take the rtnl mutex if the block is shared (no qdisc found), the
2361          * qdisc is not unlocked, the classifier type is not specified, or
2362          * the classifier is not unlocked.
2363          */
2364         if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2365             !tcf_proto_is_unlocked(name)) {
2366                 rtnl_held = true;
2367                 rtnl_lock();
2368         }
2369
2370         err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2371         if (err)
2372                 goto errout;
2373
2374         block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2375                                  extack);
2376         if (IS_ERR(block)) {
2377                 err = PTR_ERR(block);
2378                 goto errout;
2379         }
2380
2381         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2382         if (chain_index > TC_ACT_EXT_VAL_MASK) {
2383                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2384                 err = -EINVAL;
2385                 goto errout;
2386         }
2387         chain = tcf_chain_get(block, chain_index, false);
2388         if (!chain) {
2389                 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2390                 err = -EINVAL;
2391                 goto errout;
2392         }
2393
2394         mutex_lock(&chain->filter_chain_lock);
2395         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2396                                prio, false);
2397         mutex_unlock(&chain->filter_chain_lock);
2398         if (!tp || IS_ERR(tp)) {
2399                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2400                 err = tp ? PTR_ERR(tp) : -ENOENT;
2401                 goto errout;
2402         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2403                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2404                 err = -EINVAL;
2405                 goto errout;
2406         }
2407
2408         fh = tp->ops->get(tp, t->tcm_handle);
2409
2410         if (!fh) {
2411                 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2412                 err = -ENOENT;
2413         } else {
2414                 err = tfilter_notify(net, skb, n, tp, block, q, parent,
2415                                      fh, RTM_NEWTFILTER, true, rtnl_held);
2416                 if (err < 0)
2417                         NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
2418         }
2419
2420         tfilter_put(tp, fh);
2421 errout:
2422         if (chain) {
2423                 if (tp && !IS_ERR(tp))
2424                         tcf_proto_put(tp, rtnl_held, NULL);
2425                 tcf_chain_put(chain);
2426         }
2427         tcf_block_release(q, block, rtnl_held);
2428
2429         if (rtnl_held)
2430                 rtnl_unlock();
2431
2432         return err;
2433 }
2434
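/* Note on tc_get_tfilter() above: a successful get replies with a unicast
 * RTM_NEWTFILTER message to the requesting socket rather than a broadcast
 * to RTNLGRP_TC.
 */
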
2435 struct tcf_dump_args {
2436         struct tcf_walker w;
2437         struct sk_buff *skb;
2438         struct netlink_callback *cb;
2439         struct tcf_block *block;
2440         struct Qdisc *q;
2441         u32 parent;
2442 };
2443
2444 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2445 {
2446         struct tcf_dump_args *a = (void *)arg;
2447         struct net *net = sock_net(a->skb->sk);
2448
2449         return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
2450                              n, NETLINK_CB(a->cb->skb).portid,
2451                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2452                              RTM_NEWTFILTER, true);
2453 }
2454
2455 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
2456                            struct sk_buff *skb, struct netlink_callback *cb,
2457                            long index_start, long *p_index)
2458 {
2459         struct net *net = sock_net(skb->sk);
2460         struct tcf_block *block = chain->block;
2461         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2462         struct tcf_proto *tp, *tp_prev;
2463         struct tcf_dump_args arg;
2464
2465         for (tp = __tcf_get_next_proto(chain, NULL);
2466              tp;
2467              tp_prev = tp,
2468                      tp = __tcf_get_next_proto(chain, tp),
2469                      tcf_proto_put(tp_prev, true, NULL),
2470                      (*p_index)++) {
2471                 if (*p_index < index_start)
2472                         continue;
2473                 if (TC_H_MAJ(tcm->tcm_info) &&
2474                     TC_H_MAJ(tcm->tcm_info) != tp->prio)
2475                         continue;
2476                 if (TC_H_MIN(tcm->tcm_info) &&
2477                     TC_H_MIN(tcm->tcm_info) != tp->protocol)
2478                         continue;
2479                 if (*p_index > index_start)
2480                         memset(&cb->args[1], 0,
2481                                sizeof(cb->args) - sizeof(cb->args[0]));
2482                 if (cb->args[1] == 0) {
2483                         if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
2484                                           NETLINK_CB(cb->skb).portid,
2485                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
2486                                           RTM_NEWTFILTER, true) <= 0)
2487                                 goto errout;
2488                         cb->args[1] = 1;
2489                 }
2490                 if (!tp->ops->walk)
2491                         continue;
2492                 arg.w.fn = tcf_node_dump;
2493                 arg.skb = skb;
2494                 arg.cb = cb;
2495                 arg.block = block;
2496                 arg.q = q;
2497                 arg.parent = parent;
2498                 arg.w.stop = 0;
2499                 arg.w.skip = cb->args[1] - 1;
2500                 arg.w.count = 0;
2501                 arg.w.cookie = cb->args[2];
2502                 tp->ops->walk(tp, &arg.w, true);
2503                 cb->args[2] = arg.w.cookie;
2504                 cb->args[1] = arg.w.count + 1;
2505                 if (arg.w.stop)
2506                         goto errout;
2507         }
2508         return true;
2509
2510 errout:
2511         tcf_proto_put(tp, true, NULL);
2512         return false;
2513 }
2514
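/* Dump resumption state used by tcf_chain_dump() above and
 * tc_dump_tfilter() below:
 *   cb->args[0]   index of the next tp, flattened across all chains
 *   cb->args[1]   1 + number of filters already dumped from the current tp
 *                 (0 means the tp node itself has not been dumped yet)
 *   cb->args[2]   classifier-private walk cookie (arg.w.cookie)
 */
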
2515 /* called with RTNL */
2516 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
2517 {
2518         struct tcf_chain *chain, *chain_prev;
2519         struct net *net = sock_net(skb->sk);
2520         struct nlattr *tca[TCA_MAX + 1];
2521         struct Qdisc *q = NULL;
2522         struct tcf_block *block;
2523         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2524         long index_start;
2525         long index;
2526         u32 parent;
2527         int err;
2528
2529         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2530                 return skb->len;
2531
2532         err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2533                                      NULL, cb->extack);
2534         if (err)
2535                 return err;
2536
2537         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2538                 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2539                 if (!block)
2540                         goto out;
2541                 /* If we work with a block index, q is NULL and the parent
2542                  * value will never be used in the following code. The check
2543                  * in tcf_fill_node prevents it. However, the compiler does
2544                  * not see that far, so set parent to zero to silence the
2545                  * warning about parent being uninitialized.
2546                  */
2547                 parent = 0;
2548         } else {
2549                 const struct Qdisc_class_ops *cops;
2550                 struct net_device *dev;
2551                 unsigned long cl = 0;
2552
2553                 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2554                 if (!dev)
2555                         return skb->len;
2556
2557                 parent = tcm->tcm_parent;
2558                 if (!parent) {
2559                         q = dev->qdisc;
2560                         parent = q->handle;
2561                 } else {
2562                         q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2563                 }
2564                 if (!q)
2565                         goto out;
2566                 cops = q->ops->cl_ops;
2567                 if (!cops)
2568                         goto out;
2569                 if (!cops->tcf_block)
2570                         goto out;
2571                 if (TC_H_MIN(tcm->tcm_parent)) {
2572                         cl = cops->find(q, tcm->tcm_parent);
2573                         if (cl == 0)
2574                                 goto out;
2575                 }
2576                 block = cops->tcf_block(q, cl, NULL);
2577                 if (!block)
2578                         goto out;
2579                 if (tcf_block_shared(block))
2580                         q = NULL;
2581         }
2582
2583         index_start = cb->args[0];
2584         index = 0;
2585
2586         for (chain = __tcf_get_next_chain(block, NULL);
2587              chain;
2588              chain_prev = chain,
2589                      chain = __tcf_get_next_chain(block, chain),
2590                      tcf_chain_put(chain_prev)) {
2591                 if (tca[TCA_CHAIN] &&
2592                     nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2593                         continue;
2594                 if (!tcf_chain_dump(chain, q, parent, skb, cb,
2595                                     index_start, &index)) {
2596                         tcf_chain_put(chain);
2597                         err = -EMSGSIZE;
2598                         break;
2599                 }
2600         }
2601
2602         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2603                 tcf_block_refcnt_put(block, true);
2604         cb->args[0] = index;
2605
2606 out:
2607         /* If we made no progress, the error (EMSGSIZE) is real */
2608         if (skb->len == 0 && err)
2609                 return err;
2610         return skb->len;
2611 }
2612
2613 static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2614                               void *tmplt_priv, u32 chain_index,
2615                               struct net *net, struct sk_buff *skb,
2616                               struct tcf_block *block,
2617                               u32 portid, u32 seq, u16 flags, int event)
2618 {
2619         unsigned char *b = skb_tail_pointer(skb);
2620         const struct tcf_proto_ops *ops;
2621         struct nlmsghdr *nlh;
2622         struct tcmsg *tcm;
2623         void *priv;
2624
2625         ops = tmplt_ops;
2626         priv = tmplt_priv;
2627
2628         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2629         if (!nlh)
2630                 goto out_nlmsg_trim;
2631         tcm = nlmsg_data(nlh);
2632         tcm->tcm_family = AF_UNSPEC;
2633         tcm->tcm__pad1 = 0;
2634         tcm->tcm__pad2 = 0;
2635         tcm->tcm_handle = 0;
2636         if (block->q) {
2637                 tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2638                 tcm->tcm_parent = block->q->handle;
2639         } else {
2640                 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2641                 tcm->tcm_block_index = block->index;
2642         }
2643
2644         if (nla_put_u32(skb, TCA_CHAIN, chain_index))
2645                 goto nla_put_failure;
2646
2647         if (ops) {
2648                 if (nla_put_string(skb, TCA_KIND, ops->kind))
2649                         goto nla_put_failure;
2650                 if (ops->tmplt_dump(skb, net, priv) < 0)
2651                         goto nla_put_failure;
2652         }
2653
2654         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2655         return skb->len;
2656
2657 out_nlmsg_trim:
2658 nla_put_failure:
2659         nlmsg_trim(skb, b);
2660         return -EMSGSIZE;
2661 }
2662
2663 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2664                            u32 seq, u16 flags, int event, bool unicast)
2665 {
2666         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2667         struct tcf_block *block = chain->block;
2668         struct net *net = block->net;
2669         struct sk_buff *skb;
2670         int err = 0;
2671
2672         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2673         if (!skb)
2674                 return -ENOBUFS;
2675
2676         if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2677                                chain->index, net, skb, block, portid,
2678                                seq, flags, event) <= 0) {
2679                 kfree_skb(skb);
2680                 return -EINVAL;
2681         }
2682
2683         if (unicast)
2684                 err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2685         else
2686                 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2687                                      flags & NLM_F_ECHO);
2688
2689         if (err > 0)
2690                 err = 0;
2691         return err;
2692 }
2693
2694 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2695                                   void *tmplt_priv, u32 chain_index,
2696                                   struct tcf_block *block, struct sk_buff *oskb,
2697                                   u32 seq, u16 flags, bool unicast)
2698 {
2699         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2700         struct net *net = block->net;
2701         struct sk_buff *skb;
2702
2703         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2704         if (!skb)
2705                 return -ENOBUFS;
2706
2707         if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2708                                block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
2709                 kfree_skb(skb);
2710                 return -EINVAL;
2711         }
2712
2713         if (unicast)
2714                 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2715
2716         return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2717 }
2718
2719 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2720                               struct nlattr **tca,
2721                               struct netlink_ext_ack *extack)
2722 {
2723         const struct tcf_proto_ops *ops;
2724         void *tmplt_priv;
2725
2726         /* If kind is not set, the user did not specify a template. */
2727         if (!tca[TCA_KIND])
2728                 return 0;
2729
2730         ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
2731         if (IS_ERR(ops))
2732                 return PTR_ERR(ops);
2733         if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2734                 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2735                 return -EOPNOTSUPP;
2736         }
2737
2738         tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2739         if (IS_ERR(tmplt_priv)) {
2740                 module_put(ops->owner);
2741                 return PTR_ERR(tmplt_priv);
2742         }
2743         chain->tmplt_ops = ops;
2744         chain->tmplt_priv = tmplt_priv;
2745         return 0;
2746 }
2747
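/* For reference, tc_chain_tmplt_add() above backs chain templates created
 * by an illustrative iproute2 invocation such as:
 *
 *   tc chain add dev eth0 ingress protocol ip chain 1 \
 *       flower dst_ip 192.0.2.0/24
 *
 * Filters later added to chain 1 are validated against the template.
 */
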
2748 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2749                                void *tmplt_priv)
2750 {
2751         /* If template ops are not set, there is no work to do for us. */
2752         if (!tmplt_ops)
2753                 return;
2754
2755         tmplt_ops->tmplt_destroy(tmplt_priv);
2756         module_put(tmplt_ops->owner);
2757 }
2758
2759 /* Add/delete/get a chain */
2760
2761 static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2762                         struct netlink_ext_ack *extack)
2763 {
2764         struct net *net = sock_net(skb->sk);
2765         struct nlattr *tca[TCA_MAX + 1];
2766         struct tcmsg *t;
2767         u32 parent;
2768         u32 chain_index;
2769         struct Qdisc *q = NULL;
2770         struct tcf_chain *chain = NULL;
2771         struct tcf_block *block;
2772         unsigned long cl;
2773         int err;
2774
2775         if (n->nlmsg_type != RTM_GETCHAIN &&
2776             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2777                 return -EPERM;
2778
2779 replay:
2780         err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2781                                      rtm_tca_policy, extack);
2782         if (err < 0)
2783                 return err;
2784
2785         t = nlmsg_data(n);
2786         parent = t->tcm_parent;
2787         cl = 0;
2788
2789         block = tcf_block_find(net, &q, &parent, &cl,
2790                                t->tcm_ifindex, t->tcm_block_index, extack);
2791         if (IS_ERR(block))
2792                 return PTR_ERR(block);
2793
2794         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2795         if (chain_index > TC_ACT_EXT_VAL_MASK) {
2796                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2797                 err = -EINVAL;
2798                 goto errout_block;
2799         }
2800
2801         mutex_lock(&block->lock);
2802         chain = tcf_chain_lookup(block, chain_index);
2803         if (n->nlmsg_type == RTM_NEWCHAIN) {
2804                 if (chain) {
2805                         if (tcf_chain_held_by_acts_only(chain)) {
2806                                 /* The chain exists only because there is
2807                                  * some action referencing it.
2808                                  */
2809                                 tcf_chain_hold(chain);
2810                         } else {
2811                                 NL_SET_ERR_MSG(extack, "Filter chain already exists");
2812                                 err = -EEXIST;
2813                                 goto errout_block_locked;
2814                         }
2815                 } else {
2816                         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2817                                 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2818                                 err = -ENOENT;
2819                                 goto errout_block_locked;
2820                         }
2821                         chain = tcf_chain_create(block, chain_index);
2822                         if (!chain) {
2823                                 NL_SET_ERR_MSG(extack, "Failed to create filter chain");
2824                                 err = -ENOMEM;
2825                                 goto errout_block_locked;
2826                         }
2827                 }
2828         } else {
2829                 if (!chain || tcf_chain_held_by_acts_only(chain)) {
2830                         NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2831                         err = -EINVAL;
2832                         goto errout_block_locked;
2833                 }
2834                 tcf_chain_hold(chain);
2835         }
2836
2837         if (n->nlmsg_type == RTM_NEWCHAIN) {
2838                 /* Modifying the chain requires holding the parent block lock.
2839                  * As the chain was successfully added or looked up above, take
2840                  * a reference to it. This ensures that an empty chain does not
2841                  * disappear before the end of this function.
2842                  */
2843                 tcf_chain_hold(chain);
2844                 chain->explicitly_created = true;
2845         }
2846         mutex_unlock(&block->lock);
2847
2848         switch (n->nlmsg_type) {
2849         case RTM_NEWCHAIN:
2850                 err = tc_chain_tmplt_add(chain, net, tca, extack);
2851                 if (err) {
2852                         tcf_chain_put_explicitly_created(chain);
2853                         goto errout;
2854                 }
2855
2856                 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
2857                                 RTM_NEWCHAIN, false);
2858                 break;
2859         case RTM_DELCHAIN:
2860                 tfilter_notify_chain(net, skb, block, q, parent, n,
2861                                      chain, RTM_DELTFILTER, true);
2862                 /* Flush the chain first as the user requested chain removal. */
2863                 tcf_chain_flush(chain, true);
2864                 /* In case the chain was successfully deleted, put a reference
2865                  * to the chain previously taken during addition.
2866                  */
2867                 tcf_chain_put_explicitly_created(chain);
2868                 break;
2869         case RTM_GETCHAIN:
2870                 err = tc_chain_notify(chain, skb, n->nlmsg_seq,
2871                                       n->nlmsg_seq, n->nlmsg_type, true);
2872                 if (err < 0)
2873                         NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
2874                 break;
2875         default:
2876                 err = -EOPNOTSUPP;
2877                 NL_SET_ERR_MSG(extack, "Unsupported message type");
2878                 goto errout;
2879         }
2880
2881 errout:
2882         tcf_chain_put(chain);
2883 errout_block:
2884         tcf_block_release(q, block, true);
2885         if (err == -EAGAIN)
2886                 /* Replay the request. */
2887                 goto replay;
2888         return err;
2889
2890 errout_block_locked:
2891         mutex_unlock(&block->lock);
2892         goto errout_block;
2893 }
2894
2895 /* called with RTNL */
2896 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
2897 {
2898         struct net *net = sock_net(skb->sk);
2899         struct nlattr *tca[TCA_MAX + 1];
2900         struct Qdisc *q = NULL;
2901         struct tcf_block *block;
2902         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2903         struct tcf_chain *chain;
2904         long index_start;
2905         long index;
2906         u32 parent;
2907         int err;
2908
2909         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2910                 return skb->len;
2911
2912         err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2913                                      rtm_tca_policy, cb->extack);
2914         if (err)
2915                 return err;
2916
2917         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2918                 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2919                 if (!block)
2920                         goto out;
2921                 /* If we work with a block index, q is NULL and the parent
2922                  * value will never be used in the following code. The check
2923                  * in tcf_fill_node prevents it. However, the compiler does
2924                  * not see that far, so set parent to zero to silence the
2925                  * warning about parent being uninitialized.
2926                  */
2927                 parent = 0;
2928         } else {
2929                 const struct Qdisc_class_ops *cops;
2930                 struct net_device *dev;
2931                 unsigned long cl = 0;
2932
2933                 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2934                 if (!dev)
2935                         return skb->len;
2936
2937                 parent = tcm->tcm_parent;
2938                 if (!parent) {
2939                         q = dev->qdisc;
2940                         parent = q->handle;
2941                 } else {
2942                         q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2943                 }
2944                 if (!q)
2945                         goto out;
2946                 cops = q->ops->cl_ops;
2947                 if (!cops)
2948                         goto out;
2949                 if (!cops->tcf_block)
2950                         goto out;
2951                 if (TC_H_MIN(tcm->tcm_parent)) {
2952                         cl = cops->find(q, tcm->tcm_parent);
2953                         if (cl == 0)
2954                                 goto out;
2955                 }
2956                 block = cops->tcf_block(q, cl, NULL);
2957                 if (!block)
2958                         goto out;
2959                 if (tcf_block_shared(block))
2960                         q = NULL;
2961         }
2962
2963         index_start = cb->args[0];
2964         index = 0;
2965
2966         mutex_lock(&block->lock);
2967         list_for_each_entry(chain, &block->chain_list, list) {
2968                 if ((tca[TCA_CHAIN] &&
2969                      nla_get_u32(tca[TCA_CHAIN]) != chain->index))
2970                         continue;
2971                 if (index < index_start) {
2972                         index++;
2973                         continue;
2974                 }
2975                 if (tcf_chain_held_by_acts_only(chain))
2976                         continue;
2977                 err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2978                                          chain->index, net, skb, block,
2979                                          NETLINK_CB(cb->skb).portid,
2980                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
2981                                          RTM_NEWCHAIN);
2982                 if (err <= 0)
2983                         break;
2984                 index++;
2985         }
2986         mutex_unlock(&block->lock);
2987
2988         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2989                 tcf_block_refcnt_put(block, true);
2990         cb->args[0] = index;
2991
2992 out:
2993         /* If we made no progress, the error (EMSGSIZE) is real */
2994         if (skb->len == 0 && err)
2995                 return err;
2996         return skb->len;
2997 }
2998
2999 void tcf_exts_destroy(struct tcf_exts *exts)
3000 {
3001 #ifdef CONFIG_NET_CLS_ACT
3002         if (exts->actions) {
3003                 tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
3004                 kfree(exts->actions);
3005         }
3006         exts->nr_actions = 0;
3007 #endif
3008 }
3009 EXPORT_SYMBOL(tcf_exts_destroy);
3010
3011 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3012                       struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
3013                       bool rtnl_held, struct netlink_ext_ack *extack)
3014 {
3015 #ifdef CONFIG_NET_CLS_ACT
3016         {
3017                 struct tc_action *act;
3018                 size_t attr_size = 0;
3019
3020                 if (exts->police && tb[exts->police]) {
3021                         act = tcf_action_init_1(net, tp, tb[exts->police],
3022                                                 rate_tlv, "police", ovr,
3023                                                 TCA_ACT_BIND, rtnl_held,
3024                                                 extack);
3025                         if (IS_ERR(act))
3026                                 return PTR_ERR(act);
3027
3028                         act->type = exts->type = TCA_OLD_COMPAT;
3029                         exts->actions[0] = act;
3030                         exts->nr_actions = 1;
3031                 } else if (exts->action && tb[exts->action]) {
3032                         int err;
3033
3034                         err = tcf_action_init(net, tp, tb[exts->action],
3035                                               rate_tlv, NULL, ovr, TCA_ACT_BIND,
3036                                               exts->actions, &attr_size,
3037                                               rtnl_held, extack);
3038                         if (err < 0)
3039                                 return err;
3040                         exts->nr_actions = err;
3041                 }
3042         }
3043 #else
3044         if ((exts->action && tb[exts->action]) ||
3045             (exts->police && tb[exts->police])) {
3046                 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3047                 return -EOPNOTSUPP;
3048         }
3049 #endif
3050
3051         return 0;
3052 }
3053 EXPORT_SYMBOL(tcf_exts_validate);
3054
3055 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
3056 {
3057 #ifdef CONFIG_NET_CLS_ACT
3058         struct tcf_exts old = *dst;
3059
3060         *dst = *src;
3061         tcf_exts_destroy(&old);
3062 #endif
3063 }
3064 EXPORT_SYMBOL(tcf_exts_change);
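
/* Illustrative sketch, not part of this file: a classifier's ->change()
 * path typically validates actions into a temporary tcf_exts and then
 * commits them with tcf_exts_change(), so the old actions are released
 * only after the new ones are in place. Loosely modeled on cls_route;
 * my_filter, my_set_parms and the TCA_MY_* attributes are hypothetical.
 *
 *	static int my_set_parms(struct net *net, struct tcf_proto *tp,
 *				struct my_filter *f, struct nlattr **tb,
 *				struct nlattr *est, bool ovr,
 *				struct netlink_ext_ack *extack)
 *	{
 *		struct tcf_exts e;
 *		int err;
 *
 *		err = tcf_exts_init(&e, TCA_MY_ACT, TCA_MY_POLICE);
 *		if (err < 0)
 *			return err;
 *		err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true,
 *					extack);
 *		if (err < 0) {
 *			tcf_exts_destroy(&e);
 *			return err;
 *		}
 *		tcf_exts_change(&f->exts, &e);
 *		return 0;
 *	}
 */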
3065
3066 #ifdef CONFIG_NET_CLS_ACT
3067 static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
3068 {
3069         if (exts->nr_actions == 0)
3070                 return NULL;
3071         else
3072                 return exts->actions[0];
3073 }
3074 #endif
3075
3076 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
3077 {
3078 #ifdef CONFIG_NET_CLS_ACT
3079         struct nlattr *nest;
3080
3081         if (exts->action && tcf_exts_has_actions(exts)) {
3082                 /* Again for backward-compatible mode: we want
3083                  * to work with both the old and new modes of
3084                  * entering tc data, even if iproute2 was newer
3085                  * - jhs
3086                  */
3087                 if (exts->type != TCA_OLD_COMPAT) {
3088                         nest = nla_nest_start_noflag(skb, exts->action);
3089                         if (nest == NULL)
3090                                 goto nla_put_failure;
3091
3092                         if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
3093                                 goto nla_put_failure;
3094                         nla_nest_end(skb, nest);
3095                 } else if (exts->police) {
3096                         struct tc_action *act = tcf_exts_first_act(exts);
3097                         nest = nla_nest_start_noflag(skb, exts->police);
3098                         if (nest == NULL || !act)
3099                                 goto nla_put_failure;
3100                         if (tcf_action_dump_old(skb, act, 0, 0) < 0)
3101                                 goto nla_put_failure;
3102                         nla_nest_end(skb, nest);
3103                 }
3104         }
3105         return 0;
3106
3107 nla_put_failure:
3108         nla_nest_cancel(skb, nest);
3109         return -1;
3110 #else
3111         return 0;
3112 #endif
3113 }
3114 EXPORT_SYMBOL(tcf_exts_dump);
3115
3117 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
3118 {
3119 #ifdef CONFIG_NET_CLS_ACT
3120         struct tc_action *a = tcf_exts_first_act(exts);
3121         if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
3122                 return -1;
3123 #endif
3124         return 0;
3125 }
3126 EXPORT_SYMBOL(tcf_exts_dump_stats);
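
/* Illustrative sketch, not part of this file: a classifier's ->dump()
 * typically emits its own attributes and the extension actions inside the
 * options nest via tcf_exts_dump(), then the action statistics outside
 * that nest via tcf_exts_dump_stats(). Pattern as in cls_basic; "f" is a
 * hypothetical filter embedding a struct tcf_exts.
 *
 *	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 *	if (!nest)
 *		goto nla_put_failure;
 *	if (tcf_exts_dump(skb, &f->exts) < 0)
 *		goto nla_put_failure;
 *	nla_nest_end(skb, nest);
 *	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 *		goto nla_put_failure;
 */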
3127
3128 static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
3129 {
3130         if (*flags & TCA_CLS_FLAGS_IN_HW)
3131                 return;
3132         *flags |= TCA_CLS_FLAGS_IN_HW;
3133         atomic_inc(&block->offloadcnt);
3134 }
3135
3136 static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3137 {
3138         if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3139                 return;
3140         *flags &= ~TCA_CLS_FLAGS_IN_HW;
3141         atomic_dec(&block->offloadcnt);
3142 }
3143
3144 static void tc_cls_offload_cnt_update(struct tcf_block *block,
3145                                       struct tcf_proto *tp, u32 *cnt,
3146                                       u32 *flags, u32 diff, bool add)
3147 {
3148         lockdep_assert_held(&block->cb_lock);
3149
3150         spin_lock(&tp->lock);
3151         if (add) {
3152                 if (!*cnt)
3153                         tcf_block_offload_inc(block, flags);
3154                 *cnt += diff;
3155         } else {
3156                 *cnt -= diff;
3157                 if (!*cnt)
3158                         tcf_block_offload_dec(block, flags);
3159         }
3160         spin_unlock(&tp->lock);
3161 }
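
/* Worked example of the counting scheme above, derived from these helpers
 * rather than from any driver: offloading one filter to two callbacks
 * leaves *cnt == 2 but sets TCA_CLS_FLAGS_IN_HW and bumps
 * block->offloadcnt exactly once, on the 0 -> nonzero transition. The
 * per-block counter therefore counts offloaded filters, not per-device
 * instances; only when *cnt drops back to 0 are the flag cleared and the
 * counter decremented.
 */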
3162
3163 static void
3164 tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3165                          u32 *cnt, u32 *flags)
3166 {
3167         lockdep_assert_held(&block->cb_lock);
3168
3169         spin_lock(&tp->lock);
3170         tcf_block_offload_dec(block, flags);
3171         *cnt = 0;
3172         spin_unlock(&tp->lock);
3173 }
3174
3175 static int
3176 __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3177                    void *type_data, bool err_stop)
3178 {
3179         struct flow_block_cb *block_cb;
3180         int ok_count = 0;
3181         int err;
3182
3183         list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3184                 err = block_cb->cb(type, type_data, block_cb->cb_priv);
3185                 if (err) {
3186                         if (err_stop)
3187                                 return err;
3188                 } else {
3189                         ok_count++;
3190                 }
3191         }
3192         return ok_count;
3193 }
3194
3195 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3196                      void *type_data, bool err_stop, bool rtnl_held)
3197 {
3198         bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3199         int ok_count;
3200
3201 retry:
3202         if (take_rtnl)
3203                 rtnl_lock();
3204         down_read(&block->cb_lock);
3205         /* Need to obtain the rtnl lock if the block is bound to devs that
3206          * require it. In the block bind code, cb_lock is obtained while
3207          * holding rtnl, so we must take the locks in the same order here.
3208          */
3209         if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3210                 up_read(&block->cb_lock);
3211                 take_rtnl = true;
3212                 goto retry;
3213         }
3214
3215         ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3216
3217         up_read(&block->cb_lock);
3218         if (take_rtnl)
3219                 rtnl_unlock();
3220         return ok_count;
3221 }
3222 EXPORT_SYMBOL(tc_setup_cb_call);
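
/* Illustrative sketch, not part of this file: state-neutral requests such
 * as a stats query go through tc_setup_cb_call() directly, since they do
 * not move the offload counters. Loosely modeled on cls_flower's
 * fl_hw_update_stats() as of this kernel; "f" is the filter.
 *
 *	struct flow_cls_offload cls_flower = {};
 *
 *	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
 *	cls_flower.command = FLOW_CLS_STATS;
 *	cls_flower.cookie = (unsigned long) f;
 *	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
 *			 rtnl_held);
 */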
3223
3224 /* Non-destructive filter add. If a filter that wasn't already in hardware
3225  * is successfully offloaded, increment the block offloads counter. On
3226  * failure, a previously offloaded filter is considered to be intact and the
3227  * offloads counter is not decremented.
3228  */
3229
3230 int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3231                     enum tc_setup_type type, void *type_data, bool err_stop,
3232                     u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3233 {
3234         bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3235         int ok_count;
3236
3237 retry:
3238         if (take_rtnl)
3239                 rtnl_lock();
3240         down_read(&block->cb_lock);
3241         /* Need to obtain the rtnl lock if the block is bound to devs that
3242          * require it. In the block bind code, cb_lock is obtained while
3243          * holding rtnl, so we must take the locks in the same order here.
3244          */
3245         if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3246                 up_read(&block->cb_lock);
3247                 take_rtnl = true;
3248                 goto retry;
3249         }
3250
3251         /* Make sure all netdevs sharing this block are offload-capable. */
3252         if (block->nooffloaddevcnt && err_stop) {
3253                 ok_count = -EOPNOTSUPP;
3254                 goto err_unlock;
3255         }
3256
3257         ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3258         if (ok_count < 0)
3259                 goto err_unlock;
3260
3261         if (tp->ops->hw_add)
3262                 tp->ops->hw_add(tp, type_data);
3263         if (ok_count > 0)
3264                 tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3265                                           ok_count, true);
3266 err_unlock:
3267         up_read(&block->cb_lock);
3268         if (take_rtnl)
3269                 rtnl_unlock();
3270         return ok_count < 0 ? ok_count : 0;
3271 }
3272 EXPORT_SYMBOL(tc_setup_cb_add);
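
/* Illustrative sketch, not part of this file: a classifier inserting a new
 * filter passes its per-filter flags and in_hw count so this helper can
 * update TCA_CLS_FLAGS_IN_HW and the block offload counter for it.
 * Loosely modeled on cls_flower's fl_hw_replace_filter():
 *
 *	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
 *			      skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
 *	if (err)
 *		goto errout;
 *	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
 *		goto errout;
 */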
3273
3274 /* Destructive filter replace. If a filter that wasn't already in hardware
3275  * is successfully offloaded, increment the block offload counter. On
3276  * failure, a previously offloaded filter is considered to be destroyed and
3277  * the offload counter is decremented.
3278  */
3279
3280 int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3281                         enum tc_setup_type type, void *type_data, bool err_stop,
3282                         u32 *old_flags, unsigned int *old_in_hw_count,
3283                         u32 *new_flags, unsigned int *new_in_hw_count,
3284                         bool rtnl_held)
3285 {
3286         bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3287         int ok_count;
3288
3289 retry:
3290         if (take_rtnl)
3291                 rtnl_lock();
3292         down_read(&block->cb_lock);
3293         /* Need to obtain the rtnl lock if the block is bound to devs that
3294          * require it. In the block bind code, cb_lock is obtained while
3295          * holding rtnl, so we must take the locks in the same order here.
3296          */
3297         if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3298                 up_read(&block->cb_lock);
3299                 take_rtnl = true;
3300                 goto retry;
3301         }
3302
3303         /* Make sure all netdevs sharing this block are offload-capable. */
3304         if (block->nooffloaddevcnt && err_stop) {
3305                 ok_count = -EOPNOTSUPP;
3306                 goto err_unlock;
3307         }
3308
3309         tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3310         if (tp->ops->hw_del)
3311                 tp->ops->hw_del(tp, type_data);
3312
3313         ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3314         if (ok_count < 0)
3315                 goto err_unlock;
3316
3317         if (tp->ops->hw_add)
3318                 tp->ops->hw_add(tp, type_data);
3319         if (ok_count > 0)
3320                 tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3321                                           new_flags, ok_count, true);
3322 err_unlock:
3323         up_read(&block->cb_lock);
3324         if (take_rtnl)
3325                 rtnl_unlock();
3326         return ok_count < 0 ? ok_count : 0;
3327 }
3328 EXPORT_SYMBOL(tc_setup_cb_replace);
3329
3330 /* Destroy the filter and decrement the block offload counter, if the
3331  * filter was previously offloaded.
3332  */
3333
3334 int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3335                         enum tc_setup_type type, void *type_data, bool err_stop,
3336                         u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3337 {
3338         bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3339         int ok_count;
3340
3341 retry:
3342         if (take_rtnl)
3343                 rtnl_lock();
3344         down_read(&block->cb_lock);
3345         /* Need to obtain the rtnl lock if the block is bound to devs that
3346          * require it. In the block bind code, cb_lock is obtained while
3347          * holding rtnl, so we must take the locks in the same order here.
3348          */
3349         if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3350                 up_read(&block->cb_lock);
3351                 take_rtnl = true;
3352                 goto retry;
3353         }
3354
3355         ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3356
3357         tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3358         if (tp->ops->hw_del)
3359                 tp->ops->hw_del(tp, type_data);
3360
3361         up_read(&block->cb_lock);
3362         if (take_rtnl)
3363                 rtnl_unlock();
3364         return ok_count < 0 ? ok_count : 0;
3365 }
3366 EXPORT_SYMBOL(tc_setup_cb_destroy);
3367
3368 int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3369                           bool add, flow_setup_cb_t *cb,
3370                           enum tc_setup_type type, void *type_data,
3371                           void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3372 {
3373         int err = cb(type, type_data, cb_priv);
3374
3375         if (err) {
3376                 if (add && tc_skip_sw(*flags))
3377                         return err;
3378         } else {
3379                 tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3380                                           add);
3381         }
3382
3383         return 0;
3384 }
3385 EXPORT_SYMBOL(tc_setup_cb_reoffload);
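
/* Illustrative sketch, not part of this file: tc_setup_cb_reoffload() is
 * intended for a classifier's ->reoffload() op, which walks all filters
 * and replays each one to the single callback being (un)bound. Loosely
 * modeled on cls_flower's fl_reoffload():
 *
 *	err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSFLOWER,
 *				    &cls_flower, cb_priv, &f->flags,
 *				    &f->in_hw_count);
 *	if (err)
 *		return err;
 */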
3386
3387 void tc_cleanup_flow_action(struct flow_action *flow_action)
3388 {
3389         struct flow_action_entry *entry;
3390         int i;
3391
3392         flow_action_for_each(i, entry, flow_action)
3393                 if (entry->destructor)
3394                         entry->destructor(entry->destructor_priv);
3395 }
3396 EXPORT_SYMBOL(tc_cleanup_flow_action);
3397
3398 static void tcf_mirred_get_dev(struct flow_action_entry *entry,
3399                                const struct tc_action *act)
3400 {
3401 #ifdef CONFIG_NET_CLS_ACT
3402         entry->dev = act->ops->get_dev(act, &entry->destructor);
3403         if (!entry->dev)
3404                 return;
3405         entry->destructor_priv = entry->dev;
3406 #endif
3407 }
3408
3409 static void tcf_tunnel_encap_put_tunnel(void *priv)
3410 {
3411         struct ip_tunnel_info *tunnel = priv;
3412
3413         kfree(tunnel);
3414 }
3415
3416 static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
3417                                        const struct tc_action *act)
3418 {
3419         entry->tunnel = tcf_tunnel_info_copy(act);
3420         if (!entry->tunnel)
3421                 return -ENOMEM;
3422         entry->destructor = tcf_tunnel_encap_put_tunnel;
3423         entry->destructor_priv = entry->tunnel;
3424         return 0;
3425 }
3426
3427 static void tcf_sample_get_group(struct flow_action_entry *entry,
3428                                  const struct tc_action *act)
3429 {
3430 #ifdef CONFIG_NET_CLS_ACT
3431         entry->sample.psample_group =
3432                 act->ops->get_psample_group(act, &entry->destructor);
3433         entry->destructor_priv = entry->sample.psample_group;
3434 #endif
3435 }
3436
3437 int tc_setup_flow_action(struct flow_action *flow_action,
3438                          const struct tcf_exts *exts, bool rtnl_held)
3439 {
3440         const struct tc_action *act;
3441         int i, j, k, err = 0;
3442
3443         if (!exts)
3444                 return 0;
3445
3446         if (!rtnl_held)
3447                 rtnl_lock();
3448
3449         j = 0;
3450         tcf_exts_for_each_action(i, act, exts) {
3451                 struct flow_action_entry *entry;
3452
3453                 entry = &flow_action->entries[j];
3454                 if (is_tcf_gact_ok(act)) {
3455                         entry->id = FLOW_ACTION_ACCEPT;
3456                 } else if (is_tcf_gact_shot(act)) {
3457                         entry->id = FLOW_ACTION_DROP;
3458                 } else if (is_tcf_gact_trap(act)) {
3459                         entry->id = FLOW_ACTION_TRAP;
3460                 } else if (is_tcf_gact_goto_chain(act)) {
3461                         entry->id = FLOW_ACTION_GOTO;
3462                         entry->chain_index = tcf_gact_goto_chain_index(act);
3463                 } else if (is_tcf_mirred_egress_redirect(act)) {
3464                         entry->id = FLOW_ACTION_REDIRECT;
3465                         tcf_mirred_get_dev(entry, act);
3466                 } else if (is_tcf_mirred_egress_mirror(act)) {
3467                         entry->id = FLOW_ACTION_MIRRED;
3468                         tcf_mirred_get_dev(entry, act);
3469                 } else if (is_tcf_mirred_ingress_redirect(act)) {
3470                         entry->id = FLOW_ACTION_REDIRECT_INGRESS;
3471                         tcf_mirred_get_dev(entry, act);
3472                 } else if (is_tcf_mirred_ingress_mirror(act)) {
3473                         entry->id = FLOW_ACTION_MIRRED_INGRESS;
3474                         tcf_mirred_get_dev(entry, act);
3475                 } else if (is_tcf_vlan(act)) {
3476                         switch (tcf_vlan_action(act)) {
3477                         case TCA_VLAN_ACT_PUSH:
3478                                 entry->id = FLOW_ACTION_VLAN_PUSH;
3479                                 entry->vlan.vid = tcf_vlan_push_vid(act);
3480                                 entry->vlan.proto = tcf_vlan_push_proto(act);
3481                                 entry->vlan.prio = tcf_vlan_push_prio(act);
3482                                 break;
3483                         case TCA_VLAN_ACT_POP:
3484                                 entry->id = FLOW_ACTION_VLAN_POP;
3485                                 break;
3486                         case TCA_VLAN_ACT_MODIFY:
3487                                 entry->id = FLOW_ACTION_VLAN_MANGLE;
3488                                 entry->vlan.vid = tcf_vlan_push_vid(act);
3489                                 entry->vlan.proto = tcf_vlan_push_proto(act);
3490                                 entry->vlan.prio = tcf_vlan_push_prio(act);
3491                                 break;
3492                         default:
3493                                 err = -EOPNOTSUPP;
3494                                 goto err_out;
3495                         }
3496                 } else if (is_tcf_tunnel_set(act)) {
3497                         entry->id = FLOW_ACTION_TUNNEL_ENCAP;
3498                         err = tcf_tunnel_encap_get_tunnel(entry, act);
3499                         if (err)
3500                                 goto err_out;
3501                 } else if (is_tcf_tunnel_release(act)) {
3502                         entry->id = FLOW_ACTION_TUNNEL_DECAP;
3503                 } else if (is_tcf_pedit(act)) {
3504                         for (k = 0; k < tcf_pedit_nkeys(act); k++) {
3505                                 switch (tcf_pedit_cmd(act, k)) {
3506                                 case TCA_PEDIT_KEY_EX_CMD_SET:
3507                                         entry->id = FLOW_ACTION_MANGLE;
3508                                         break;
3509                                 case TCA_PEDIT_KEY_EX_CMD_ADD:
3510                                         entry->id = FLOW_ACTION_ADD;
3511                                         break;
3512                                 default:
3513                                         err = -EOPNOTSUPP;
3514                                         goto err_out;
3515                                 }
3516                                 entry->mangle.htype = tcf_pedit_htype(act, k);
3517                                 entry->mangle.mask = tcf_pedit_mask(act, k);
3518                                 entry->mangle.val = tcf_pedit_val(act, k);
3519                                 entry->mangle.offset = tcf_pedit_offset(act, k);
3520                                 entry = &flow_action->entries[++j];
3521                         }
3522                 } else if (is_tcf_csum(act)) {
3523                         entry->id = FLOW_ACTION_CSUM;
3524                         entry->csum_flags = tcf_csum_update_flags(act);
3525                 } else if (is_tcf_skbedit_mark(act)) {
3526                         entry->id = FLOW_ACTION_MARK;
3527                         entry->mark = tcf_skbedit_mark(act);
3528                 } else if (is_tcf_sample(act)) {
3529                         entry->id = FLOW_ACTION_SAMPLE;
3530                         entry->sample.trunc_size = tcf_sample_trunc_size(act);
3531                         entry->sample.truncate = tcf_sample_truncate(act);
3532                         entry->sample.rate = tcf_sample_rate(act);
3533                         tcf_sample_get_group(entry, act);
3534                 } else if (is_tcf_police(act)) {
3535                         entry->id = FLOW_ACTION_POLICE;
3536                         entry->police.burst = tcf_police_tcfp_burst(act);
3537                         entry->police.rate_bytes_ps =
3538                                 tcf_police_rate_bytes_ps(act);
3539                 } else if (is_tcf_ct(act)) {
3540                         entry->id = FLOW_ACTION_CT;
3541                         entry->ct.action = tcf_ct_action(act);
3542                         entry->ct.zone = tcf_ct_zone(act);
3543                 } else if (is_tcf_mpls(act)) {
3544                         switch (tcf_mpls_action(act)) {
3545                         case TCA_MPLS_ACT_PUSH:
3546                                 entry->id = FLOW_ACTION_MPLS_PUSH;
3547                                 entry->mpls_push.proto = tcf_mpls_proto(act);
3548                                 entry->mpls_push.label = tcf_mpls_label(act);
3549                                 entry->mpls_push.tc = tcf_mpls_tc(act);
3550                                 entry->mpls_push.bos = tcf_mpls_bos(act);
3551                                 entry->mpls_push.ttl = tcf_mpls_ttl(act);
3552                                 break;
3553                         case TCA_MPLS_ACT_POP:
3554                                 entry->id = FLOW_ACTION_MPLS_POP;
3555                                 entry->mpls_pop.proto = tcf_mpls_proto(act);
3556                                 break;
3557                         case TCA_MPLS_ACT_MODIFY:
3558                                 entry->id = FLOW_ACTION_MPLS_MANGLE;
3559                                 entry->mpls_mangle.label = tcf_mpls_label(act);
3560                                 entry->mpls_mangle.tc = tcf_mpls_tc(act);
3561                                 entry->mpls_mangle.bos = tcf_mpls_bos(act);
3562                                 entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
3563                                 break;
3564                         default:
3565                                 err = -EOPNOTSUPP;
                                 goto err_out;
3566                         }
3567                 } else if (is_tcf_skbedit_ptype(act)) {
3568                         entry->id = FLOW_ACTION_PTYPE;
3569                         entry->ptype = tcf_skbedit_ptype(act);
3570                 } else {
3571                         err = -EOPNOTSUPP;
3572                         goto err_out;
3573                 }
3574
3575                 if (!is_tcf_pedit(act))
3576                         j++;
3577         }
3578
3579 err_out:
3580         if (!rtnl_held)
3581                 rtnl_unlock();
3582
3583         if (err)
3584                 tc_cleanup_flow_action(flow_action);
3585
3586         return err;
3587 }
3588 EXPORT_SYMBOL(tc_setup_flow_action);
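
/* Illustrative sketch, not part of this file: drivers consume the
 * flow_action built above by iterating the entries and dispatching on
 * entry->id; the rule handling shown here is hypothetical.
 *
 *	flow_action_for_each(i, entry, &rule->action) {
 *		switch (entry->id) {
 *		case FLOW_ACTION_DROP:
 *			// program a drop rule in hardware
 *			break;
 *		case FLOW_ACTION_REDIRECT:
 *			// redirect using entry->dev
 *			break;
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */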
3589
3590 unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
3591 {
3592         unsigned int num_acts = 0;
3593         struct tc_action *act;
3594         int i;
3595
3596         tcf_exts_for_each_action(i, act, exts) {
3597                 if (is_tcf_pedit(act))
3598                         num_acts += tcf_pedit_nkeys(act);
3599                 else
3600                         num_acts++;
3601         }
3602         return num_acts;
3603 }
3604 EXPORT_SYMBOL(tcf_exts_num_actions);
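
/* Illustrative sketch, not part of this file: tcf_exts_num_actions() sizes
 * the flow_rule that tc_setup_flow_action() later fills; pedit contributes
 * one entry per key, which is why both helpers special-case it. Loosely
 * modeled on cls_flower's fl_hw_replace_filter():
 *
 *	rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 *	if (!rule)
 *		return -ENOMEM;
 *	err = tc_setup_flow_action(&rule->action, &f->exts, rtnl_held);
 *	if (err)
 *		goto errout;
 */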
3605
3606 static __net_init int tcf_net_init(struct net *net)
3607 {
3608         struct tcf_net *tn = net_generic(net, tcf_net_id);
3609
3610         spin_lock_init(&tn->idr_lock);
3611         idr_init(&tn->idr);
3612         return 0;
3613 }
3614
3615 static void __net_exit tcf_net_exit(struct net *net)
3616 {
3617         struct tcf_net *tn = net_generic(net, tcf_net_id);
3618
3619         idr_destroy(&tn->idr);
3620 }
3621
3622 static struct pernet_operations tcf_net_ops = {
3623         .init = tcf_net_init,
3624         .exit = tcf_net_exit,
3625         .id   = &tcf_net_id,
3626         .size = sizeof(struct tcf_net),
3627 };
3628
3629 static struct flow_indr_block_ing_entry block_ing_entry = {
3630         .cb = tc_indr_block_get_and_ing_cmd,
3631         .list = LIST_HEAD_INIT(block_ing_entry.list),
3632 };
3633
3634 static int __init tc_filter_init(void)
3635 {
3636         int err;
3637
3638         tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
3639         if (!tc_filter_wq)
3640                 return -ENOMEM;
3641
3642         err = register_pernet_subsys(&tcf_net_ops);
3643         if (err)
3644                 goto err_register_pernet_subsys;
3645
3646         flow_indr_add_block_ing_cb(&block_ing_entry);
3647
3648         rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
3649                       RTNL_FLAG_DOIT_UNLOCKED);
3650         rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
3651                       RTNL_FLAG_DOIT_UNLOCKED);
3652         rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
3653                       tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
3654         rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
3655         rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
3656         rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
3657                       tc_dump_chain, 0);
3658
3659         return 0;
3660
3661 err_register_pernet_subsys:
3662         destroy_workqueue(tc_filter_wq);
3663         return err;
3664 }
3665
3666 subsys_initcall(tc_filter_init);