]> asedeno.scripts.mit.edu Git - linux.git/blob - net/sched/cls_flower.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
[linux.git] / net / sched / cls_flower.c
1 /*
2  * net/sched/cls_flower.c               Flower classifier
3  *
4  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/rhashtable.h>
16 #include <linux/workqueue.h>
17
18 #include <linux/if_ether.h>
19 #include <linux/in6.h>
20 #include <linux/ip.h>
21
22 #include <net/sch_generic.h>
23 #include <net/pkt_cls.h>
24 #include <net/ip.h>
25 #include <net/flow_dissector.h>
26
27 #include <net/dst.h>
28 #include <net/dst_metadata.h>
29
30 struct fl_flow_key {
31         int     indev_ifindex;
32         struct flow_dissector_key_control control;
33         struct flow_dissector_key_control enc_control;
34         struct flow_dissector_key_basic basic;
35         struct flow_dissector_key_eth_addrs eth;
36         struct flow_dissector_key_vlan vlan;
37         union {
38                 struct flow_dissector_key_ipv4_addrs ipv4;
39                 struct flow_dissector_key_ipv6_addrs ipv6;
40         };
41         struct flow_dissector_key_ports tp;
42         struct flow_dissector_key_keyid enc_key_id;
43         union {
44                 struct flow_dissector_key_ipv4_addrs enc_ipv4;
45                 struct flow_dissector_key_ipv6_addrs enc_ipv6;
46         };
47         struct flow_dissector_key_ports enc_tp;
48 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
49
/*
 * Byte span [start, end) inside struct fl_flow_key that a mask covers.
 * Both offsets are relative to the beginning of the key.
 */
struct fl_flow_mask_range {
	unsigned short start;	/* offset of the first covered byte */
	unsigned short end;	/* offset one past the last covered byte */
};
54
55 struct fl_flow_mask {
56         struct fl_flow_key key;
57         struct fl_flow_mask_range range;
58         struct rcu_head rcu;
59 };
60
61 struct cls_fl_head {
62         struct rhashtable ht;
63         struct fl_flow_mask mask;
64         struct flow_dissector dissector;
65         u32 hgen;
66         bool mask_assigned;
67         struct list_head filters;
68         struct rhashtable_params ht_params;
69         union {
70                 struct work_struct work;
71                 struct rcu_head rcu;
72         };
73 };
74
75 struct cls_fl_filter {
76         struct rhash_head ht_node;
77         struct fl_flow_key mkey;
78         struct tcf_exts exts;
79         struct tcf_result res;
80         struct fl_flow_key key;
81         struct list_head list;
82         u32 handle;
83         u32 flags;
84         struct rcu_head rcu;
85         struct tc_to_netdev tc;
86         struct net_device *hw_dev;
87 };
88
89 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
90 {
91         return mask->range.end - mask->range.start;
92 }
93
94 static void fl_mask_update_range(struct fl_flow_mask *mask)
95 {
96         const u8 *bytes = (const u8 *) &mask->key;
97         size_t size = sizeof(mask->key);
98         size_t i, first = 0, last = size - 1;
99
100         for (i = 0; i < sizeof(mask->key); i++) {
101                 if (bytes[i]) {
102                         if (!first && i)
103                                 first = i;
104                         last = i;
105                 }
106         }
107         mask->range.start = rounddown(first, sizeof(long));
108         mask->range.end = roundup(last + 1, sizeof(long));
109 }
110
111 static void *fl_key_get_start(struct fl_flow_key *key,
112                               const struct fl_flow_mask *mask)
113 {
114         return (u8 *) key + mask->range.start;
115 }
116
117 static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
118                               struct fl_flow_mask *mask)
119 {
120         const long *lkey = fl_key_get_start(key, mask);
121         const long *lmask = fl_key_get_start(&mask->key, mask);
122         long *lmkey = fl_key_get_start(mkey, mask);
123         int i;
124
125         for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
126                 *lmkey++ = *lkey++ & *lmask++;
127 }
128
/* Zero the part of @key that falls inside @mask's range. */
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	void *start = fl_key_get_start(key, mask);

	memset(start, 0, fl_mask_range(mask));
}
134
135 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
136                        struct tcf_result *res)
137 {
138         struct cls_fl_head *head = rcu_dereference_bh(tp->root);
139         struct cls_fl_filter *f;
140         struct fl_flow_key skb_key;
141         struct fl_flow_key skb_mkey;
142         struct ip_tunnel_info *info;
143
144         if (!atomic_read(&head->ht.nelems))
145                 return -1;
146
147         fl_clear_masked_range(&skb_key, &head->mask);
148
149         info = skb_tunnel_info(skb);
150         if (info) {
151                 struct ip_tunnel_key *key = &info->key;
152
153                 switch (ip_tunnel_info_af(info)) {
154                 case AF_INET:
155                         skb_key.enc_ipv4.src = key->u.ipv4.src;
156                         skb_key.enc_ipv4.dst = key->u.ipv4.dst;
157                         break;
158                 case AF_INET6:
159                         skb_key.enc_ipv6.src = key->u.ipv6.src;
160                         skb_key.enc_ipv6.dst = key->u.ipv6.dst;
161                         break;
162                 }
163
164                 skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
165                 skb_key.enc_tp.src = key->tp_src;
166                 skb_key.enc_tp.dst = key->tp_dst;
167         }
168
169         skb_key.indev_ifindex = skb->skb_iif;
170         /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
171          * so do it rather here.
172          */
173         skb_key.basic.n_proto = skb->protocol;
174         skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
175
176         fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
177
178         f = rhashtable_lookup_fast(&head->ht,
179                                    fl_key_get_start(&skb_mkey, &head->mask),
180                                    head->ht_params);
181         if (f && !tc_skip_sw(f->flags)) {
182                 *res = f->res;
183                 return tcf_exts_exec(skb, &f->exts, res);
184         }
185         return -1;
186 }
187
188 static int fl_init(struct tcf_proto *tp)
189 {
190         struct cls_fl_head *head;
191
192         head = kzalloc(sizeof(*head), GFP_KERNEL);
193         if (!head)
194                 return -ENOBUFS;
195
196         INIT_LIST_HEAD_RCU(&head->filters);
197         rcu_assign_pointer(tp->root, head);
198
199         return 0;
200 }
201
202 static void fl_destroy_filter(struct rcu_head *head)
203 {
204         struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
205
206         tcf_exts_destroy(&f->exts);
207         kfree(f);
208 }
209
210 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
211 {
212         struct tc_cls_flower_offload offload = {0};
213         struct net_device *dev = f->hw_dev;
214         struct tc_to_netdev *tc = &f->tc;
215
216         if (!tc_can_offload(dev, tp))
217                 return;
218
219         offload.command = TC_CLSFLOWER_DESTROY;
220         offload.cookie = (unsigned long)f;
221
222         tc->type = TC_SETUP_CLSFLOWER;
223         tc->cls_flower = &offload;
224
225         dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
226 }
227
228 static int fl_hw_replace_filter(struct tcf_proto *tp,
229                                 struct flow_dissector *dissector,
230                                 struct fl_flow_key *mask,
231                                 struct cls_fl_filter *f)
232 {
233         struct net_device *dev = tp->q->dev_queue->dev;
234         struct tc_cls_flower_offload offload = {0};
235         struct tc_to_netdev *tc = &f->tc;
236         int err;
237
238         if (!tc_can_offload(dev, tp)) {
239                 if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
240                     (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
241                         f->hw_dev = dev;
242                         return tc_skip_sw(f->flags) ? -EINVAL : 0;
243                 }
244                 dev = f->hw_dev;
245                 tc->egress_dev = true;
246         } else {
247                 f->hw_dev = dev;
248         }
249
250         offload.command = TC_CLSFLOWER_REPLACE;
251         offload.cookie = (unsigned long)f;
252         offload.dissector = dissector;
253         offload.mask = mask;
254         offload.key = &f->key;
255         offload.exts = &f->exts;
256
257         tc->type = TC_SETUP_CLSFLOWER;
258         tc->cls_flower = &offload;
259
260         err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
261                                             tc);
262
263         if (tc_skip_sw(f->flags))
264                 return err;
265         return 0;
266 }
267
268 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
269 {
270         struct tc_cls_flower_offload offload = {0};
271         struct net_device *dev = f->hw_dev;
272         struct tc_to_netdev *tc = &f->tc;
273
274         if (!tc_can_offload(dev, tp))
275                 return;
276
277         offload.command = TC_CLSFLOWER_STATS;
278         offload.cookie = (unsigned long)f;
279         offload.exts = &f->exts;
280
281         tc->type = TC_SETUP_CLSFLOWER;
282         tc->cls_flower = &offload;
283
284         dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
285 }
286
287 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
288 {
289         list_del_rcu(&f->list);
290         if (!tc_skip_hw(f->flags))
291                 fl_hw_destroy_filter(tp, f);
292         tcf_unbind_filter(tp, &f->res);
293         call_rcu(&f->rcu, fl_destroy_filter);
294 }
295
296 static void fl_destroy_sleepable(struct work_struct *work)
297 {
298         struct cls_fl_head *head = container_of(work, struct cls_fl_head,
299                                                 work);
300         if (head->mask_assigned)
301                 rhashtable_destroy(&head->ht);
302         kfree(head);
303         module_put(THIS_MODULE);
304 }
305
306 static void fl_destroy_rcu(struct rcu_head *rcu)
307 {
308         struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu);
309
310         INIT_WORK(&head->work, fl_destroy_sleepable);
311         schedule_work(&head->work);
312 }
313
314 static bool fl_destroy(struct tcf_proto *tp, bool force)
315 {
316         struct cls_fl_head *head = rtnl_dereference(tp->root);
317         struct cls_fl_filter *f, *next;
318
319         if (!force && !list_empty(&head->filters))
320                 return false;
321
322         list_for_each_entry_safe(f, next, &head->filters, list)
323                 __fl_delete(tp, f);
324
325         __module_get(THIS_MODULE);
326         call_rcu(&head->rcu, fl_destroy_rcu);
327
328         return true;
329 }
330
331 static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
332 {
333         struct cls_fl_head *head = rtnl_dereference(tp->root);
334         struct cls_fl_filter *f;
335
336         list_for_each_entry(f, &head->filters, list)
337                 if (f->handle == handle)
338                         return (unsigned long) f;
339         return 0;
340 }
341
342 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
343         [TCA_FLOWER_UNSPEC]             = { .type = NLA_UNSPEC },
344         [TCA_FLOWER_CLASSID]            = { .type = NLA_U32 },
345         [TCA_FLOWER_INDEV]              = { .type = NLA_STRING,
346                                             .len = IFNAMSIZ },
347         [TCA_FLOWER_KEY_ETH_DST]        = { .len = ETH_ALEN },
348         [TCA_FLOWER_KEY_ETH_DST_MASK]   = { .len = ETH_ALEN },
349         [TCA_FLOWER_KEY_ETH_SRC]        = { .len = ETH_ALEN },
350         [TCA_FLOWER_KEY_ETH_SRC_MASK]   = { .len = ETH_ALEN },
351         [TCA_FLOWER_KEY_ETH_TYPE]       = { .type = NLA_U16 },
352         [TCA_FLOWER_KEY_IP_PROTO]       = { .type = NLA_U8 },
353         [TCA_FLOWER_KEY_IPV4_SRC]       = { .type = NLA_U32 },
354         [TCA_FLOWER_KEY_IPV4_SRC_MASK]  = { .type = NLA_U32 },
355         [TCA_FLOWER_KEY_IPV4_DST]       = { .type = NLA_U32 },
356         [TCA_FLOWER_KEY_IPV4_DST_MASK]  = { .type = NLA_U32 },
357         [TCA_FLOWER_KEY_IPV6_SRC]       = { .len = sizeof(struct in6_addr) },
358         [TCA_FLOWER_KEY_IPV6_SRC_MASK]  = { .len = sizeof(struct in6_addr) },
359         [TCA_FLOWER_KEY_IPV6_DST]       = { .len = sizeof(struct in6_addr) },
360         [TCA_FLOWER_KEY_IPV6_DST_MASK]  = { .len = sizeof(struct in6_addr) },
361         [TCA_FLOWER_KEY_TCP_SRC]        = { .type = NLA_U16 },
362         [TCA_FLOWER_KEY_TCP_DST]        = { .type = NLA_U16 },
363         [TCA_FLOWER_KEY_UDP_SRC]        = { .type = NLA_U16 },
364         [TCA_FLOWER_KEY_UDP_DST]        = { .type = NLA_U16 },
365         [TCA_FLOWER_KEY_VLAN_ID]        = { .type = NLA_U16 },
366         [TCA_FLOWER_KEY_VLAN_PRIO]      = { .type = NLA_U8 },
367         [TCA_FLOWER_KEY_VLAN_ETH_TYPE]  = { .type = NLA_U16 },
368         [TCA_FLOWER_KEY_ENC_KEY_ID]     = { .type = NLA_U32 },
369         [TCA_FLOWER_KEY_ENC_IPV4_SRC]   = { .type = NLA_U32 },
370         [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
371         [TCA_FLOWER_KEY_ENC_IPV4_DST]   = { .type = NLA_U32 },
372         [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
373         [TCA_FLOWER_KEY_ENC_IPV6_SRC]   = { .len = sizeof(struct in6_addr) },
374         [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
375         [TCA_FLOWER_KEY_ENC_IPV6_DST]   = { .len = sizeof(struct in6_addr) },
376         [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
377         [TCA_FLOWER_KEY_TCP_SRC_MASK]   = { .type = NLA_U16 },
378         [TCA_FLOWER_KEY_TCP_DST_MASK]   = { .type = NLA_U16 },
379         [TCA_FLOWER_KEY_UDP_SRC_MASK]   = { .type = NLA_U16 },
380         [TCA_FLOWER_KEY_UDP_DST_MASK]   = { .type = NLA_U16 },
381         [TCA_FLOWER_KEY_SCTP_SRC_MASK]  = { .type = NLA_U16 },
382         [TCA_FLOWER_KEY_SCTP_DST_MASK]  = { .type = NLA_U16 },
383         [TCA_FLOWER_KEY_SCTP_SRC]       = { .type = NLA_U16 },
384         [TCA_FLOWER_KEY_SCTP_DST]       = { .type = NLA_U16 },
385         [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]       = { .type = NLA_U16 },
386         [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]  = { .type = NLA_U16 },
387         [TCA_FLOWER_KEY_ENC_UDP_DST_PORT]       = { .type = NLA_U16 },
388         [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]  = { .type = NLA_U16 },
389 };
390
391 static void fl_set_key_val(struct nlattr **tb,
392                            void *val, int val_type,
393                            void *mask, int mask_type, int len)
394 {
395         if (!tb[val_type])
396                 return;
397         memcpy(val, nla_data(tb[val_type]), len);
398         if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
399                 memset(mask, 0xff, len);
400         else
401                 memcpy(mask, nla_data(tb[mask_type]), len);
402 }
403
404 static void fl_set_key_vlan(struct nlattr **tb,
405                             struct flow_dissector_key_vlan *key_val,
406                             struct flow_dissector_key_vlan *key_mask)
407 {
408 #define VLAN_PRIORITY_MASK      0x7
409
410         if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
411                 key_val->vlan_id =
412                         nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
413                 key_mask->vlan_id = VLAN_VID_MASK;
414         }
415         if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
416                 key_val->vlan_priority =
417                         nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
418                         VLAN_PRIORITY_MASK;
419                 key_mask->vlan_priority = VLAN_PRIORITY_MASK;
420         }
421 }
422
423 static int fl_set_key(struct net *net, struct nlattr **tb,
424                       struct fl_flow_key *key, struct fl_flow_key *mask)
425 {
426         __be16 ethertype;
427 #ifdef CONFIG_NET_CLS_IND
428         if (tb[TCA_FLOWER_INDEV]) {
429                 int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
430                 if (err < 0)
431                         return err;
432                 key->indev_ifindex = err;
433                 mask->indev_ifindex = 0xffffffff;
434         }
435 #endif
436
437         fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
438                        mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
439                        sizeof(key->eth.dst));
440         fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
441                        mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
442                        sizeof(key->eth.src));
443
444         if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
445                 ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
446
447                 if (ethertype == htons(ETH_P_8021Q)) {
448                         fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
449                         fl_set_key_val(tb, &key->basic.n_proto,
450                                        TCA_FLOWER_KEY_VLAN_ETH_TYPE,
451                                        &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
452                                        sizeof(key->basic.n_proto));
453                 } else {
454                         key->basic.n_proto = ethertype;
455                         mask->basic.n_proto = cpu_to_be16(~0);
456                 }
457         }
458
459         if (key->basic.n_proto == htons(ETH_P_IP) ||
460             key->basic.n_proto == htons(ETH_P_IPV6)) {
461                 fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
462                                &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
463                                sizeof(key->basic.ip_proto));
464         }
465
466         if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
467                 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
468                 fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
469                                &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
470                                sizeof(key->ipv4.src));
471                 fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
472                                &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
473                                sizeof(key->ipv4.dst));
474         } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
475                 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
476                 fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
477                                &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
478                                sizeof(key->ipv6.src));
479                 fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
480                                &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
481                                sizeof(key->ipv6.dst));
482         }
483
484         if (key->basic.ip_proto == IPPROTO_TCP) {
485                 fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
486                                &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
487                                sizeof(key->tp.src));
488                 fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
489                                &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
490                                sizeof(key->tp.dst));
491         } else if (key->basic.ip_proto == IPPROTO_UDP) {
492                 fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
493                                &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
494                                sizeof(key->tp.src));
495                 fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
496                                &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
497                                sizeof(key->tp.dst));
498         } else if (key->basic.ip_proto == IPPROTO_SCTP) {
499                 fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
500                                &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
501                                sizeof(key->tp.src));
502                 fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
503                                &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
504                                sizeof(key->tp.dst));
505         }
506
507         if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
508             tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
509                 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
510                 fl_set_key_val(tb, &key->enc_ipv4.src,
511                                TCA_FLOWER_KEY_ENC_IPV4_SRC,
512                                &mask->enc_ipv4.src,
513                                TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
514                                sizeof(key->enc_ipv4.src));
515                 fl_set_key_val(tb, &key->enc_ipv4.dst,
516                                TCA_FLOWER_KEY_ENC_IPV4_DST,
517                                &mask->enc_ipv4.dst,
518                                TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
519                                sizeof(key->enc_ipv4.dst));
520         }
521
522         if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
523             tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
524                 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
525                 fl_set_key_val(tb, &key->enc_ipv6.src,
526                                TCA_FLOWER_KEY_ENC_IPV6_SRC,
527                                &mask->enc_ipv6.src,
528                                TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
529                                sizeof(key->enc_ipv6.src));
530                 fl_set_key_val(tb, &key->enc_ipv6.dst,
531                                TCA_FLOWER_KEY_ENC_IPV6_DST,
532                                &mask->enc_ipv6.dst,
533                                TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
534                                sizeof(key->enc_ipv6.dst));
535         }
536
537         fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
538                        &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
539                        sizeof(key->enc_key_id.keyid));
540
541         fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
542                        &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
543                        sizeof(key->enc_tp.src));
544
545         fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
546                        &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
547                        sizeof(key->enc_tp.dst));
548
549         return 0;
550 }
551
552 static bool fl_mask_eq(struct fl_flow_mask *mask1,
553                        struct fl_flow_mask *mask2)
554 {
555         const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
556         const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
557
558         return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
559                !memcmp(lmask1, lmask2, fl_mask_range(mask1));
560 }
561
562 static const struct rhashtable_params fl_ht_params = {
563         .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
564         .head_offset = offsetof(struct cls_fl_filter, ht_node),
565         .automatic_shrinking = true,
566 };
567
568 static int fl_init_hashtable(struct cls_fl_head *head,
569                              struct fl_flow_mask *mask)
570 {
571         head->ht_params = fl_ht_params;
572         head->ht_params.key_len = fl_mask_range(mask);
573         head->ht_params.key_offset += mask->range.start;
574
575         return rhashtable_init(&head->ht, &head->ht_params);
576 }
577
/* Offset and size of a member of struct fl_flow_key. */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))

/* Non-NULL if any byte of @member in the key-shaped @mask is non-zero. */
#define FL_KEY_IS_MASKED(mask, member)						\
	memchr_inv(((char *)(mask)) + FL_KEY_MEMBER_OFFSET(member),		\
		   0, FL_KEY_MEMBER_SIZE(member))

/*
 * Append dissector key @id, located at @member of struct fl_flow_key, to
 * keys[] and advance @cnt.
 *
 * The macros below deliberately do not end in a semicolon: the caller
 * supplies it, so they behave as a single statement (also in if/else).
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		keys[cnt].key_id = id;						\
		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
		cnt++;								\
	} while (0)

/* Same as FL_KEY_SET(), but only when @member is masked in @mask. */
#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IS_MASKED(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while (0)
597
598 static void fl_init_dissector(struct cls_fl_head *head,
599                               struct fl_flow_mask *mask)
600 {
601         struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
602         size_t cnt = 0;
603
604         FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
605         FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
606         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
607                              FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
608         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
609                              FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
610         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
611                              FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
612         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
613                              FLOW_DISSECTOR_KEY_PORTS, tp);
614         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
615                              FLOW_DISSECTOR_KEY_VLAN, vlan);
616         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
617                              FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
618         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
619                              FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
620         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
621                              FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
622         if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
623             FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
624                 FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
625                            enc_control);
626         FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
627                              FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
628
629         skb_flow_dissector_init(&head->dissector, keys, cnt);
630 }
631
632 static int fl_check_assign_mask(struct cls_fl_head *head,
633                                 struct fl_flow_mask *mask)
634 {
635         int err;
636
637         if (head->mask_assigned) {
638                 if (!fl_mask_eq(&head->mask, mask))
639                         return -EINVAL;
640                 else
641                         return 0;
642         }
643
644         /* Mask is not assigned yet. So assign it and init hashtable
645          * according to that.
646          */
647         err = fl_init_hashtable(head, mask);
648         if (err)
649                 return err;
650         memcpy(&head->mask, mask, sizeof(head->mask));
651         head->mask_assigned = true;
652
653         fl_init_dissector(head, mask);
654
655         return 0;
656 }
657
658 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
659                         struct cls_fl_filter *f, struct fl_flow_mask *mask,
660                         unsigned long base, struct nlattr **tb,
661                         struct nlattr *est, bool ovr)
662 {
663         struct tcf_exts e;
664         int err;
665
666         err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
667         if (err < 0)
668                 return err;
669         err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
670         if (err < 0)
671                 goto errout;
672
673         if (tb[TCA_FLOWER_CLASSID]) {
674                 f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
675                 tcf_bind_filter(tp, &f->res, base);
676         }
677
678         err = fl_set_key(net, tb, &f->key, &mask->key);
679         if (err)
680                 goto errout;
681
682         fl_mask_update_range(mask);
683         fl_set_masked_key(&f->mkey, &f->key, mask);
684
685         tcf_exts_change(tp, &f->exts, &e);
686
687         return 0;
688 errout:
689         tcf_exts_destroy(&e);
690         return err;
691 }
692
693 static u32 fl_grab_new_handle(struct tcf_proto *tp,
694                               struct cls_fl_head *head)
695 {
696         unsigned int i = 0x80000000;
697         u32 handle;
698
699         do {
700                 if (++head->hgen == 0x7FFFFFFF)
701                         head->hgen = 1;
702         } while (--i > 0 && fl_get(tp, head->hgen));
703
704         if (unlikely(i == 0)) {
705                 pr_err("Insufficient number of handles\n");
706                 handle = 0;
707         } else {
708                 handle = head->hgen;
709         }
710
711         return handle;
712 }
713
714 static int fl_change(struct net *net, struct sk_buff *in_skb,
715                      struct tcf_proto *tp, unsigned long base,
716                      u32 handle, struct nlattr **tca,
717                      unsigned long *arg, bool ovr)
718 {
719         struct cls_fl_head *head = rtnl_dereference(tp->root);
720         struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
721         struct cls_fl_filter *fnew;
722         struct nlattr *tb[TCA_FLOWER_MAX + 1];
723         struct fl_flow_mask mask = {};
724         int err;
725
726         if (!tca[TCA_OPTIONS])
727                 return -EINVAL;
728
729         err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
730         if (err < 0)
731                 return err;
732
733         if (fold && handle && fold->handle != handle)
734                 return -EINVAL;
735
736         fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
737         if (!fnew)
738                 return -ENOBUFS;
739
740         err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
741         if (err < 0)
742                 goto errout;
743
744         if (!handle) {
745                 handle = fl_grab_new_handle(tp, head);
746                 if (!handle) {
747                         err = -EINVAL;
748                         goto errout;
749                 }
750         }
751         fnew->handle = handle;
752
753         if (tb[TCA_FLOWER_FLAGS]) {
754                 fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
755
756                 if (!tc_flags_valid(fnew->flags)) {
757                         err = -EINVAL;
758                         goto errout;
759                 }
760         }
761
762         err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
763         if (err)
764                 goto errout;
765
766         err = fl_check_assign_mask(head, &mask);
767         if (err)
768                 goto errout;
769
770         if (!tc_skip_sw(fnew->flags)) {
771                 err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
772                                              head->ht_params);
773                 if (err)
774                         goto errout;
775         }
776
777         if (!tc_skip_hw(fnew->flags)) {
778                 err = fl_hw_replace_filter(tp,
779                                            &head->dissector,
780                                            &mask.key,
781                                            fnew);
782                 if (err)
783                         goto errout;
784         }
785
786         if (fold) {
787                 if (!tc_skip_sw(fold->flags))
788                         rhashtable_remove_fast(&head->ht, &fold->ht_node,
789                                                head->ht_params);
790                 if (!tc_skip_hw(fold->flags))
791                         fl_hw_destroy_filter(tp, fold);
792         }
793
794         *arg = (unsigned long) fnew;
795
796         if (fold) {
797                 list_replace_rcu(&fold->list, &fnew->list);
798                 tcf_unbind_filter(tp, &fold->res);
799                 call_rcu(&fold->rcu, fl_destroy_filter);
800         } else {
801                 list_add_tail_rcu(&fnew->list, &head->filters);
802         }
803
804         return 0;
805
806 errout:
807         tcf_exts_destroy(&fnew->exts);
808         kfree(fnew);
809         return err;
810 }
811
812 static int fl_delete(struct tcf_proto *tp, unsigned long arg)
813 {
814         struct cls_fl_head *head = rtnl_dereference(tp->root);
815         struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
816
817         if (!tc_skip_sw(f->flags))
818                 rhashtable_remove_fast(&head->ht, &f->ht_node,
819                                        head->ht_params);
820         __fl_delete(tp, f);
821         return 0;
822 }
823
824 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
825 {
826         struct cls_fl_head *head = rtnl_dereference(tp->root);
827         struct cls_fl_filter *f;
828
829         list_for_each_entry_rcu(f, &head->filters, list) {
830                 if (arg->count < arg->skip)
831                         goto skip;
832                 if (arg->fn(tp, (unsigned long) f, arg) < 0) {
833                         arg->stop = 1;
834                         break;
835                 }
836 skip:
837                 arg->count++;
838         }
839 }
840
841 static int fl_dump_key_val(struct sk_buff *skb,
842                            void *val, int val_type,
843                            void *mask, int mask_type, int len)
844 {
845         int err;
846
847         if (!memchr_inv(mask, 0, len))
848                 return 0;
849         err = nla_put(skb, val_type, len, val);
850         if (err)
851                 return err;
852         if (mask_type != TCA_FLOWER_UNSPEC) {
853                 err = nla_put(skb, mask_type, len, mask);
854                 if (err)
855                         return err;
856         }
857         return 0;
858 }
859
860 static int fl_dump_key_vlan(struct sk_buff *skb,
861                             struct flow_dissector_key_vlan *vlan_key,
862                             struct flow_dissector_key_vlan *vlan_mask)
863 {
864         int err;
865
866         if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
867                 return 0;
868         if (vlan_mask->vlan_id) {
869                 err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
870                                   vlan_key->vlan_id);
871                 if (err)
872                         return err;
873         }
874         if (vlan_mask->vlan_priority) {
875                 err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
876                                  vlan_key->vlan_priority);
877                 if (err)
878                         return err;
879         }
880         return 0;
881 }
882
883 static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
884                    struct sk_buff *skb, struct tcmsg *t)
885 {
886         struct cls_fl_head *head = rtnl_dereference(tp->root);
887         struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
888         struct nlattr *nest;
889         struct fl_flow_key *key, *mask;
890
891         if (!f)
892                 return skb->len;
893
894         t->tcm_handle = f->handle;
895
896         nest = nla_nest_start(skb, TCA_OPTIONS);
897         if (!nest)
898                 goto nla_put_failure;
899
900         if (f->res.classid &&
901             nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
902                 goto nla_put_failure;
903
904         key = &f->key;
905         mask = &head->mask.key;
906
907         if (mask->indev_ifindex) {
908                 struct net_device *dev;
909
910                 dev = __dev_get_by_index(net, key->indev_ifindex);
911                 if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
912                         goto nla_put_failure;
913         }
914
915         if (!tc_skip_hw(f->flags))
916                 fl_hw_update_stats(tp, f);
917
918         if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
919                             mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
920                             sizeof(key->eth.dst)) ||
921             fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
922                             mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
923                             sizeof(key->eth.src)) ||
924             fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
925                             &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
926                             sizeof(key->basic.n_proto)))
927                 goto nla_put_failure;
928
929         if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
930                 goto nla_put_failure;
931
932         if ((key->basic.n_proto == htons(ETH_P_IP) ||
933              key->basic.n_proto == htons(ETH_P_IPV6)) &&
934             fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
935                             &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
936                             sizeof(key->basic.ip_proto)))
937                 goto nla_put_failure;
938
939         if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
940             (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
941                              &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
942                              sizeof(key->ipv4.src)) ||
943              fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
944                              &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
945                              sizeof(key->ipv4.dst))))
946                 goto nla_put_failure;
947         else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
948                  (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
949                                   &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
950                                   sizeof(key->ipv6.src)) ||
951                   fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
952                                   &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
953                                   sizeof(key->ipv6.dst))))
954                 goto nla_put_failure;
955
956         if (key->basic.ip_proto == IPPROTO_TCP &&
957             (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
958                              &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
959                              sizeof(key->tp.src)) ||
960              fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
961                              &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
962                              sizeof(key->tp.dst))))
963                 goto nla_put_failure;
964         else if (key->basic.ip_proto == IPPROTO_UDP &&
965                  (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
966                                   &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
967                                   sizeof(key->tp.src)) ||
968                   fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
969                                   &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
970                                   sizeof(key->tp.dst))))
971                 goto nla_put_failure;
972         else if (key->basic.ip_proto == IPPROTO_SCTP &&
973                  (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
974                                   &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
975                                   sizeof(key->tp.src)) ||
976                   fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
977                                   &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
978                                   sizeof(key->tp.dst))))
979                 goto nla_put_failure;
980
981         if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
982             (fl_dump_key_val(skb, &key->enc_ipv4.src,
983                             TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
984                             TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
985                             sizeof(key->enc_ipv4.src)) ||
986              fl_dump_key_val(skb, &key->enc_ipv4.dst,
987                              TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
988                              TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
989                              sizeof(key->enc_ipv4.dst))))
990                 goto nla_put_failure;
991         else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
992                  (fl_dump_key_val(skb, &key->enc_ipv6.src,
993                             TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
994                             TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
995                             sizeof(key->enc_ipv6.src)) ||
996                  fl_dump_key_val(skb, &key->enc_ipv6.dst,
997                                  TCA_FLOWER_KEY_ENC_IPV6_DST,
998                                  &mask->enc_ipv6.dst,
999                                  TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
1000                             sizeof(key->enc_ipv6.dst))))
1001                 goto nla_put_failure;
1002
1003         if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
1004                             &mask->enc_key_id, TCA_FLOWER_UNSPEC,
1005                             sizeof(key->enc_key_id)) ||
1006             fl_dump_key_val(skb, &key->enc_tp.src,
1007                             TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
1008                             &mask->enc_tp.src,
1009                             TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
1010                             sizeof(key->enc_tp.src)) ||
1011             fl_dump_key_val(skb, &key->enc_tp.dst,
1012                             TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
1013                             &mask->enc_tp.dst,
1014                             TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
1015                             sizeof(key->enc_tp.dst)))
1016                 goto nla_put_failure;
1017
1018         nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
1019
1020         if (tcf_exts_dump(skb, &f->exts))
1021                 goto nla_put_failure;
1022
1023         nla_nest_end(skb, nest);
1024
1025         if (tcf_exts_dump_stats(skb, &f->exts) < 0)
1026                 goto nla_put_failure;
1027
1028         return skb->len;
1029
1030 nla_put_failure:
1031         nla_nest_cancel(skb, nest);
1032         return -1;
1033 }
1034
1035 static struct tcf_proto_ops cls_fl_ops __read_mostly = {
1036         .kind           = "flower",
1037         .classify       = fl_classify,
1038         .init           = fl_init,
1039         .destroy        = fl_destroy,
1040         .get            = fl_get,
1041         .change         = fl_change,
1042         .delete         = fl_delete,
1043         .walk           = fl_walk,
1044         .dump           = fl_dump,
1045         .owner          = THIS_MODULE,
1046 };
1047
1048 static int __init cls_fl_init(void)
1049 {
1050         return register_tcf_proto_ops(&cls_fl_ops);
1051 }
1052
1053 static void __exit cls_fl_exit(void)
1054 {
1055         unregister_tcf_proto_ops(&cls_fl_ops);
1056 }
1057
1058 module_init(cls_fl_init);
1059 module_exit(cls_fl_exit);
1060
1061 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
1062 MODULE_DESCRIPTION("Flower classifier");
1063 MODULE_LICENSE("GPL v2");