linux.git: net/netfilter/nf_flow_table_offload.c
1 #include <linux/kernel.h>
2 #include <linux/init.h>
3 #include <linux/module.h>
4 #include <linux/netfilter.h>
5 #include <linux/rhashtable.h>
6 #include <linux/netdevice.h>
7 #include <linux/tc_act/tc_csum.h>
8 #include <net/flow_offload.h>
9 #include <net/netfilter/nf_flow_table.h>
10 #include <net/netfilter/nf_conntrack.h>
11 #include <net/netfilter/nf_conntrack_core.h>
12 #include <net/netfilter/nf_conntrack_tuple.h>
13
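/*
 * Hardware offload requests (add, delete, stats) are queued on
 * flow_offload_pending_list under flow_offload_pending_list_lock and
 * processed asynchronously by the nf_flow_offload_work work item.
 */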
14 static struct work_struct nf_flow_offload_work;
15 static DEFINE_SPINLOCK(flow_offload_pending_list_lock);
16 static LIST_HEAD(flow_offload_pending_list);
17
18 struct flow_offload_work {
19         struct list_head        list;
20         enum flow_cls_command   cmd;
21         int                     priority;
22         struct nf_flowtable     *flowtable;
23         struct flow_offload     *flow;
24 };
25
26 struct nf_flow_key {
27         struct flow_dissector_key_control               control;
28         struct flow_dissector_key_basic                 basic;
29         union {
30                 struct flow_dissector_key_ipv4_addrs    ipv4;
31                 struct flow_dissector_key_ipv6_addrs    ipv6;
32         };
33         struct flow_dissector_key_tcp                   tcp;
34         struct flow_dissector_key_ports                 tp;
35 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
36
37 struct nf_flow_match {
38         struct flow_dissector   dissector;
39         struct nf_flow_key      key;
40         struct nf_flow_key      mask;
41 };
42
43 struct nf_flow_rule {
44         struct nf_flow_match    match;
45         struct flow_rule        *rule;
46 };
47
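/*
 * NF_FLOW_DISSECTOR() records the offset of a match field within
 * struct nf_flow_key. nf_flow_rule_match() builds the key/mask pair
 * from the flow tuple: L3 addresses, L4 protocol and ports, plus a TCP
 * flags match that requires RST and FIN to be clear, so teardown
 * packets are not handled by the offloaded rule.
 */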
48 #define NF_FLOW_DISSECTOR(__match, __type, __field)     \
49         (__match)->dissector.offset[__type] =           \
50                 offsetof(struct nf_flow_key, __field)
51
52 static int nf_flow_rule_match(struct nf_flow_match *match,
53                               const struct flow_offload_tuple *tuple)
54 {
55         struct nf_flow_key *mask = &match->mask;
56         struct nf_flow_key *key = &match->key;
57
58         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
59         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
60         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
61         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
62         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
63         NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
64
65         switch (tuple->l3proto) {
66         case AF_INET:
67                 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
68                 key->basic.n_proto = htons(ETH_P_IP);
69                 key->ipv4.src = tuple->src_v4.s_addr;
70                 mask->ipv4.src = 0xffffffff;
71                 key->ipv4.dst = tuple->dst_v4.s_addr;
72                 mask->ipv4.dst = 0xffffffff;
73                 break;
74         case AF_INET6:
75                 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
76                 key->basic.n_proto = htons(ETH_P_IPV6);
77                 key->ipv6.src = tuple->src_v6;
78                 memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
79                 key->ipv6.dst = tuple->dst_v6;
80                 memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
81                 break;
82         default:
83                 return -EOPNOTSUPP;
84         }
85         match->dissector.used_keys |= BIT(key->control.addr_type);
86         mask->basic.n_proto = 0xffff;
87
88         switch (tuple->l4proto) {
89         case IPPROTO_TCP:
90                 key->tcp.flags = 0;
91                 mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
92                 match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
93                 break;
94         case IPPROTO_UDP:
95                 break;
96         default:
97                 return -EOPNOTSUPP;
98         }
99
100         key->basic.ip_proto = tuple->l4proto;
101         mask->basic.ip_proto = 0xff;
102
103         key->tp.src = tuple->src_port;
104         mask->tp.src = 0xffff;
105         key->tp.dst = tuple->dst_port;
106         mask->tp.dst = 0xffff;
107
108         match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) |
109                                       BIT(FLOW_DISSECTOR_KEY_BASIC) |
110                                       BIT(FLOW_DISSECTOR_KEY_PORTS);
111         return 0;
112 }
113
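/*
 * Each mangle action rewrites one 32-bit word at @offset within the
 * header selected by @htype, using a value/mask pair in the same way
 * as tc pedit.
 */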
114 static void flow_offload_mangle(struct flow_action_entry *entry,
115                                 enum flow_action_mangle_base htype, u32 offset,
116                                 const __be32 *value, const __be32 *mask)
117 {
118         entry->id = FLOW_ACTION_MANGLE;
119         entry->mangle.htype = htype;
120         entry->mangle.offset = offset;
121         memcpy(&entry->mangle.mask, mask, sizeof(u32));
122         memcpy(&entry->mangle.val, value, sizeof(u32));
123 }
124
125 static inline struct flow_action_entry *
126 flow_action_entry_next(struct nf_flow_rule *flow_rule)
127 {
128         int i = flow_rule->rule->action.num_entries++;
129
130         return &flow_rule->rule->action.entries[i];
131 }
132
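/*
 * Rewrite the Ethernet source address to that of the egress device,
 * which is the input interface of the tuple in the opposite direction.
 * The 6-byte address is written with two 32-bit mangles at offsets 4
 * and 8 of the Ethernet header.
 */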
133 static int flow_offload_eth_src(struct net *net,
134                                 const struct flow_offload *flow,
135                                 enum flow_offload_tuple_dir dir,
136                                 struct nf_flow_rule *flow_rule)
137 {
138         const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
139         struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
140         struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
141         struct net_device *dev;
142         u32 mask, val;
143         u16 val16;
144
145         dev = dev_get_by_index(net, tuple->iifidx);
146         if (!dev)
147                 return -ENOENT;
148
149         mask = ~0xffff0000;
150         memcpy(&val16, dev->dev_addr, 2);
151         val = val16 << 16;
152         flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
153                             &val, &mask);
154
155         mask = ~0xffffffff;
156         memcpy(&val, dev->dev_addr + 2, 4);
157         flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
158                             &val, &mask);
159         dev_put(dev);
160
161         return 0;
162 }
163
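/*
 * Rewrite the Ethernet destination address to the next hop resolved
 * through the neighbour entry of the cached route. Bail out if the
 * neighbour state is not NUD_VALID, in which case the flow stays in
 * the software path.
 */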
164 static int flow_offload_eth_dst(struct net *net,
165                                 const struct flow_offload *flow,
166                                 enum flow_offload_tuple_dir dir,
167                                 struct nf_flow_rule *flow_rule)
168 {
169         struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
170         struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
171         const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
172         const struct dst_entry *dst_cache;
173         unsigned char ha[ETH_ALEN];
174         struct neighbour *n;
175         u32 mask, val;
176         u8 nud_state;
177         u16 val16;
178
179         dst_cache = flow->tuplehash[dir].tuple.dst_cache;
180         n = dst_neigh_lookup(dst_cache, daddr);
181         if (!n)
182                 return -ENOENT;
183
184         read_lock_bh(&n->lock);
185         nud_state = n->nud_state;
186         ether_addr_copy(ha, n->ha);
187         read_unlock_bh(&n->lock);
188
189         if (!(nud_state & NUD_VALID)) {
190                 neigh_release(n);
191                 return -ENOENT;
192         }
193
194         mask = ~0xffffffff;
195         memcpy(&val, ha, 4);
196         flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
197                             &val, &mask);
198
199         mask = ~0x0000ffff;
200         memcpy(&val16, ha + 4, 2);
201         val = val16;
202         flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
203                             &val, &mask);
204         neigh_release(n);
205
206         return 0;
207 }
208
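/*
 * IPv4 NAT: replace the source (SNAT) or destination (DNAT) address
 * in the IP header with the translated address, which is taken from
 * the tuple of the opposite direction.
 */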
209 static void flow_offload_ipv4_snat(struct net *net,
210                                    const struct flow_offload *flow,
211                                    enum flow_offload_tuple_dir dir,
212                                    struct nf_flow_rule *flow_rule)
213 {
214         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
215         u32 mask = ~htonl(0xffffffff);
216         __be32 addr;
217         u32 offset;
218
219         switch (dir) {
220         case FLOW_OFFLOAD_DIR_ORIGINAL:
221                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
222                 offset = offsetof(struct iphdr, saddr);
223                 break;
224         case FLOW_OFFLOAD_DIR_REPLY:
225                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
226                 offset = offsetof(struct iphdr, daddr);
227                 break;
228         default:
229                 return;
230         }
231
232         flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
233                             &addr, &mask);
234 }
235
236 static void flow_offload_ipv4_dnat(struct net *net,
237                                    const struct flow_offload *flow,
238                                    enum flow_offload_tuple_dir dir,
239                                    struct nf_flow_rule *flow_rule)
240 {
241         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
242         u32 mask = ~htonl(0xffffffff);
243         __be32 addr;
244         u32 offset;
245
246         switch (dir) {
247         case FLOW_OFFLOAD_DIR_ORIGINAL:
248                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
249                 offset = offsetof(struct iphdr, daddr);
250                 break;
251         case FLOW_OFFLOAD_DIR_REPLY:
252                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
253                 offset = offsetof(struct iphdr, saddr);
254                 break;
255         default:
256                 return;
257         }
258
259         flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
260                             &addr, &mask);
261 }
262
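/*
 * IPv6 addresses are rewritten as four consecutive 32-bit mangle
 * actions, one per word of the address.
 */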
263 static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
264                                      unsigned int offset,
265                                      const __be32 *addr, const __be32 *mask)
266 {
267         struct flow_action_entry *entry;
268         int i;
269
270         for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
271                 entry = flow_action_entry_next(flow_rule);
272                 flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
273                                     offset + i * sizeof(u32), &addr[i], mask);
274         }
275 }
276
277 static void flow_offload_ipv6_snat(struct net *net,
278                                    const struct flow_offload *flow,
279                                    enum flow_offload_tuple_dir dir,
280                                    struct nf_flow_rule *flow_rule)
281 {
282         u32 mask = ~htonl(0xffffffff);
283         const __be32 *addr;
284         u32 offset;
285
286         switch (dir) {
287         case FLOW_OFFLOAD_DIR_ORIGINAL:
288                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
289                 offset = offsetof(struct ipv6hdr, saddr);
290                 break;
291         case FLOW_OFFLOAD_DIR_REPLY:
292                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
293                 offset = offsetof(struct ipv6hdr, daddr);
294                 break;
295         default:
296                 return;
297         }
298
299         flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
300 }
301
302 static void flow_offload_ipv6_dnat(struct net *net,
303                                    const struct flow_offload *flow,
304                                    enum flow_offload_tuple_dir dir,
305                                    struct nf_flow_rule *flow_rule)
306 {
307         u32 mask = ~htonl(0xffffffff);
308         const __be32 *addr;
309         u32 offset;
310
311         switch (dir) {
312         case FLOW_OFFLOAD_DIR_ORIGINAL:
313                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
314                 offset = offsetof(struct ipv6hdr, daddr);
315                 break;
316         case FLOW_OFFLOAD_DIR_REPLY:
317                 addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
318                 offset = offsetof(struct ipv6hdr, saddr);
319                 break;
320         default:
321                 return;
322         }
323
324         flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
325 }
326
327 static int flow_offload_l4proto(const struct flow_offload *flow)
328 {
329         u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
330         u8 type = 0;
331
332         switch (protonum) {
333         case IPPROTO_TCP:
334                 type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
335                 break;
336         case IPPROTO_UDP:
337                 type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
338                 break;
339         default:
340                 break;
341         }
342
343         return type;
344 }
345
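/*
 * Port NAT: source and destination ports share the first 32-bit word
 * of the TCP/UDP header, so the replacement port is shifted into the
 * correct half-word and the other half is preserved via the mask.
 */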
346 static void flow_offload_port_snat(struct net *net,
347                                    const struct flow_offload *flow,
348                                    enum flow_offload_tuple_dir dir,
349                                    struct nf_flow_rule *flow_rule)
350 {
351         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
352         u32 mask, port;
353         u32 offset;
354
355         switch (dir) {
356         case FLOW_OFFLOAD_DIR_ORIGINAL:
357                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
358                 offset = 0; /* offsetof(struct tcphdr, source); */
359                 port = htonl(port << 16);
360                 mask = ~htonl(0xffff0000);
361                 break;
362         case FLOW_OFFLOAD_DIR_REPLY:
363                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
364                 offset = 0; /* offsetof(struct tcphdr, dest); */
365                 port = htonl(port);
366                 mask = ~htonl(0xffff);
367                 break;
368         default:
369                 return;
370         }
371
372         flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
373                             &port, &mask);
374 }
375
376 static void flow_offload_port_dnat(struct net *net,
377                                    const struct flow_offload *flow,
378                                    enum flow_offload_tuple_dir dir,
379                                    struct nf_flow_rule *flow_rule)
380 {
381         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
382         u32 mask, port;
383         u32 offset;
384
385         switch (dir) {
386         case FLOW_OFFLOAD_DIR_ORIGINAL:
387                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
388                 offset = 0; /* offsetof(struct tcphdr, dest); */
389                 port = htonl(port);
390                 mask = ~htonl(0xffff);
391                 break;
392         case FLOW_OFFLOAD_DIR_REPLY:
393                 port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
394                 offset = 0; /* offsetof(struct tcphdr, source); */
395                 port = htonl(port << 16);
396                 mask = ~htonl(0xffff0000);
397                 break;
398         default:
399                 return;
400         }
401
402         flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
403                             &port, &mask);
404 }
405
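/*
 * After IPv4 NAT, ask the driver to recompute the IP header checksum
 * and the TCP or UDP checksum.
 */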
406 static void flow_offload_ipv4_checksum(struct net *net,
407                                        const struct flow_offload *flow,
408                                        struct nf_flow_rule *flow_rule)
409 {
410         u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
411         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
412
413         entry->id = FLOW_ACTION_CSUM;
414         entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
415
416         switch (protonum) {
417         case IPPROTO_TCP:
418                 entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
419                 break;
420         case IPPROTO_UDP:
421                 entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
422                 break;
423         }
424 }
425
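/*
 * Final action: forward the packet through the output device of the
 * cached route. The device reference taken here is released in
 * __nf_flow_offload_destroy().
 */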
426 static void flow_offload_redirect(const struct flow_offload *flow,
427                                   enum flow_offload_tuple_dir dir,
428                                   struct nf_flow_rule *flow_rule)
429 {
430         struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
431         struct rtable *rt;
432
433         rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
434         entry->id = FLOW_ACTION_REDIRECT;
435         entry->dev = rt->dst.dev;
436         dev_hold(rt->dst.dev);
437 }
438
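/*
 * Build the complete action list for one direction of a flow: Ethernet
 * address rewrites, optional NAT mangles (plus checksum fixup for
 * IPv4), and the final redirect to the egress device.
 */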
439 int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
440                             enum flow_offload_tuple_dir dir,
441                             struct nf_flow_rule *flow_rule)
442 {
443         if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
444             flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
445                 return -1;
446
447         if (flow->flags & FLOW_OFFLOAD_SNAT) {
448                 flow_offload_ipv4_snat(net, flow, dir, flow_rule);
449                 flow_offload_port_snat(net, flow, dir, flow_rule);
450         }
451         if (flow->flags & FLOW_OFFLOAD_DNAT) {
452                 flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
453                 flow_offload_port_dnat(net, flow, dir, flow_rule);
454         }
455         if (flow->flags & FLOW_OFFLOAD_SNAT ||
456             flow->flags & FLOW_OFFLOAD_DNAT)
457                 flow_offload_ipv4_checksum(net, flow, flow_rule);
458
459         flow_offload_redirect(flow, dir, flow_rule);
460
461         return 0;
462 }
463 EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
464
465 int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
466                             enum flow_offload_tuple_dir dir,
467                             struct nf_flow_rule *flow_rule)
468 {
469         if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
470             flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
471                 return -1;
472
473         if (flow->flags & FLOW_OFFLOAD_SNAT) {
474                 flow_offload_ipv6_snat(net, flow, dir, flow_rule);
475                 flow_offload_port_snat(net, flow, dir, flow_rule);
476         }
477         if (flow->flags & FLOW_OFFLOAD_DNAT) {
478                 flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
479                 flow_offload_port_dnat(net, flow, dir, flow_rule);
480         }
481
482         flow_offload_redirect(flow, dir, flow_rule);
483
484         return 0;
485 }
486 EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
487
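/*
 * Allocate a flow_rule with room for up to NF_FLOW_RULE_ACTION_MAX
 * actions and fill in the match and the action list for the given
 * direction via the flowtable type's ->action() callback.
 */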
488 #define NF_FLOW_RULE_ACTION_MAX 16
489
490 static struct nf_flow_rule *
491 nf_flow_offload_rule_alloc(struct net *net,
492                            const struct flow_offload_work *offload,
493                            enum flow_offload_tuple_dir dir)
494 {
495         const struct nf_flowtable *flowtable = offload->flowtable;
496         const struct flow_offload *flow = offload->flow;
497         const struct flow_offload_tuple *tuple;
498         struct nf_flow_rule *flow_rule;
499         int err = -ENOMEM;
500
501         flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
502         if (!flow_rule)
503                 goto err_flow;
504
505         flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
506         if (!flow_rule->rule)
507                 goto err_flow_rule;
508
509         flow_rule->rule->match.dissector = &flow_rule->match.dissector;
510         flow_rule->rule->match.mask = &flow_rule->match.mask;
511         flow_rule->rule->match.key = &flow_rule->match.key;
512
513         tuple = &flow->tuplehash[dir].tuple;
514         err = nf_flow_rule_match(&flow_rule->match, tuple);
515         if (err < 0)
516                 goto err_flow_match;
517
518         flow_rule->rule->action.num_entries = 0;
519         if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
520                 goto err_flow_match;
521
522         return flow_rule;
523
524 err_flow_match:
525         kfree(flow_rule->rule);
526 err_flow_rule:
527         kfree(flow_rule);
528 err_flow:
529         return NULL;
530 }
531
532 static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
533 {
534         struct flow_action_entry *entry;
535         int i;
536
537         for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
538                 entry = &flow_rule->rule->action.entries[i];
539                 if (entry->id != FLOW_ACTION_REDIRECT)
540                         continue;
541
542                 dev_put(entry->dev);
543         }
544         kfree(flow_rule->rule);
545         kfree(flow_rule);
546 }
547
548 static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
549 {
550         int i;
551
552         for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
553                 __nf_flow_offload_destroy(flow_rule[i]);
554 }
555
556 static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
557                                  struct nf_flow_rule *flow_rule[])
558 {
559         struct net *net = read_pnet(&offload->flowtable->net);
560
561         flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
562                                                   FLOW_OFFLOAD_DIR_ORIGINAL);
563         if (!flow_rule[0])
564                 return -ENOMEM;
565
566         flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
567                                                   FLOW_OFFLOAD_DIR_REPLY);
568         if (!flow_rule[1]) {
569                 __nf_flow_offload_destroy(flow_rule[0]);
570                 return -ENOMEM;
571         }
572
573         return 0;
574 }
575
576 static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
577                                  __be16 proto, int priority,
578                                  enum flow_cls_command cmd,
579                                  const struct flow_offload_tuple *tuple,
580                                  struct netlink_ext_ack *extack)
581 {
582         cls_flow->common.protocol = proto;
583         cls_flow->common.prio = priority;
584         cls_flow->common.extack = extack;
585         cls_flow->command = cmd;
586         cls_flow->cookie = (unsigned long)tuple;
587 }
588
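/*
 * Offer one direction of the flow to every callback registered on the
 * flowtable's flow block and return the number of callbacks that
 * accepted it.
 */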
589 static int flow_offload_tuple_add(struct flow_offload_work *offload,
590                                   struct nf_flow_rule *flow_rule,
591                                   enum flow_offload_tuple_dir dir)
592 {
593         struct nf_flowtable *flowtable = offload->flowtable;
594         struct flow_cls_offload cls_flow = {};
595         struct flow_block_cb *block_cb;
596         struct netlink_ext_ack extack;
597         __be16 proto = ETH_P_ALL;
598         int err, i = 0;
599
600         nf_flow_offload_init(&cls_flow, proto, offload->priority,
601                              FLOW_CLS_REPLACE,
602                              &offload->flow->tuplehash[dir].tuple, &extack);
603         cls_flow.rule = flow_rule->rule;
604
605         list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) {
606                 err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
607                                    block_cb->cb_priv);
608                 if (err < 0)
609                         continue;
610
611                 i++;
612         }
613
614         return i;
615 }
616
617 static void flow_offload_tuple_del(struct flow_offload_work *offload,
618                                    enum flow_offload_tuple_dir dir)
619 {
620         struct nf_flowtable *flowtable = offload->flowtable;
621         struct flow_cls_offload cls_flow = {};
622         struct flow_block_cb *block_cb;
623         struct netlink_ext_ack extack;
624         __be16 proto = ETH_P_ALL;
625
626         nf_flow_offload_init(&cls_flow, proto, offload->priority,
627                              FLOW_CLS_DESTROY,
628                              &offload->flow->tuplehash[dir].tuple, &extack);
629
630         list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
631                 block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
632
633         offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD;
634 }
635
636 static int flow_offload_rule_add(struct flow_offload_work *offload,
637                                  struct nf_flow_rule *flow_rule[])
638 {
639         int ok_count = 0;
640
641         ok_count += flow_offload_tuple_add(offload, flow_rule[0],
642                                            FLOW_OFFLOAD_DIR_ORIGINAL);
643         ok_count += flow_offload_tuple_add(offload, flow_rule[1],
644                                            FLOW_OFFLOAD_DIR_REPLY);
645         if (ok_count == 0)
646                 return -ENOENT;
647
648         return 0;
649 }
650
651 static int flow_offload_work_add(struct flow_offload_work *offload)
652 {
653         struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
654         int err;
655
656         err = nf_flow_offload_alloc(offload, flow_rule);
657         if (err < 0)
658                 return -ENOMEM;
659
660         err = flow_offload_rule_add(offload, flow_rule);
661
662         nf_flow_offload_destroy(flow_rule);
663
664         return err;
665 }
666
667 static void flow_offload_work_del(struct flow_offload_work *offload)
668 {
669         flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
670         flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
671 }
672
673 static void flow_offload_tuple_stats(struct flow_offload_work *offload,
674                                      enum flow_offload_tuple_dir dir,
675                                      struct flow_stats *stats)
676 {
677         struct nf_flowtable *flowtable = offload->flowtable;
678         struct flow_cls_offload cls_flow = {};
679         struct flow_block_cb *block_cb;
680         struct netlink_ext_ack extack;
681         __be16 proto = ETH_P_ALL;
682
683         nf_flow_offload_init(&cls_flow, proto, offload->priority,
684                              FLOW_CLS_STATS,
685                              &offload->flow->tuplehash[dir].tuple, &extack);
686
687         list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
688                 block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
689         memcpy(stats, &cls_flow.stats, sizeof(*stats));
690 }
691
692 static void flow_offload_work_stats(struct flow_offload_work *offload)
693 {
694         struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
695         u64 lastused;
696
697         flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
698         flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
699
700         lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
701         offload->flow->timeout = max_t(u64, offload->flow->timeout,
702                                        lastused + NF_FLOW_TIMEOUT);
703 }
704
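/*
 * Work handler: move the pending list aside under the lock, then
 * dispatch each queued request to the add, delete or stats routine
 * and free it.
 */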
705 static void flow_offload_work_handler(struct work_struct *work)
706 {
707         struct flow_offload_work *offload, *next;
708         LIST_HEAD(offload_pending_list);
709         int ret;
710
711         spin_lock_bh(&flow_offload_pending_list_lock);
712         list_replace_init(&flow_offload_pending_list, &offload_pending_list);
713         spin_unlock_bh(&flow_offload_pending_list_lock);
714
715         list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
716                 switch (offload->cmd) {
717                 case FLOW_CLS_REPLACE:
718                         ret = flow_offload_work_add(offload);
719                         if (ret < 0)
720                                 offload->flow->flags &= ~FLOW_OFFLOAD_HW;
721                         break;
722                 case FLOW_CLS_DESTROY:
723                         flow_offload_work_del(offload);
724                         break;
725                 case FLOW_CLS_STATS:
726                         flow_offload_work_stats(offload);
727                         break;
728                 default:
729                         WARN_ON_ONCE(1);
730                 }
731                 list_del(&offload->list);
732                 kfree(offload);
733         }
734 }
735
736 static void flow_offload_queue_work(struct flow_offload_work *offload)
737 {
738         spin_lock_bh(&flow_offload_pending_list_lock);
739         list_add_tail(&offload->list, &flow_offload_pending_list);
740         spin_unlock_bh(&flow_offload_pending_list_lock);
741
742         schedule_work(&nf_flow_offload_work);
743 }
744
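/*
 * Entry points called by the flowtable core: queue an add, delete or
 * stats request for the work handler. Allocation uses GFP_ATOMIC as
 * the callers may not be allowed to sleep.
 */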
745 void nf_flow_offload_add(struct nf_flowtable *flowtable,
746                          struct flow_offload *flow)
747 {
748         struct flow_offload_work *offload;
749
750         offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
751         if (!offload)
752                 return;
753
754         offload->cmd = FLOW_CLS_REPLACE;
755         offload->flow = flow;
756         offload->priority = flowtable->priority;
757         offload->flowtable = flowtable;
758         flow->flags |= FLOW_OFFLOAD_HW;
759
760         flow_offload_queue_work(offload);
761 }
762
763 void nf_flow_offload_del(struct nf_flowtable *flowtable,
764                          struct flow_offload *flow)
765 {
766         struct flow_offload_work *offload;
767
768         offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
769         if (!offload)
770                 return;
771
772         offload->cmd = FLOW_CLS_DESTROY;
773         offload->flow = flow;
774         offload->flow->flags |= FLOW_OFFLOAD_HW_DYING;
775         offload->flowtable = flowtable;
776
777         flow_offload_queue_work(offload);
778 }
779
780 void nf_flow_offload_stats(struct nf_flowtable *flowtable,
781                            struct flow_offload *flow)
782 {
783         struct flow_offload_work *offload;
784         __s32 delta;
785
786         delta = nf_flow_timeout_delta(flow->timeout);
787         if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10) ||
788             flow->flags & FLOW_OFFLOAD_HW_DYING)
789                 return;
790
791         offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
792         if (!offload)
793                 return;
794
795         offload->cmd = FLOW_CLS_STATS;
796         offload->flow = flow;
797         offload->flowtable = flowtable;
798
799         flow_offload_queue_work(offload);
800 }
801
802 void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
803 {
804         if (flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)
805                 flush_work(&nf_flow_offload_work);
806 }
807
808 static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
809                                      struct flow_block_offload *bo,
810                                      enum flow_block_command cmd)
811 {
812         struct flow_block_cb *block_cb, *next;
813         int err = 0;
814
815         switch (cmd) {
816         case FLOW_BLOCK_BIND:
817                 list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
818                 break;
819         case FLOW_BLOCK_UNBIND:
820                 list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
821                         list_del(&block_cb->list);
822                         flow_block_cb_free(block_cb);
823                 }
824                 break;
825         default:
826                 WARN_ON_ONCE(1);
827                 err = -EOPNOTSUPP;
828         }
829
830         return err;
831 }
832
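/*
 * Bind or unbind a flowtable to a device: ask the driver via
 * ndo_setup_tc(TC_SETUP_FT) to populate the callback list, then splice
 * it into (or remove it from) the flowtable's flow block.
 */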
833 int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
834                                 struct net_device *dev,
835                                 enum flow_block_command cmd)
836 {
837         struct netlink_ext_ack extack = {};
838         struct flow_block_offload bo = {};
839         int err;
840
841         if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
842                 return 0;
843
844         if (!dev->netdev_ops->ndo_setup_tc)
845                 return -EOPNOTSUPP;
846
847         bo.net          = dev_net(dev);
848         bo.block        = &flowtable->flow_block;
849         bo.command      = cmd;
850         bo.binder_type  = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
851         bo.extack       = &extack;
852         INIT_LIST_HEAD(&bo.cb_list);
853
854         err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, &bo);
855         if (err < 0)
856                 return err;
857
858         return nf_flow_table_block_setup(flowtable, &bo, cmd);
859 }
860 EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
861
862 int nf_flow_table_offload_init(void)
863 {
864         INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);
865
866         return 0;
867 }
868
869 void nf_flow_table_offload_exit(void)
870 {
871         struct flow_offload_work *offload, *next;
872         LIST_HEAD(offload_pending_list);
873
874         cancel_work_sync(&nf_flow_offload_work);
875 
            /* Drain anything still queued after the work was cancelled. */
            spin_lock_bh(&flow_offload_pending_list_lock);
            list_replace_init(&flow_offload_pending_list, &offload_pending_list);
            spin_unlock_bh(&flow_offload_pending_list_lock);

876         list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
877                 list_del(&offload->list);
878                 kfree(offload);
879         }
880 }