1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Expectation handling for nf_conntrack. */
4 /* (C) 1999-2001 Paul `Rusty' Russell
5 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
6 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
7 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/skbuff.h>
13 #include <linux/proc_fs.h>
14 #include <linux/seq_file.h>
15 #include <linux/stddef.h>
16 #include <linux/slab.h>
17 #include <linux/err.h>
18 #include <linux/percpu.h>
19 #include <linux/kernel.h>
20 #include <linux/jhash.h>
21 #include <linux/moduleparam.h>
22 #include <linux/export.h>
23 #include <net/net_namespace.h>
24 #include <net/netns/hash.h>
26 #include <net/netfilter/nf_conntrack.h>
27 #include <net/netfilter/nf_conntrack_core.h>
28 #include <net/netfilter/nf_conntrack_expect.h>
29 #include <net/netfilter/nf_conntrack_helper.h>
30 #include <net/netfilter/nf_conntrack_tuple.h>
31 #include <net/netfilter/nf_conntrack_zones.h>
/*
 * File-wide expectation state: the hash table and its bucket count (both
 * exported to other modules), the limit that the per-netns expectation
 * count is compared against, the slab cache used for allocations, and the
 * hash seed that is filled in once on first use by the hash function.
 */
33 unsigned int nf_ct_expect_hsize __read_mostly;
34 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
36 struct hlist_head *nf_ct_expect_hash __read_mostly;
37 EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
39 unsigned int nf_ct_expect_max __read_mostly;
41 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
42 static unsigned int nf_ct_expect_hashrnd __read_mostly;
44 /* nf_conntrack_expect helper functions */
/*
 * Unlink @exp from the global hash and from its master's expectation list,
 * decrement the matching counters, emit an IPEXP_DESTROY event and drop one
 * reference on @exp.
 *
 * @exp:    expectation to unlink
 * @portid: handed unchanged to nf_ct_expect_event_report()
 * @report: handed unchanged to nf_ct_expect_event_report()
 *
 * NOTE(review): presumably must be called with nf_conntrack_expect_lock
 * held, as the list and counters are modified without locking here - confirm.
 */
45 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
46 u32 portid, int report)
48 struct nf_conn_help *master_help = nfct_help(exp->master);
49 struct net *net = nf_ct_exp_net(exp);
/* Sanity checks: the master needs a helper area and the timer must not
 * be pending any more when the expectation is unlinked. */
51 WARN_ON(!master_help);
52 WARN_ON(timer_pending(&exp->timeout));
/* Remove from the global hash table. */
54 hlist_del_rcu(&exp->hnode);
55 net->ct.expect_count--;
/* Remove from the master's per-connection list. */
57 hlist_del_rcu(&exp->lnode);
58 master_help->expecting[exp->class]--;
60 nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
61 nf_ct_expect_put(exp);
63 NF_CT_STAT_INC(net, expect_delete);
65 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
/*
 * Timer callback installed by nf_ct_expect_insert(): the expectation
 * expired without being matched. Unlink it under nf_conntrack_expect_lock,
 * then drop one more reference (insert takes one reference for the hash
 * and one for the timer).
 */
67 static void nf_ct_expectation_timed_out(struct timer_list *t)
/* Recover the expectation that embeds this timer. */
69 struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);
71 spin_lock_bh(&nf_conntrack_expect_lock);
72 nf_ct_unlink_expect(exp);
73 spin_unlock_bh(&nf_conntrack_expect_lock);
74 nf_ct_expect_put(exp);
/*
 * Compute the expectation hash bucket for the destination part of @tuple.
 *
 * @n:     network namespace; mixed into the seed via net_hash_mix()
 * @tuple: tuple whose destination address, destination port, protocol
 *         number and l3 protocol number feed the hash
 *
 * The result is scaled to nf_ct_expect_hsize with reciprocal_scale().
 */
77 static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
79 unsigned int hash, seed;
/* The random seed is generated only on the first call. */
81 get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
83 seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
/* Hash the destination address words; protocol numbers and destination
 * port are folded into the initial value together with the seed. */
85 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
86 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
87 (__force __u16)tuple->dst.u.all) ^ seed);
89 return reciprocal_scale(hash, nf_ct_expect_hsize);
/*
 * Test whether expectation @i applies to @tuple: the tuple must match the
 * expectation's tuple under the expectation's mask, the expectation's
 * master must live in @net, and the master's zone must match @zone.
 */
93 nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
94 const struct nf_conntrack_expect *i,
95 const struct nf_conntrack_zone *zone,
96 const struct net *net)
98 return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
99 net_eq(net, nf_ct_net(i->master)) &&
100 nf_ct_zone_equal_any(i->master, zone);
/*
 * Remove @exp if its timer can still be cancelled: a successful del_timer()
 * means this caller is responsible for the removal, so the expectation is
 * unlinked and one reference is dropped.
 *
 * NOTE(review): the return statements are not visible here; callers test
 * the result as a boolean, presumably true when the removal happened.
 */
103 bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
105 if (del_timer(&exp->timeout)) {
106 nf_ct_unlink_expect(exp);
107 nf_ct_expect_put(exp);
112 EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
/*
 * Look up an expectation matching @tuple within @net and @zone.
 *
 * The bucket is walked with the RCU list iterator and no reference is
 * taken on the entry here.
 * NOTE(review): presumably the caller must be inside an RCU read-side
 * critical section - confirm.
 */
114 struct nf_conntrack_expect *
115 __nf_ct_expect_find(struct net *net,
116 const struct nf_conntrack_zone *zone,
117 const struct nf_conntrack_tuple *tuple)
119 struct nf_conntrack_expect *i;
/* Fast-path check: this namespace currently has no expectations. */
122 if (!net->ct.expect_count)
125 h = nf_ct_expect_dst_hash(net, tuple);
126 hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
127 if (nf_ct_exp_equal(tuple, i, zone, net))
132 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
134 /* Just find an expectation corresponding to a tuple. */
/*
 * Wrapper around __nf_ct_expect_find() that also tries to take a reference
 * on the match with refcount_inc_not_zero(); that call fails for an entry
 * whose reference count has already dropped to zero.
 */
135 struct nf_conntrack_expect *
136 nf_ct_expect_find_get(struct net *net,
137 const struct nf_conntrack_zone *zone,
138 const struct nf_conntrack_tuple *tuple)
140 struct nf_conntrack_expect *i;
143 i = __nf_ct_expect_find(net, zone, tuple);
144 if (i && !refcount_inc_not_zero(&i->use))
150 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/*
 * Search the hash bucket for an active (not NF_CT_EXPECT_INACTIVE)
 * expectation that matches @tuple in @net and @zone. The candidate's master
 * must be confirmed and not dying, and a reference on the master is taken
 * with atomic_inc_not_zero(). A NF_CT_EXPECT_PERMANENT expectation gets an
 * extra reference; any other expectation is unlinked when its timer could
 * still be cancelled. The master reference is dropped again on the path
 * where del_timer() failed.
 */
152 /* If an expectation for this connection is found, it gets deleted from
153 * global list then returned. */
154 struct nf_conntrack_expect *
155 nf_ct_find_expectation(struct net *net,
156 const struct nf_conntrack_zone *zone,
157 const struct nf_conntrack_tuple *tuple)
159 struct nf_conntrack_expect *i, *exp = NULL;
162 if (!net->ct.expect_count)
165 h = nf_ct_expect_dst_hash(net, tuple);
166 hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
167 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
168 nf_ct_exp_equal(tuple, i, zone, net)) {
176 /* If master is not in hash table yet (ie. packet hasn't left
177 this machine yet), how can other end know about expected?
178 Hence these are not the droids you are looking for (if
179 master ct never got confirmed, we'd hold a reference to it
180 and weird things would happen to future packets). */
181 if (!nf_ct_is_confirmed(exp->master))
184 /* Avoid race with other CPUs, that for exp->master ct, is
185 * about to invoke ->destroy(), or nf_ct_delete() via timeout
188 * The atomic_inc_not_zero() check tells: If that fails, we
189 * know that the ct is being destroyed. If it succeeds, we
190 * can be sure the ct cannot disappear underneath.
192 if (unlikely(nf_ct_is_dying(exp->master) ||
193 !atomic_inc_not_zero(&exp->master->ct_general.use)))
196 if (exp->flags & NF_CT_EXPECT_PERMANENT) {
197 refcount_inc(&exp->use);
199 } else if (del_timer(&exp->timeout)) {
200 nf_ct_unlink_expect(exp);
203 /* Undo exp->master refcnt increase, if del_timer() failed */
204 nf_ct_put(exp->master);
209 /* Delete all expectations that were created by conntrack @ct. */
210 void nf_ct_remove_expectations(struct nf_conn *ct)
212 struct nf_conn_help *help = nfct_help(ct);
213 struct nf_conntrack_expect *exp;
214 struct hlist_node *next;
216 /* Optimization: most connections never expect any others. */
/* The _safe iterator is needed because entries are unlinked during the
 * walk; the whole walk runs under nf_conntrack_expect_lock. */
220 spin_lock_bh(&nf_conntrack_expect_lock);
221 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
222 nf_ct_remove_expect(exp);
224 spin_unlock_bh(&nf_conntrack_expect_lock);
226 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
228 /* Would two expected things clash? */
/*
 * Builds the intersection of both expectations' source masks (port and
 * every address word) and compares the two tuples under that mask. A clash
 * additionally requires both masters to be in the same network namespace
 * and in matching zones.
 */
229 static inline int expect_clash(const struct nf_conntrack_expect *a,
230 const struct nf_conntrack_expect *b)
232 /* Part covered by intersection of masks must be unequal,
233 otherwise they clash */
234 struct nf_conntrack_tuple_mask intersect_mask;
237 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
239 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
240 intersect_mask.src.u3.all[count] =
241 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
244 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
245 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
246 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
/*
 * Are two expectations identical? Requires equal tuples, equal masks, the
 * same network namespace for both masters, and matching zones.
 */
249 static inline int expect_matches(const struct nf_conntrack_expect *a,
250 const struct nf_conntrack_expect *b)
252 return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
253 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
254 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
255 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
/*
 * Decide whether @a and @b count as having the same master conntrack.
 * Without NF_CT_EXP_F_SKIP_MASTER in the flags the two master pointers must
 * be identical.
 * NOTE(review): the result returned when NF_CT_EXP_F_SKIP_MASTER is set is
 * not visible here; presumably the master comparison is skipped (true).
 */
258 static bool master_matches(const struct nf_conntrack_expect *a,
259 const struct nf_conntrack_expect *b,
262 if (flags & NF_CT_EXP_F_SKIP_MASTER)
265 return a->master == b->master;
268 /* Generally a bad idea to call this: could have matched already. */
/* Removes @exp via nf_ct_remove_expect() while holding
 * nf_conntrack_expect_lock. */
269 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
271 spin_lock_bh(&nf_conntrack_expect_lock);
272 nf_ct_remove_expect(exp);
273 spin_unlock_bh(&nf_conntrack_expect_lock);
275 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
277 /* We don't increase the master conntrack refcount for non-fulfilled
278 * conntracks. During the conntrack destruction, the expectations are
279 * always killed before the conntrack itself */
/*
 * Allocate an expectation from the slab cache with GFP_ATOMIC and give it
 * an initial reference count of 1.
 *
 * @me: conntrack on whose behalf the expectation is allocated
 */
280 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
282 struct nf_conntrack_expect *new;
284 new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
289 refcount_set(&new->use, 1);
292 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
/*
 * Fill in the tuple and mask of @exp.
 *
 * @exp:   expectation to initialise
 * @class: expectation class
 * @saddr: source address
 * @daddr: destination address; copied into the tuple
 * @proto: layer 4 protocol number stored in the tuple
 * @src:   source port
 * @dst:   destination port; copied into the tuple
 *
 * Source address and source port each have two visible forms: copied from
 * the argument with an all-ones mask, or zeroed together with their mask.
 * NOTE(review): the conditions selecting between the two forms are not
 * visible here; presumably a NULL @saddr / @src selects the zeroed
 * (wildcard) form - confirm.
 */
294 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
296 const union nf_inet_addr *saddr,
297 const union nf_inet_addr *daddr,
298 u_int8_t proto, const __be16 *src, const __be16 *dst)
302 if (family == AF_INET)
309 exp->expectfn = NULL;
311 exp->tuple.src.l3num = family;
312 exp->tuple.dst.protonum = proto;
/* Source address: copy len bytes, zero the remainder, mask exactly the
 * copied bytes. */
315 memcpy(&exp->tuple.src.u3, saddr, len);
316 if (sizeof(exp->tuple.src.u3) > len)
317 /* address needs to be cleared for nf_ct_tuple_equal */
318 memset((void *)&exp->tuple.src.u3 + len, 0x00,
319 sizeof(exp->tuple.src.u3) - len);
320 memset(&exp->mask.src.u3, 0xFF, len);
321 if (sizeof(exp->mask.src.u3) > len)
322 memset((void *)&exp->mask.src.u3 + len, 0x00,
323 sizeof(exp->mask.src.u3) - len);
/* Source address, zeroed form: address and mask are all zero. */
325 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
326 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
/* Source port: exact value with a full 16-bit mask ... */
330 exp->tuple.src.u.all = *src;
331 exp->mask.src.u.all = htons(0xFFFF);
/* ... or zero value with zero mask. */
333 exp->tuple.src.u.all = 0;
334 exp->mask.src.u.all = 0;
/* Destination address and port are always taken from the arguments. */
337 memcpy(&exp->tuple.dst.u3, daddr, len);
338 if (sizeof(exp->tuple.dst.u3) > len)
339 /* address needs to be cleared for nf_ct_tuple_equal */
340 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
341 sizeof(exp->tuple.dst.u3) - len);
343 exp->tuple.dst.u.all = *dst;
/* NAT bookkeeping starts out cleared when NAT support is built. */
345 #if IS_ENABLED(CONFIG_NF_NAT)
346 memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
347 memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
350 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
/*
 * RCU callback queued by nf_ct_expect_put(): map the rcu_head back to its
 * enclosing expectation and return the object to the slab cache.
 */
352 static void nf_ct_expect_free_rcu(struct rcu_head *head)
354 struct nf_conntrack_expect *exp;
356 exp = container_of(head, struct nf_conntrack_expect, rcu);
357 kmem_cache_free(nf_ct_expect_cachep, exp);
/*
 * Drop one reference on @exp. When the last reference goes away the object
 * is not freed directly; the free is queued with call_rcu() and performed
 * by nf_ct_expect_free_rcu().
 */
360 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
362 if (refcount_dec_and_test(&exp->use))
363 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
365 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
/*
 * Link @exp into the master's expectation list and the global hash, start
 * its timeout timer and update the counters and statistics.
 *
 * The rcu_dereference_protected() annotation states that
 * nf_conntrack_expect_lock is held by the caller.
 */
367 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
369 struct nf_conn_help *master_help = nfct_help(exp->master);
370 struct nf_conntrack_helper *helper;
371 struct net *net = nf_ct_exp_net(exp);
372 unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
374 /* two references : one for hash insert, one for the timer */
375 refcount_add(2, &exp->use);
377 timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
378 helper = rcu_dereference_protected(master_help->helper,
379 lockdep_is_held(&nf_conntrack_expect_lock));
/* The helper's policy timeout for this class is multiplied by HZ to get
 * jiffies. */
381 exp->timeout.expires = jiffies +
382 helper->expect_policy[exp->class].timeout * HZ;
384 add_timer(&exp->timeout);
386 hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
387 master_help->expecting[exp->class]++;
389 hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
390 net->ct.expect_count++;
392 NF_CT_STAT_INC(net, expect_create);
395 /* Race with expectations being used means we could have none to find; OK. */
/*
 * Walk @master's expectation list looking at entries whose class equals
 * that of @new, then remove the entry remembered in 'last'.
 * NOTE(review): the assignment of 'last' is not visible here; presumably it
 * ends up as the final same-class entry in the list, i.e. the oldest one,
 * since insertion happens at the list head - confirm.
 */
396 static void evict_oldest_expect(struct nf_conn *master,
397 struct nf_conntrack_expect *new)
399 struct nf_conn_help *master_help = nfct_help(master);
400 struct nf_conntrack_expect *exp, *last = NULL;
402 hlist_for_each_entry(exp, &master_help->expectations, lnode) {
403 if (exp->class == new->class)
408 nf_ct_remove_expect(last);
/*
 * Validate that @expect may be inserted.
 *
 * Visible steps:
 *  1. Walk the hash bucket of @expect's tuple. An entry that matches
 *     @expect (same master as decided by master_matches(), same tuple, mask,
 *     namespace and zone) is compared on class and master and then removed
 *     via nf_ct_remove_expect(); an entry that merely clashes is handled
 *     separately.
 *  2. Enforce the helper policy's per-class limit (max_expected), evicting
 *     the oldest same-class expectation once before re-checking.
 *  3. Enforce the global limit nf_ct_expect_max against the per-netns
 *     expectation count, with a rate-limited warning.
 *
 * NOTE(review): the error codes and return statements are not visible here.
 */
411 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
414 const struct nf_conntrack_expect_policy *p;
415 struct nf_conntrack_expect *i;
416 struct nf_conn *master = expect->master;
417 struct nf_conn_help *master_help = nfct_help(master);
418 struct nf_conntrack_helper *helper;
419 struct net *net = nf_ct_exp_net(expect);
420 struct hlist_node *next;
428 h = nf_ct_expect_dst_hash(net, &expect->tuple);
/* _safe iteration: a matching entry may be removed inside the loop. */
429 hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
430 if (master_matches(i, expect, flags) &&
431 expect_matches(i, expect)) {
432 if (i->class != expect->class ||
433 i->master != expect->master)
436 if (nf_ct_remove_expect(i))
438 } else if (expect_clash(i, expect)) {
443 /* Will be over limit? */
444 helper = rcu_dereference_protected(master_help->helper,
445 lockdep_is_held(&nf_conntrack_expect_lock));
447 p = &helper->expect_policy[expect->class];
/* A max_expected of zero disables the per-class limit. */
448 if (p->max_expected &&
449 master_help->expecting[expect->class] >= p->max_expected) {
450 evict_oldest_expect(master, expect);
/* Re-check: the eviction may not have removed anything. */
451 if (master_help->expecting[expect->class]
452 >= p->max_expected) {
459 if (net->ct.expect_count >= nf_ct_expect_max) {
460 net_warn_ratelimited("nf_conntrack: expectation table full\n");
/*
 * Register @expect: under nf_conntrack_expect_lock run
 * __nf_ct_expect_check() and insert the expectation, then release the lock
 * and emit an IPEXP_NEW event outside of it.
 *
 * @expect: expectation to register
 * @portid: handed unchanged to nf_ct_expect_event_report()
 * @report: handed unchanged to nf_ct_expect_event_report()
 * @flags:  handed to __nf_ct_expect_check()
 *
 * NOTE(review): the handling of the check result is not visible here; the
 * second unlock is presumably the failure path that skips the insert.
 */
467 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
468 u32 portid, int report, unsigned int flags)
472 spin_lock_bh(&nf_conntrack_expect_lock);
473 ret = __nf_ct_expect_check(expect, flags);
477 nf_ct_expect_insert(expect);
479 spin_unlock_bh(&nf_conntrack_expect_lock);
480 nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
483 spin_unlock_bh(&nf_conntrack_expect_lock);
486 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
/*
 * Walk every bucket of the expectation hash under
 * nf_conntrack_expect_lock and destroy the entries selected by @iter.
 *
 * @iter: predicate called with each expectation and the caller's data
 *        pointer; a true result selects the entry for removal
 *
 * A selected entry is only removed if its timer could still be cancelled;
 * it is then unlinked and one reference is dropped.
 */
488 void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
491 struct nf_conntrack_expect *exp;
492 const struct hlist_node *next;
495 spin_lock_bh(&nf_conntrack_expect_lock);
497 for (i = 0; i < nf_ct_expect_hsize; i++) {
498 hlist_for_each_entry_safe(exp, next,
499 &nf_ct_expect_hash[i],
501 if (iter(exp, data) && del_timer(&exp->timeout)) {
502 nf_ct_unlink_expect(exp);
503 nf_ct_expect_put(exp);
508 spin_unlock_bh(&nf_conntrack_expect_lock);
510 EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
/*
 * Like nf_ct_expect_iterate_destroy(), but restricted to expectations that
 * belong to @net, and the removal is reported with @portid / @report.
 *
 * @net:    only expectations of this network namespace are considered
 * @iter:   predicate called with each expectation and the caller's data
 *          pointer; a true result selects the entry for removal
 * @portid: handed unchanged to nf_ct_unlink_expect_report()
 * @report: handed unchanged to nf_ct_unlink_expect_report()
 */
512 void nf_ct_expect_iterate_net(struct net *net,
513 bool (*iter)(struct nf_conntrack_expect *e, void *data),
515 u32 portid, int report)
517 struct nf_conntrack_expect *exp;
518 const struct hlist_node *next;
521 spin_lock_bh(&nf_conntrack_expect_lock);
523 for (i = 0; i < nf_ct_expect_hsize; i++) {
524 hlist_for_each_entry_safe(exp, next,
525 &nf_ct_expect_hash[i],
/* The hash table is shared between namespaces: filter on @net. */
528 if (!net_eq(nf_ct_exp_net(exp), net))
531 if (iter(exp, data) && del_timer(&exp->timeout)) {
532 nf_ct_unlink_expect_report(exp, portid, report);
533 nf_ct_expect_put(exp);
538 spin_unlock_bh(&nf_conntrack_expect_lock);
540 EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
542 #ifdef CONFIG_NF_CONNTRACK_PROCFS
/*
 * Per-open iterator state for the nf_conntrack_expect proc file; its size
 * is passed to proc_create_net() in exp_proc_init(). The iterator helpers
 * additionally use a 'bucket' member to track the current hash bucket.
 */
543 struct ct_expect_iter_state {
544 struct seq_net_private p;
/*
 * Start an iteration: scan the buckets from index 0, reading each bucket's
 * first node with rcu_dereference(), and leave the current position in
 * st->bucket.
 * NOTE(review): the return statements are not visible here; presumably the
 * first non-NULL node is returned, or NULL when all buckets are empty.
 */
548 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
550 struct ct_expect_iter_state *st = seq->private;
551 struct hlist_node *n;
553 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
554 n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
/*
 * Advance the iteration from node @head: take the next node in the current
 * bucket and, while that is NULL, move on to the first node of the
 * following buckets until the table is exhausted.
 */
561 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
562 struct hlist_node *head)
564 struct ct_expect_iter_state *st = seq->private;
566 head = rcu_dereference(hlist_next_rcu(head));
567 while (head == NULL) {
/* No bucket left to look at. */
568 if (++st->bucket >= nf_ct_expect_hsize)
570 head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
/*
 * Position the iteration at offset @pos, starting from the first entry and
 * stepping with ct_expect_get_next(). Returns NULL if @pos is still
 * non-zero when the loop ends, otherwise the node reached.
 */
575 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
577 struct hlist_node *head = ct_expect_get_first(seq);
580 while (pos && (head = ct_expect_get_next(seq, head)))
582 return pos ? NULL : head;
/* seq_file ->start callback: returns the entry at offset *@pos as found by
 * ct_expect_get_idx(). */
585 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
589 return ct_expect_get_idx(seq, *pos);
/* seq_file ->next callback: returns the entry following @v as found by
 * ct_expect_get_next(). */
592 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
595 return ct_expect_get_next(seq, v);
/* seq_file ->stop callback (installed through exp_seq_ops). */
598 static void exp_seq_stop(struct seq_file *seq, void *v)
/*
 * seq_file ->show callback: print one expectation.
 *
 * Output, in order: remaining timeout in seconds (only when a timer
 * function is set; 0 if the timer is not pending), l3 and l4 protocol
 * numbers, the tuple, the names of the set flags (PERMANENT, INACTIVE,
 * USERSPACE), and the helper name followed by the expectation policy name
 * when that name is non-empty.
 */
604 static int exp_seq_show(struct seq_file *s, void *v)
606 struct nf_conntrack_expect *expect;
607 struct nf_conntrack_helper *helper;
608 struct hlist_node *n = v;
/* @v is the hash node handed out by the iterator callbacks. */
611 expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
613 if (expect->timeout.function)
614 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
615 ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
618 seq_printf(s, "l3proto = %u proto=%u ",
619 expect->tuple.src.l3num,
620 expect->tuple.dst.protonum);
621 print_tuple(s, &expect->tuple,
622 nf_ct_l4proto_find(expect->tuple.dst.protonum));
624 if (expect->flags & NF_CT_EXPECT_PERMANENT) {
625 seq_puts(s, "PERMANENT");
628 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
629 seq_printf(s, "%sINACTIVE", delim);
632 if (expect->flags & NF_CT_EXPECT_USERSPACE)
633 seq_printf(s, "%sUSERSPACE", delim);
635 helper = rcu_dereference(nfct_help(expect->master)->helper);
/* A space separates the helper name from the flags only if any flag is
 * set. */
637 seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
638 if (helper->expect_policy[expect->class].name[0])
640 helper->expect_policy[expect->class].name);
/* seq_file operations for the nf_conntrack_expect proc file; registered by
 * exp_proc_init(). */
648 static const struct seq_operations exp_seq_ops = {
649 .start = exp_seq_start,
650 .next = exp_seq_next,
651 .stop = exp_seq_stop,
654 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/*
 * Create the per-namespace "nf_conntrack_expect" proc file (mode 0440) when
 * CONFIG_NF_CONNTRACK_PROCFS is enabled, and hand ownership to uid/gid 0 of
 * the namespace's user namespace if both map to valid ids.
 */
656 static int exp_proc_init(struct net *net)
658 #ifdef CONFIG_NF_CONNTRACK_PROCFS
659 struct proc_dir_entry *proc;
663 proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
664 &exp_seq_ops, sizeof(struct ct_expect_iter_state));
668 root_uid = make_kuid(net->user_ns, 0);
669 root_gid = make_kgid(net->user_ns, 0);
/* Ownership is left untouched when either id has no valid mapping. */
670 if (uid_valid(root_uid) && gid_valid(root_gid))
671 proc_set_user(proc, root_uid, root_gid);
672 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Remove the per-namespace "nf_conntrack_expect" proc file; does nothing
 * when CONFIG_NF_CONNTRACK_PROCFS is disabled. */
676 static void exp_proc_remove(struct net *net)
678 #ifdef CONFIG_NF_CONNTRACK_PROCFS
679 remove_proc_entry("nf_conntrack_expect", net->proc_net);
680 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Expose nf_ct_expect_hsize as module parameter "expect_hashsize" with
 * permission bits 0400. */
683 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
/* Per-namespace setup: reset the expectation counter and create the proc
 * file; returns the result of exp_proc_init(). */
685 int nf_conntrack_expect_pernet_init(struct net *net)
687 net->ct.expect_count = 0;
688 return exp_proc_init(net);
/* Per-namespace teardown: remove the proc file. */
691 void nf_conntrack_expect_pernet_fini(struct net *net)
693 exp_proc_remove(net);
/*
 * One-time setup: choose the hash size, derive the expectation limit,
 * create the slab cache and allocate the hash table.
 *
 * When no hash size was configured it defaults to
 * nf_conntrack_htable_size / 256, with a minimum of 1. The limit
 * nf_ct_expect_max is four times the hash size.
 *
 * NOTE(review): the return statements are not visible here.
 */
696 int nf_conntrack_expect_init(void)
698 if (!nf_ct_expect_hsize) {
699 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
700 if (!nf_ct_expect_hsize)
701 nf_ct_expect_hsize = 1;
703 nf_ct_expect_max = nf_ct_expect_hsize * 4;
704 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
705 sizeof(struct nf_conntrack_expect),
707 if (!nf_ct_expect_cachep)
710 nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
/* Undo the cache creation if the table cannot be allocated. */
711 if (!nf_ct_expect_hash) {
712 kmem_cache_destroy(nf_ct_expect_cachep);
/*
 * Teardown counterpart of nf_conntrack_expect_init(): wait for outstanding
 * RCU callbacks (which free objects back into the cache), then destroy the
 * slab cache and free the hash table.
 */
719 void nf_conntrack_expect_fini(void)
721 rcu_barrier(); /* Wait for call_rcu() before destroy */
722 kmem_cache_destroy(nf_ct_expect_cachep);
723 kvfree(nf_ct_expect_hash);