linux.git: kernel/audit_tree.c
(at commit "audit: Guarantee forward progress of chunk untagging")
// SPDX-License-Identifier: GPL-2.0
#include "audit.h"
#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct audit_tree;
struct audit_chunk;

struct audit_tree {
	refcount_t count;
	int goner;
	struct audit_chunk *root;
	struct list_head chunks;
	struct list_head rules;
	struct list_head list;
	struct list_head same_root;
	struct rcu_head head;
	char pathname[];
};

struct audit_chunk {
	struct list_head hash;
	unsigned long key;
	struct fsnotify_mark *mark;
	struct list_head trees;		/* with root here */
	int dead;
	int count;
	atomic_long_t refs;
	struct rcu_head head;
	struct node {
		struct list_head list;
		struct audit_tree *owner;
		unsigned index;		/* index; upper bit indicates 'will prune' */
	} owners[];
};

struct audit_tree_mark {
	struct fsnotify_mark mark;
	struct audit_chunk *chunk;
};

static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;

/*
 * One struct chunk is attached to each inode of interest.
 * We replace struct chunk on tagging/untagging.
 * Rules have a pointer to struct audit_tree.
 * Rules have a struct list_head rlist forming a list of rules over
 * the same tree.
 * References to struct chunk are collected at audit_inode{,_child}()
 * time and used in AUDIT_TREE rule matching.
 * These references are dropped at the same time we call
 * audit_free_names(), etc.
 *
 * Cyclic lists galore:
 * tree.chunks anchors chunk.owners[].list			hash_lock
 * tree.rules anchors rule.rlist				audit_filter_mutex
 * chunk.trees anchors tree.same_root				hash_lock
 * chunk.hash is a hash table keyed by the address of the
 * inode's mark connector (see inode_to_key())			RCU, hash_lock
 *
 * tree is refcounted; one reference for "some rules on rules_list refer to
 * it", one for each chunk with a pointer to it.
 *
 * chunk is refcounted by the embedded fsnotify_mark + .refs (a non-zero
 * refcount of the watch contributes 1 to .refs).
 *
 * node.index allows us to get from node.list to the containing chunk.
 * The MSB of that field is stolen to mark taggings that we might have to
 * revert - several operations have very unpleasant cleanup logic and
 * that makes a difference.  Some.
 */
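
/*
 * Example of the resulting graph: a rule for /etc creates one audit_tree
 * whose tree.chunks holds one node per tagged inode under /etc.  If trees
 * for /etc and /usr both tag the same inode, that inode's single chunk
 * has two entries in owners[], one per tree, and each tree reaches the
 * chunk through its own node (via find_chunk()).  The chunk attached to
 * a tree's root directory is additionally linked on chunk.trees through
 * tree.same_root.
 */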

static struct fsnotify_group *audit_tree_group;
static struct kmem_cache *audit_tree_mark_cachep __read_mostly;

static struct audit_tree *alloc_tree(const char *s)
{
	struct audit_tree *tree;

	tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
	if (tree) {
		refcount_set(&tree->count, 1);
		tree->goner = 0;
		INIT_LIST_HEAD(&tree->chunks);
		INIT_LIST_HEAD(&tree->rules);
		INIT_LIST_HEAD(&tree->list);
		INIT_LIST_HEAD(&tree->same_root);
		tree->root = NULL;
		strcpy(tree->pathname, s);
	}
	return tree;
}

static inline void get_tree(struct audit_tree *tree)
{
	refcount_inc(&tree->count);
}

static inline void put_tree(struct audit_tree *tree)
{
	if (refcount_dec_and_test(&tree->count))
		kfree_rcu(tree, head);
}

/* to avoid bringing the entire thing into audit.h */
const char *audit_tree_path(struct audit_tree *tree)
{
	return tree->pathname;
}

static void free_chunk(struct audit_chunk *chunk)
{
	int i;

	for (i = 0; i < chunk->count; i++) {
		if (chunk->owners[i].owner)
			put_tree(chunk->owners[i].owner);
	}
	kfree(chunk);
}

void audit_put_chunk(struct audit_chunk *chunk)
{
	if (atomic_long_dec_and_test(&chunk->refs))
		free_chunk(chunk);
}

static void __put_chunk(struct rcu_head *rcu)
{
	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
	audit_put_chunk(chunk);
}

/*
 * Drop the reference to the chunk that was held by the mark.  This is the
 * reference that gets dropped after we've removed the chunk from the hash
 * table, and we use it to make sure the chunk cannot be freed before the
 * RCU grace period expires.
 */
static void audit_mark_put_chunk(struct audit_chunk *chunk)
{
	call_rcu(&chunk->head, __put_chunk);
}

static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *entry)
{
	return container_of(entry, struct audit_tree_mark, mark);
}

static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
{
	return audit_mark(mark)->chunk;
}

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
	struct audit_chunk *chunk = mark_chunk(entry);
	audit_mark_put_chunk(chunk);
	kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
}

static struct fsnotify_mark *alloc_mark(void)
{
	struct audit_tree_mark *amark;

	amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
	if (!amark)
		return NULL;
	fsnotify_init_mark(&amark->mark, audit_tree_group);
	amark->mark.mask = FS_IN_IGNORED;
	return &amark->mark;
}

static struct audit_chunk *alloc_chunk(int count)
{
	struct audit_chunk *chunk;
	size_t size;
	int i;

	size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
	chunk = kzalloc(size, GFP_KERNEL);
	if (!chunk)
		return NULL;

	chunk->mark = alloc_mark();
	if (!chunk->mark) {
		kfree(chunk);
		return NULL;
	}
	audit_mark(chunk->mark)->chunk = chunk;

	INIT_LIST_HEAD(&chunk->hash);
	INIT_LIST_HEAD(&chunk->trees);
	chunk->count = count;
	atomic_long_set(&chunk->refs, 1);
	for (i = 0; i < count; i++) {
		INIT_LIST_HEAD(&chunk->owners[i].list);
		chunk->owners[i].index = i;
	}
	return chunk;
}
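
/*
 * A chunk is a single allocation: the fixed fields followed by count
 * struct node slots in owners[].  The initial .refs of 1 is the
 * reference held on behalf of the mark; it is the one dropped via RCU
 * by audit_mark_put_chunk() once the chunk has been removed from the
 * hash table.
 */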

enum {HASH_SIZE = 128};
static struct list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);

/* Return the hash search key derived from an inode. */
static unsigned long inode_to_key(const struct inode *inode)
{
	/* Use the address pointed to by connector->obj as the key */
	return (unsigned long)&inode->i_fsnotify_marks;
}

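/*
 * Keys are addresses of i_fsnotify_marks fields inside slab-allocated
 * inodes, so their low bits carry little entropy; dividing by
 * L1_CACHE_BYTES before taking the modulus below presumably spreads
 * the entries more evenly across the buckets.
 */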
static inline struct list_head *chunk_hash(unsigned long key)
{
	unsigned long n = key / L1_CACHE_BYTES;
	return chunk_hash_heads + n % HASH_SIZE;
}

/* hash_lock & entry->group->mark_mutex are held by the caller */
static void insert_hash(struct audit_chunk *chunk)
{
	struct list_head *list;

	/*
	 * Make sure chunk is fully initialized before making it visible in the
	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
	 * audit_tree_lookup().
	 */
	smp_wmb();
	WARN_ON_ONCE(!chunk->key);
	list = chunk_hash(chunk->key);
	list_add_rcu(&chunk->hash, list);
}

/* called under rcu_read_lock */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
	unsigned long key = inode_to_key(inode);
	struct list_head *list = chunk_hash(key);
	struct audit_chunk *p;

	list_for_each_entry_rcu(p, list, hash) {
		/*
		 * We use a data dependency barrier in READ_ONCE() to make sure
		 * the chunk we see is fully initialized.
		 */
		if (READ_ONCE(p->key) == key) {
			atomic_long_inc(&p->refs);
			return p;
		}
	}
	return NULL;
}
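
/*
 * Per the top-of-file comment, these lookups happen at
 * audit_inode{,_child}() time; the reference grabbed above is dropped
 * with audit_put_chunk() around audit_free_names().
 */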

bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
{
	int n;
	for (n = 0; n < chunk->count; n++)
		if (chunk->owners[n].owner == tree)
			return true;
	return false;
}

/* tagging and untagging inodes with trees */

static struct audit_chunk *find_chunk(struct node *p)
{
	int index = p->index & ~(1U<<31);
	p -= index;
	return container_of(p, struct audit_chunk, owners[0]);
}
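
/*
 * Example: a node sitting in owners[2] has index 2 (once the 'will
 * prune' bit is masked off), so p -= 2 lands on owners[0] and
 * container_of() recovers the enclosing chunk.  This is why
 * replace_chunk() below must keep each node's index consistent with
 * its position in owners[].
 */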

static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
			  struct node *skip)
{
	struct audit_tree *owner;
	int i, j;

	new->key = old->key;
	list_splice_init(&old->trees, &new->trees);
	list_for_each_entry(owner, &new->trees, same_root)
		owner->root = new;
	for (i = j = 0; j < old->count; i++, j++) {
		if (&old->owners[j] == skip) {
			i--;
			continue;
		}
		owner = old->owners[j].owner;
		new->owners[i].owner = owner;
		new->owners[i].index = old->owners[j].index - j + i;
		if (!owner) /* result of earlier fallback */
			continue;
		get_tree(owner);
		list_replace_init(&old->owners[j].list, &new->owners[i].list);
	}
	/*
	 * Make sure chunk is fully initialized before making it visible in the
	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
	 * audit_tree_lookup().
	 */
	smp_wmb();
	list_replace_rcu(&old->hash, &new->hash);
}
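
/*
 * Note the compaction above: with a non-NULL skip the new chunk has one
 * slot fewer, and the "- j + i" adjustment shifts the surviving nodes'
 * indices down so that find_chunk() keeps working on the new layout.
 */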

static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
{
	struct audit_tree *owner = p->owner;

	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}
	list_del_init(&p->list);
	p->owner = NULL;
	put_tree(owner);
}

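/*
 * Drop one node from its chunk.  Called and returns with hash_lock
 * held, but drops it in the middle; the mark reference taken below
 * keeps the mark (and hence the chunk) from being freed in that window.
 * If other trees still use the chunk, it is replaced by a one-slot-smaller
 * copy; if this was the last owner, the chunk is unhashed and its mark
 * torn down.
 */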
static void untag_chunk(struct node *p)
{
	struct audit_chunk *chunk = find_chunk(p);
	struct fsnotify_mark *entry = chunk->mark;
	struct audit_chunk *new = NULL;
	int size = chunk->count - 1;

	remove_chunk_node(chunk, p);
	fsnotify_get_mark(entry);
	spin_unlock(&hash_lock);

	if (size)
		new = alloc_chunk(size);

	mutex_lock(&entry->group->mark_mutex);
	/*
	 * mark_mutex protects mark from getting detached and thus also from
	 * mark->connector->obj getting NULL.
	 */
	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		mutex_unlock(&entry->group->mark_mutex);
		if (new)
			fsnotify_put_mark(new->mark);
		goto out;
	}

	if (!size) {
		chunk->dead = 1;
		spin_lock(&hash_lock);
		list_del_init(&chunk->trees);
		list_del_rcu(&chunk->hash);
		spin_unlock(&hash_lock);
		fsnotify_detach_mark(entry);
		mutex_unlock(&entry->group->mark_mutex);
		fsnotify_free_mark(entry);
		goto out;
	}

	if (!new)
		goto out_mutex;

	if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
		fsnotify_put_mark(new->mark);
		goto out_mutex;
	}

	chunk->dead = 1;
	spin_lock(&hash_lock);
	/*
	 * This has to go last when updating chunk as once replace_chunk() is
	 * called, new RCU readers can see the new chunk.
	 */
	replace_chunk(new, chunk, p);
	spin_unlock(&hash_lock);
	fsnotify_detach_mark(entry);
	mutex_unlock(&entry->group->mark_mutex);
	fsnotify_free_mark(entry);
	fsnotify_put_mark(new->mark);	/* drop initial reference */
	goto out;

out_mutex:
	mutex_unlock(&entry->group->mark_mutex);
out:
	fsnotify_put_mark(entry);
	spin_lock(&hash_lock);
}

/* Call with group->mark_mutex held, releases it */
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *entry;
	struct audit_chunk *chunk = alloc_chunk(1);

	if (!chunk) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		return -ENOMEM;
	}

	entry = chunk->mark;
	if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(entry);
		return -ENOSPC;
	}

	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		fsnotify_detach_mark(entry);
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_free_mark(entry);
		fsnotify_put_mark(entry);
		return 0;
	}
	chunk->owners[0].index = (1U << 31);
	chunk->owners[0].owner = tree;
	get_tree(tree);
	list_add(&chunk->owners[0].list, &tree->chunks);
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	chunk->key = inode_to_key(inode);
	/*
	 * Inserting into the hash table has to go last as once we do that RCU
	 * readers can see the chunk.
	 */
	insert_hash(chunk);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_tree_group->mark_mutex);
	fsnotify_put_mark(entry);	/* drop initial reference */
	return 0;
}

/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *old_entry, *chunk_entry;
	struct audit_chunk *chunk, *old;
	struct node *p;
	int n;

	mutex_lock(&audit_tree_group->mark_mutex);
	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
				       audit_tree_group);
	if (!old_entry)
		return create_chunk(inode, tree);

	old = mark_chunk(old_entry);

	/* are we already there? */
	spin_lock(&hash_lock);
	for (n = 0; n < old->count; n++) {
		if (old->owners[n].owner == tree) {
			spin_unlock(&hash_lock);
			mutex_unlock(&audit_tree_group->mark_mutex);
			fsnotify_put_mark(old_entry);
			return 0;
		}
	}
	spin_unlock(&hash_lock);

	chunk = alloc_chunk(old->count + 1);
	if (!chunk) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(old_entry);
		return -ENOMEM;
	}

	chunk_entry = chunk->mark;

	/*
	 * mark_mutex protects mark from getting detached and thus also from
	 * mark->connector->obj getting NULL.
	 */
	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		/* old_entry is being shot, let's just lie */
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(old_entry);
		fsnotify_put_mark(chunk->mark);
		return -ENOENT;
	}

	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return -ENOSPC;
	}

	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		fsnotify_detach_mark(chunk_entry);
		mutex_unlock(&audit_tree_group->mark_mutex);
		fsnotify_free_mark(chunk_entry);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return 0;
	}
	p = &chunk->owners[chunk->count - 1];
	p->index = (chunk->count - 1) | (1U<<31);
	p->owner = tree;
	get_tree(tree);
	list_add(&p->list, &tree->chunks);
	old->dead = 1;
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	/*
	 * This has to go last when updating chunk as once replace_chunk() is
	 * called, new RCU readers can see the new chunk.
	 */
	replace_chunk(chunk, old, NULL);
	spin_unlock(&hash_lock);
	fsnotify_detach_mark(old_entry);
	mutex_unlock(&audit_tree_group->mark_mutex);
	fsnotify_free_mark(old_entry);
	fsnotify_put_mark(chunk_entry); /* drop initial reference */
	fsnotify_put_mark(old_entry); /* pair to fsnotify_find_mark */
	return 0;
}

static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
	struct audit_buffer *ab;

	if (!audit_enabled)
		return;
	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "op=remove_rule");
	audit_log_format(ab, " dir=");
	audit_log_untrustedstring(ab, rule->tree->pathname);
	audit_log_key(ab, rule->filterkey);
	audit_log_format(ab, " list=%d res=1", rule->listnr);
	audit_log_end(ab);
}

static void kill_rules(struct audit_tree *tree)
{
	struct audit_krule *rule, *next;
	struct audit_entry *entry;

	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
		entry = container_of(rule, struct audit_entry, rule);

		list_del_init(&rule->rlist);
		if (rule->tree) {
			/* not a half-baked one */
			audit_tree_log_remove_rule(rule);
			if (entry->rule.exe)
				audit_remove_mark(entry->rule.exe);
			rule->tree = NULL;
			list_del_rcu(&entry->list);
			list_del(&entry->rule.list);
			call_rcu(&entry->rcu, audit_free_rule_rcu);
		}
	}
}

/*
 * finish killing struct audit_tree
 */
static void prune_one(struct audit_tree *victim)
{
	spin_lock(&hash_lock);
	while (!list_empty(&victim->chunks)) {
		struct node *p;

		p = list_entry(victim->chunks.next, struct node, list);

		untag_chunk(p);
	}
	spin_unlock(&hash_lock);
	put_tree(victim);
}

/* trim the uncommitted chunks from tree */

static void trim_marked(struct audit_tree *tree)
{
	struct list_head *p, *q;
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		return;
	}
	/* reorder: move the marked (uncommitted) nodes to the front */
	for (p = tree->chunks.next; p != &tree->chunks; p = q) {
		struct node *node = list_entry(p, struct node, list);
		q = p->next;
		if (node->index & (1U<<31)) {
			list_del_init(p);
			list_add(p, &tree->chunks);
		}
	}

	while (!list_empty(&tree->chunks)) {
		struct node *node;

		node = list_entry(tree->chunks.next, struct node, list);

		/* have we run out of marked? */
		if (!(node->index & (1U<<31)))
			break;

		untag_chunk(node);
	}
	if (!tree->root && !tree->goner) {
		tree->goner = 1;
		spin_unlock(&hash_lock);
		mutex_lock(&audit_filter_mutex);
		kill_rules(tree);
		list_del_init(&tree->list);
		mutex_unlock(&audit_filter_mutex);
		prune_one(tree);
	} else {
		spin_unlock(&hash_lock);
	}
}

static void audit_schedule_prune(void);

/* called with audit_filter_mutex */
int audit_remove_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *tree;
	tree = rule->tree;
	if (tree) {
		spin_lock(&hash_lock);
		list_del_init(&rule->rlist);
		if (list_empty(&tree->rules) && !tree->goner) {
			tree->root = NULL;
			list_del_init(&tree->same_root);
			tree->goner = 1;
			list_move(&tree->list, &prune_list);
			rule->tree = NULL;
			spin_unlock(&hash_lock);
			audit_schedule_prune();
			return 1;
		}
		rule->tree = NULL;
		spin_unlock(&hash_lock);
		return 1;
	}
	return 0;
}

static int compare_root(struct vfsmount *mnt, void *arg)
{
	return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
	       (unsigned long)arg;
}

void audit_trim_trees(void)
{
	struct list_head cursor;

	mutex_lock(&audit_filter_mutex);
	list_add(&cursor, &tree_list);
	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		struct path path;
		struct vfsmount *root_mnt;
		struct node *node;
		int err;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path);
		if (err)
			goto skip_it;

		root_mnt = collect_mounts(&path);
		path_put(&path);
		if (IS_ERR(root_mnt))
			goto skip_it;

		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list) {
			struct audit_chunk *chunk = find_chunk(node);
			/* this could be NULL if the watch is dying elsewhere... */
			node->index |= 1U<<31;
			if (iterate_mounts(compare_root,
					   (void *)(chunk->key),
					   root_mnt))
				node->index &= ~(1U<<31);
		}
		spin_unlock(&hash_lock);
		trim_marked(tree);
		drop_collected_mounts(root_mnt);
skip_it:
		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
}

int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
{
	if (pathname[0] != '/' ||
	    rule->listnr != AUDIT_FILTER_EXIT ||
	    op != Audit_equal ||
	    rule->inode_f || rule->watch || rule->tree)
		return -EINVAL;
	rule->tree = alloc_tree(pathname);
	if (!rule->tree)
		return -ENOMEM;
	return 0;
}
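
/*
 * This is what userspace directory-watch rules turn into, e.g. (with the
 * usual auditctl syntax) "auditctl -a always,exit -F dir=/usr/local
 * -k mykey": an AUDIT_FILTER_EXIT rule with an Audit_equal comparison
 * on the dir field, matching the checks above.
 */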

void audit_put_tree(struct audit_tree *tree)
{
	put_tree(tree);
}

static int tag_mount(struct vfsmount *mnt, void *arg)
{
	return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
}

/*
 * That gets run when evict_chunk() ends up needing to kill audit_tree.
 * Runs from a separate thread.
 */
static int prune_tree_thread(void *unused)
{
	for (;;) {
		if (list_empty(&prune_list)) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}

		audit_ctl_lock();
		mutex_lock(&audit_filter_mutex);

		while (!list_empty(&prune_list)) {
			struct audit_tree *victim;

			victim = list_entry(prune_list.next,
					struct audit_tree, list);
			list_del_init(&victim->list);

			mutex_unlock(&audit_filter_mutex);

			prune_one(victim);

			mutex_lock(&audit_filter_mutex);
		}

		mutex_unlock(&audit_filter_mutex);
		audit_ctl_unlock();
	}
	return 0;
}

static int audit_launch_prune(void)
{
	if (prune_thread)
		return 0;
	prune_thread = kthread_run(prune_tree_thread, NULL,
				"audit_prune_tree");
	if (IS_ERR(prune_thread)) {
		pr_err("cannot start thread audit_prune_tree\n");
		prune_thread = NULL;
		return -ENOMEM;
	}
	return 0;
}

/* called with audit_filter_mutex */
int audit_add_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *seed = rule->tree, *tree;
	struct path path;
	struct vfsmount *mnt;
	int err;

	rule->tree = NULL;
	list_for_each_entry(tree, &tree_list, list) {
		if (!strcmp(seed->pathname, tree->pathname)) {
			put_tree(seed);
			rule->tree = tree;
			list_add(&rule->rlist, &tree->rules);
			return 0;
		}
	}
	tree = seed;
	list_add(&tree->list, &tree_list);
	list_add(&rule->rlist, &tree->rules);
	/* do not set rule->tree yet */
	mutex_unlock(&audit_filter_mutex);

	if (unlikely(!prune_thread)) {
		err = audit_launch_prune();
		if (err)
			goto Err;
	}

	err = kern_path(tree->pathname, 0, &path);
	if (err)
		goto Err;
	mnt = collect_mounts(&path);
	path_put(&path);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto Err;
	}

	get_tree(tree);
	err = iterate_mounts(tag_mount, tree, mnt);
	drop_collected_mounts(mnt);

	if (!err) {
		struct node *node;
		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list)
			node->index &= ~(1U<<31);
		spin_unlock(&hash_lock);
	} else {
		trim_marked(tree);
		goto Err;
	}

	mutex_lock(&audit_filter_mutex);
	if (list_empty(&rule->rlist)) {
		put_tree(tree);
		return -ENOENT;
	}
	rule->tree = tree;
	put_tree(tree);

	return 0;
Err:
	mutex_lock(&audit_filter_mutex);
	list_del_init(&tree->list);
	list_del_init(&tree->rules);
	put_tree(tree);
	return err;
}

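/*
 * Retag all trees whose watched directory lives under "old" so that
 * they also cover the mounts collected at "new".  The on-stack cursor
 * and barrier entries keep our place in tree_list while
 * audit_filter_mutex gets dropped: the first loop walks trees from the
 * cursor and moves the ones it tagged in front of the barrier, and the
 * second loop then picks out exactly those trees to commit or revert
 * their provisional tags.
 */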
int audit_tag_tree(char *old, char *new)
{
	struct list_head cursor, barrier;
	int failed = 0;
	struct path path1, path2;
	struct vfsmount *tagged;
	int err;

	err = kern_path(new, 0, &path2);
	if (err)
		return err;
	tagged = collect_mounts(&path2);
	path_put(&path2);
	if (IS_ERR(tagged))
		return PTR_ERR(tagged);

	err = kern_path(old, 0, &path1);
	if (err) {
		drop_collected_mounts(tagged);
		return err;
	}

	mutex_lock(&audit_filter_mutex);
	list_add(&barrier, &tree_list);
	list_add(&cursor, &barrier);

	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		int good_one = 0;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path2);
		if (!err) {
			good_one = path_is_under(&path1, &path2);
			path_put(&path2);
		}

		if (!good_one) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			continue;
		}

		failed = iterate_mounts(tag_mount, tree, tagged);
		if (failed) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			break;
		}

		mutex_lock(&audit_filter_mutex);
		spin_lock(&hash_lock);
		if (!tree->goner) {
			list_del(&tree->list);
			list_add(&tree->list, &tree_list);
		}
		spin_unlock(&hash_lock);
		put_tree(tree);
	}

	while (barrier.prev != &tree_list) {
		struct audit_tree *tree;

		tree = container_of(barrier.prev, struct audit_tree, list);
		get_tree(tree);
		list_del(&tree->list);
		list_add(&tree->list, &barrier);
		mutex_unlock(&audit_filter_mutex);

		if (!failed) {
			struct node *node;
			spin_lock(&hash_lock);
			list_for_each_entry(node, &tree->chunks, list)
				node->index &= ~(1U<<31);
			spin_unlock(&hash_lock);
		} else {
			trim_marked(tree);
		}

		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&barrier);
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
	path_put(&path1);
	drop_collected_mounts(tagged);
	return failed;
}

static void audit_schedule_prune(void)
{
	wake_up_process(prune_thread);
}

/*
 * ... and that one is done if evict_chunk() decides to delay until the end
 * of the syscall.  Runs synchronously.
 */
void audit_kill_trees(struct list_head *list)
{
	audit_ctl_lock();
	mutex_lock(&audit_filter_mutex);

	while (!list_empty(list)) {
		struct audit_tree *victim;

		victim = list_entry(list->next, struct audit_tree, list);
		kill_rules(victim);
		list_del_init(&victim->list);

		mutex_unlock(&audit_filter_mutex);

		prune_one(victim);

		mutex_lock(&audit_filter_mutex);
	}

	mutex_unlock(&audit_filter_mutex);
	audit_ctl_unlock();
}

/*
 * Here comes the stuff asynchronous to auditctl operations
 */

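/*
 * Called via fsnotify when a watched inode is going away: kill every
 * tree rooted at this chunk (rules included), either queueing the trees
 * for the prune thread or, when the current syscall keeps a postponed
 * list, handing them to audit_kill_trees(); the chunk itself gets
 * unhashed and its nodes are dropped from the owning trees.
 */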
static void evict_chunk(struct audit_chunk *chunk)
{
	struct audit_tree *owner;
	struct list_head *postponed = audit_killed_trees();
	int need_prune = 0;
	int n;

	if (chunk->dead)
		return;

	chunk->dead = 1;
	mutex_lock(&audit_filter_mutex);
	spin_lock(&hash_lock);
	while (!list_empty(&chunk->trees)) {
		owner = list_entry(chunk->trees.next,
				   struct audit_tree, same_root);
		owner->goner = 1;
		owner->root = NULL;
		list_del_init(&owner->same_root);
		spin_unlock(&hash_lock);
		if (!postponed) {
			kill_rules(owner);
			list_move(&owner->list, &prune_list);
			need_prune = 1;
		} else {
			list_move(&owner->list, postponed);
		}
		spin_lock(&hash_lock);
	}
	list_del_rcu(&chunk->hash);
	for (n = 0; n < chunk->count; n++)
		list_del_init(&chunk->owners[n].list);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_filter_mutex);
	if (need_prune)
		audit_schedule_prune();
}

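/*
 * We never act on the events themselves; the group exists only so that
 * freeing_mark below tells us when a watched inode goes away.
 */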
static int audit_tree_handle_event(struct fsnotify_group *group,
				   struct inode *to_tell,
				   u32 mask, const void *data, int data_type,
				   const unsigned char *file_name, u32 cookie,
				   struct fsnotify_iter_info *iter_info)
{
	return 0;
}

static void audit_tree_freeing_mark(struct fsnotify_mark *entry,
				    struct fsnotify_group *group)
{
	struct audit_chunk *chunk = mark_chunk(entry);

	evict_chunk(chunk);

	/*
	 * We are guaranteed to have at least one reference to the mark from
	 * either the inode or the caller of fsnotify_destroy_mark().
	 */
	BUG_ON(refcount_read(&entry->refcnt) < 1);
}

static const struct fsnotify_ops audit_tree_ops = {
	.handle_event = audit_tree_handle_event,
	.freeing_mark = audit_tree_freeing_mark,
	.free_mark = audit_tree_destroy_watch,
};

static int __init audit_tree_init(void)
{
	int i;

	audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);

	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
	if (IS_ERR(audit_tree_group))
		audit_panic("cannot initialize fsnotify group for rectree watches");

	for (i = 0; i < HASH_SIZE; i++)
		INIT_LIST_HEAD(&chunk_hash_heads[i]);

	return 0;
}
__initcall(audit_tree_init);