#include "audit.h"
#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/refcount.h>
#include <linux/slab.h>
struct audit_tree;
struct audit_chunk;

struct audit_tree {
	refcount_t count;
	int goner;
	struct audit_chunk *root;
	struct list_head chunks;
	struct list_head rules;
	struct list_head list;
	struct list_head same_root;
	struct rcu_head head;
	char pathname[];
};

struct audit_chunk {
	struct list_head hash;
	struct fsnotify_mark mark;
	struct list_head trees;		/* with root here */
	int dead;
	int count;
	atomic_long_t refs;
	struct rcu_head head;
	struct node {
		struct list_head list;
		struct audit_tree *owner;
		unsigned index;		/* index; upper bit indicates 'will prune' */
	} owners[];
};
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;
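/*
 * Trees that have lost their last rule are queued on prune_list and
 * reaped asynchronously by prune_thread (see prune_tree_thread() below).
 */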
/*
 * One struct chunk is attached to each inode of interest.
 * We replace struct chunk on tagging/untagging.
 * Rules have pointer to struct audit_tree.
 * Rules have struct list_head rlist forming a list of rules over
 * the same tree.
 * References to struct chunk are collected at audit_inode{,_child}()
 * time and used in AUDIT_TREE rule matching.
 * These references are dropped at the same time we are calling
 * audit_free_names(), etc.
 *
 * Cyclic lists galore:
 * tree.chunks anchors chunk.owners[].list		hash_lock
 * tree.rules anchors rule.rlist			audit_filter_mutex
 * chunk.trees anchors tree.same_root			hash_lock
 * chunk.hash is a hash with middle bits of watch.inode as
 * a hash function.					RCU, hash_lock
 *
 * tree is refcounted; one reference for "some rules refer to it", one for
 * each chunk with pointer to it.
 *
 * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
 * of watch contributes 1 to .refs).
 *
 * node.index allows to get from node.list to containing chunk.
 * MSB of that sucker is stolen to mark taggings that we might have to
 * revert - several operations have very unpleasant cleanup logics and
 * that makes a difference.  Some.
 */
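/*
 * For illustration only: given a struct node *p sitting in some
 * chunk->owners[], the containing chunk is recovered by masking off the
 * MSB of p->index and stepping back that many array slots, e.g.
 *
 *	int index = p->index & ~(1U << 31);
 *	chunk = container_of(p - index, struct audit_chunk, owners[0]);
 *
 * which is exactly what find_chunk() below does.
 */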
static struct fsnotify_group *audit_tree_group;
static struct audit_tree *alloc_tree(const char *s)
{
	struct audit_tree *tree;

	tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
	if (tree) {
		refcount_set(&tree->count, 1);
		tree->goner = 0;
		INIT_LIST_HEAD(&tree->chunks);
		INIT_LIST_HEAD(&tree->rules);
		INIT_LIST_HEAD(&tree->list);
		INIT_LIST_HEAD(&tree->same_root);
		tree->root = NULL;
		strcpy(tree->pathname, s);
	}
	return tree;
}
static inline void get_tree(struct audit_tree *tree)
{
	refcount_inc(&tree->count);
}

static inline void put_tree(struct audit_tree *tree)
{
	if (refcount_dec_and_test(&tree->count))
		kfree_rcu(tree, head);
}
/* to avoid bringing the entire thing in audit.h */
const char *audit_tree_path(struct audit_tree *tree)
{
	return tree->pathname;
}
static void free_chunk(struct audit_chunk *chunk)
{
	int i;

	for (i = 0; i < chunk->count; i++) {
		if (chunk->owners[i].owner)
			put_tree(chunk->owners[i].owner);
	}
	kfree(chunk);
}

void audit_put_chunk(struct audit_chunk *chunk)
{
	if (atomic_long_dec_and_test(&chunk->refs))
		free_chunk(chunk);
}
static void __put_chunk(struct rcu_head *rcu)
{
	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
	audit_put_chunk(chunk);
}

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
	call_rcu(&chunk->head, __put_chunk);
}
static struct audit_chunk *alloc_chunk(int count)
{
	struct audit_chunk *chunk;
	size_t size;
	int i;

	size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
	chunk = kzalloc(size, GFP_KERNEL);
	if (!chunk)
		return NULL;

	INIT_LIST_HEAD(&chunk->hash);
	INIT_LIST_HEAD(&chunk->trees);
	chunk->count = count;
	atomic_long_set(&chunk->refs, 1);
	for (i = 0; i < count; i++) {
		INIT_LIST_HEAD(&chunk->owners[i].list);
		chunk->owners[i].index = i;
	}
	fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
	chunk->mark.mask = FS_IN_IGNORED;
	return chunk;
}
enum {HASH_SIZE = 128};
static struct list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
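/*
 * Chunks are hashed by the address of the watched inode; writers hold
 * hash_lock, lookups run under RCU (see audit_tree_lookup()).
 */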
static inline struct list_head *chunk_hash(const struct inode *inode)
{
	unsigned long n = (unsigned long)inode / L1_CACHE_BYTES;
	return chunk_hash_heads + n % HASH_SIZE;
}
/* hash_lock & entry->lock is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
	struct fsnotify_mark *entry = &chunk->mark;
	struct list_head *list;

	if (!entry->inode)
		return;
	list = chunk_hash(entry->inode);
	list_add_rcu(&chunk->hash, list);
}
/* called under rcu_read_lock */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
	struct list_head *list = chunk_hash(inode);
	struct audit_chunk *p;

	list_for_each_entry_rcu(p, list, hash) {
		/* mark.inode may have gone NULL, but who cares? */
		if (p->mark.inode == inode) {
			atomic_long_inc(&p->refs);
			return p;
		}
	}
	return NULL;
}
bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
{
	int n;

	for (n = 0; n < chunk->count; n++)
		if (chunk->owners[n].owner == tree)
			return true;
	return false;
}
/* tagging and untagging inodes with trees */
static struct audit_chunk *find_chunk(struct node *p)
{
	int index = p->index & ~(1U<<31);
	p -= index;
	return container_of(p, struct audit_chunk, owners[0]);
}
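/*
 * Drop the tagging described by *p: replace the chunk with a copy that has
 * one owner less, or destroy it outright if *p was the last owner.  Called
 * and returns with hash_lock held; drops and reacquires it in between.
 */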
static void untag_chunk(struct node *p)
{
	struct audit_chunk *chunk = find_chunk(p);
	struct fsnotify_mark *entry = &chunk->mark;
	struct audit_chunk *new = NULL;
	struct audit_tree *owner;
	int size = chunk->count - 1;
	int i, j;

	fsnotify_get_mark(entry);

	spin_unlock(&hash_lock);

	if (size)
		new = alloc_chunk(size);

	mutex_lock(&entry->group->mark_mutex);
	spin_lock(&entry->lock);
	if (chunk->dead || !entry->inode) {
		spin_unlock(&entry->lock);
		mutex_unlock(&entry->group->mark_mutex);
		if (new)
			fsnotify_put_mark(&new->mark);
		goto out;
	}

	owner = p->owner;

	if (!size) {
		chunk->dead = 1;
		spin_lock(&hash_lock);
		list_del_init(&chunk->trees);
		if (owner->root == chunk)
			owner->root = NULL;
		list_del_init(&p->list);
		list_del_rcu(&chunk->hash);
		spin_unlock(&hash_lock);
		spin_unlock(&entry->lock);
		mutex_unlock(&entry->group->mark_mutex);
		fsnotify_destroy_mark(entry, audit_tree_group);
		goto out;
	}

	if (!new)
		goto Fallback;

	if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode,
				     NULL, 1)) {
		fsnotify_put_mark(&new->mark);
		goto Fallback;
	}

	chunk->dead = 1;
	spin_lock(&hash_lock);
	list_replace_init(&chunk->trees, &new->trees);
	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}

	for (i = j = 0; j <= size; i++, j++) {
		struct audit_tree *s;
		if (&chunk->owners[j] == p) {
			list_del_init(&p->list);
			i--;
			continue;
		}
		s = chunk->owners[j].owner;
		new->owners[i].owner = s;
		new->owners[i].index = chunk->owners[j].index - j + i;
		if (!s) /* result of earlier fallback */
			continue;
		get_tree(s);
		list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
	}

	list_replace_rcu(&chunk->hash, &new->hash);
	list_for_each_entry(owner, &new->trees, same_root)
		owner->root = new;
	spin_unlock(&hash_lock);
	spin_unlock(&entry->lock);
	mutex_unlock(&entry->group->mark_mutex);
	fsnotify_destroy_mark(entry, audit_tree_group);
	fsnotify_put_mark(&new->mark);	/* drop initial reference */
	goto out;

Fallback:
	// do the best we can
	spin_lock(&hash_lock);
	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}
	list_del_init(&p->list);
	p->owner = NULL;
	put_tree(owner);
	spin_unlock(&hash_lock);
	spin_unlock(&entry->lock);
	mutex_unlock(&entry->group->mark_mutex);
out:
	fsnotify_put_mark(entry);
	spin_lock(&hash_lock);
}
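/*
 * Attach a brand new single-owner chunk to an inode that had no chunk
 * before; fails if the fsnotify mark cannot be added to the inode.
 */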
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *entry;
	struct audit_chunk *chunk = alloc_chunk(1);
	if (!chunk)
		return -ENOMEM;

	entry = &chunk->mark;
	if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
		fsnotify_put_mark(entry);
		return -ENOSPC;
	}

	spin_lock(&entry->lock);
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		spin_unlock(&entry->lock);
		fsnotify_destroy_mark(entry, audit_tree_group);
		fsnotify_put_mark(entry);
		return 0;
	}
	chunk->owners[0].index = (1U << 31);
	chunk->owners[0].owner = tree;
	get_tree(tree);
	list_add(&chunk->owners[0].list, &tree->chunks);
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	insert_hash(chunk);
	spin_unlock(&hash_lock);
	spin_unlock(&entry->lock);
	fsnotify_put_mark(entry);	/* drop initial reference */
	return 0;
}
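/*
 * tag_chunk() either finds @tree already among the owners of the inode's
 * existing chunk, or it builds a one-slot-larger replacement chunk and
 * swaps it in, mirroring the replace-chunk dance done by untag_chunk().
 */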
/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *old_entry, *chunk_entry;
	struct audit_tree *owner;
	struct audit_chunk *chunk, *old;
	struct node *p;
	int n;

	old_entry = fsnotify_find_inode_mark(audit_tree_group, inode);
	if (!old_entry)
		return create_chunk(inode, tree);

	old = container_of(old_entry, struct audit_chunk, mark);

	/* are we already there? */
	spin_lock(&hash_lock);
	for (n = 0; n < old->count; n++) {
		if (old->owners[n].owner == tree) {
			spin_unlock(&hash_lock);
			fsnotify_put_mark(old_entry);
			return 0;
		}
	}
	spin_unlock(&hash_lock);

	chunk = alloc_chunk(old->count + 1);
	if (!chunk) {
		fsnotify_put_mark(old_entry);
		return -ENOMEM;
	}

	chunk_entry = &chunk->mark;

	mutex_lock(&old_entry->group->mark_mutex);
	spin_lock(&old_entry->lock);
	if (!old_entry->inode) {
		/* old_entry is being shot, let's just lie */
		spin_unlock(&old_entry->lock);
		mutex_unlock(&old_entry->group->mark_mutex);
		fsnotify_put_mark(old_entry);
		free_chunk(chunk);
		return -ENOENT;
	}

	if (fsnotify_add_mark_locked(chunk_entry, old_entry->group,
				     old_entry->inode, NULL, 1)) {
		spin_unlock(&old_entry->lock);
		mutex_unlock(&old_entry->group->mark_mutex);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return -ENOSPC;
	}

	/* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */
	spin_lock(&chunk_entry->lock);
	spin_lock(&hash_lock);

	/* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		spin_unlock(&chunk_entry->lock);
		spin_unlock(&old_entry->lock);
		mutex_unlock(&old_entry->group->mark_mutex);

		fsnotify_destroy_mark(chunk_entry, audit_tree_group);

		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return 0;
	}
	list_replace_init(&old->trees, &chunk->trees);
	for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
		struct audit_tree *s = old->owners[n].owner;
		p->owner = s;
		p->index = old->owners[n].index;
		if (!s) /* result of fallback in untag */
			continue;
		get_tree(s);
		list_replace_init(&old->owners[n].list, &p->list);
	}
	p->index = (chunk->count - 1) | (1U<<31);
	p->owner = tree;
	get_tree(tree);
	list_add(&p->list, &tree->chunks);
	list_replace_rcu(&old->hash, &chunk->hash);
	list_for_each_entry(owner, &chunk->trees, same_root)
		owner->root = chunk;
	old->dead = 1;
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	spin_unlock(&hash_lock);
	spin_unlock(&chunk_entry->lock);
	spin_unlock(&old_entry->lock);
	mutex_unlock(&old_entry->group->mark_mutex);
	fsnotify_destroy_mark(old_entry, audit_tree_group);
	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
	fsnotify_put_mark(old_entry); /* pair to fsnotify_find_mark_entry */
	return 0;
}
static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
	struct audit_buffer *ab;

	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "op=remove_rule");
	audit_log_format(ab, " dir=");
	audit_log_untrustedstring(ab, rule->tree->pathname);
	audit_log_key(ab, rule->filterkey);
	audit_log_format(ab, " list=%d res=1", rule->listnr);
	audit_log_end(ab);
}
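/*
 * Detach and free every rule that refers to @tree, emitting a config-change
 * record for each fully initialized one.
 */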
static void kill_rules(struct audit_tree *tree)
{
	struct audit_krule *rule, *next;
	struct audit_entry *entry;

	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
		entry = container_of(rule, struct audit_entry, rule);

		list_del_init(&rule->rlist);
		if (rule->tree) {
			/* not a half-baked one */
			audit_tree_log_remove_rule(rule);
			if (entry->rule.exe)
				audit_remove_mark(entry->rule.exe);
			rule->tree = NULL;
			list_del_rcu(&entry->list);
			list_del(&entry->rule.list);
			call_rcu(&entry->rcu, audit_free_rule_rcu);
		}
	}
}
/*
 * finish killing struct audit_tree
 */
static void prune_one(struct audit_tree *victim)
{
	spin_lock(&hash_lock);
	while (!list_empty(&victim->chunks)) {
		struct node *p;

		p = list_entry(victim->chunks.next, struct node, list);

		untag_chunk(p);
	}
	spin_unlock(&hash_lock);
	put_tree(victim);
}
/* trim the uncommitted chunks from tree */

static void trim_marked(struct audit_tree *tree)
{
	struct list_head *p, *q;
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		return;
	}
	/* reorder: put the marked ('will prune') chunks first */
	for (p = tree->chunks.next; p != &tree->chunks; p = q) {
		struct node *node = list_entry(p, struct node, list);
		q = p->next;
		if (node->index & (1U<<31)) {
			list_del_init(p);
			list_add(p, &tree->chunks);
		}
	}

	while (!list_empty(&tree->chunks)) {
		struct node *node;

		node = list_entry(tree->chunks.next, struct node, list);

		/* have we run out of marked? */
		if (!(node->index & (1U<<31)))
			break;

		untag_chunk(node);
	}
	if (!tree->root && !tree->goner) {
		tree->goner = 1;
		spin_unlock(&hash_lock);
		mutex_lock(&audit_filter_mutex);
		kill_rules(tree);
		list_del_init(&tree->list);
		mutex_unlock(&audit_filter_mutex);
		prune_one(tree);
	} else {
		spin_unlock(&hash_lock);
	}
}
static void audit_schedule_prune(void);
/* called with audit_filter_mutex */
int audit_remove_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *tree;
	tree = rule->tree;
	if (tree) {
		spin_lock(&hash_lock);
		list_del_init(&rule->rlist);
		if (list_empty(&tree->rules) && !tree->goner) {
			tree->root = NULL;
			list_del_init(&tree->same_root);
			tree->goner = 1;
			list_move(&tree->list, &prune_list);
			rule->tree = NULL;
			spin_unlock(&hash_lock);
			audit_schedule_prune();
			return 1;
		}
		rule->tree = NULL;
		spin_unlock(&hash_lock);
		return 1;
	}
	return 0;
}
static int compare_root(struct vfsmount *mnt, void *arg)
{
	return d_backing_inode(mnt->mnt_root) == arg;
}
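/*
 * Walk tree_list with a cursor: mark every chunk of each tree as 'will
 * prune', clear the mark on chunks still reachable from the tree's root
 * mounts, then let trim_marked() untag whatever stayed marked.
 */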
void audit_trim_trees(void)
{
	struct list_head cursor;

	mutex_lock(&audit_filter_mutex);
	list_add(&cursor, &tree_list);
	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		struct path path;
		struct vfsmount *root_mnt;
		struct node *node;
		int err;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path);
		if (err)
			goto skip_it;

		root_mnt = collect_mounts(&path);
		path_put(&path);
		if (IS_ERR(root_mnt))
			goto skip_it;

		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list) {
			struct audit_chunk *chunk = find_chunk(node);
			/* this could be NULL if the watch is dying elsewhere... */
			struct inode *inode = chunk->mark.inode;
			node->index |= 1U<<31;
			if (iterate_mounts(compare_root, inode, root_mnt))
				node->index &= ~(1U<<31);
		}
		spin_unlock(&hash_lock);
		trim_marked(tree);
		drop_collected_mounts(root_mnt);
skip_it:
		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
}
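/*
 * Validate and allocate the tree for a new AUDIT_TREE rule; the actual
 * tagging of inodes is deferred to audit_add_tree_rule().
 */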
int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
{

	if (pathname[0] != '/' ||
	    rule->listnr != AUDIT_FILTER_EXIT ||
	    op != Audit_equal ||
	    rule->inode_f || rule->watch || rule->tree)
		return -EINVAL;
	rule->tree = alloc_tree(pathname);
	if (!rule->tree)
		return -ENOMEM;
	return 0;
}
void audit_put_tree(struct audit_tree *tree)
{
	put_tree(tree);
}
static int tag_mount(struct vfsmount *mnt, void *arg)
{
	return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
}
/*
 * That gets run when evict_chunk() ends up needing to kill audit_tree.
 * Runs from a separate thread.
 */
static int prune_tree_thread(void *unused)
{
	for (;;) {
		if (list_empty(&prune_list)) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}

		mutex_lock(&audit_cmd_mutex);
		mutex_lock(&audit_filter_mutex);

		while (!list_empty(&prune_list)) {
			struct audit_tree *victim;

			victim = list_entry(prune_list.next,
					struct audit_tree, list);
			list_del_init(&victim->list);

			mutex_unlock(&audit_filter_mutex);

			prune_one(victim);

			mutex_lock(&audit_filter_mutex);
		}

		mutex_unlock(&audit_filter_mutex);
		mutex_unlock(&audit_cmd_mutex);
	}
	return 0;
}
static int audit_launch_prune(void)
{
	if (prune_thread)
		return 0;
	prune_thread = kthread_run(prune_tree_thread, NULL,
				"audit_prune_tree");
	if (IS_ERR(prune_thread)) {
		pr_err("cannot start thread audit_prune_tree");
		prune_thread = NULL;
		return -ENOMEM;
	}
	return 0;
}
/* called with audit_filter_mutex */
int audit_add_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *seed = rule->tree, *tree;
	struct path path;
	struct vfsmount *mnt;
	int err;

	rule->tree = NULL;
	list_for_each_entry(tree, &tree_list, list) {
		if (!strcmp(seed->pathname, tree->pathname)) {
			put_tree(seed);
			rule->tree = tree;
			list_add(&rule->rlist, &tree->rules);
			return 0;
		}
	}
	tree = seed;
	list_add(&tree->list, &tree_list);
	list_add(&rule->rlist, &tree->rules);
	/* do not set rule->tree yet */
	mutex_unlock(&audit_filter_mutex);

	if (unlikely(!prune_thread)) {
		err = audit_launch_prune();
		if (err)
			goto Err;
	}

	err = kern_path(tree->pathname, 0, &path);
	if (err)
		goto Err;
	mnt = collect_mounts(&path);
	path_put(&path);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto Err;
	}

	get_tree(tree);
	err = iterate_mounts(tag_mount, tree, mnt);
	drop_collected_mounts(mnt);

	if (!err) {
		struct node *node;
		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list)
			node->index &= ~(1U<<31);
		spin_unlock(&hash_lock);
	} else {
		trim_marked(tree);
		goto Err;
	}

	mutex_lock(&audit_filter_mutex);
	if (list_empty(&rule->rlist)) {
		put_tree(tree);
		return -ENOENT;
	}
	rule->tree = tree;
	put_tree(tree);

	return 0;
Err:
	mutex_lock(&audit_filter_mutex);
	list_del_init(&tree->list);
	list_del_init(&tree->rules);
	put_tree(tree);
	return err;
}
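/*
 * For every tree whose pathname covers @old, additionally tag the mounts
 * collected at @new.  The barrier/cursor pair keeps the walk of tree_list
 * stable while audit_filter_mutex is dropped around the filesystem work.
 */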
int audit_tag_tree(char *old, char *new)
{
	struct list_head cursor, barrier;
	int failed = 0;
	struct path path1, path2;
	struct vfsmount *tagged;
	int err;

	err = kern_path(new, 0, &path2);
	if (err)
		return err;
	tagged = collect_mounts(&path2);
	path_put(&path2);
	if (IS_ERR(tagged))
		return PTR_ERR(tagged);

	err = kern_path(old, 0, &path1);
	if (err) {
		drop_collected_mounts(tagged);
		return err;
	}

	mutex_lock(&audit_filter_mutex);
	list_add(&barrier, &tree_list);
	list_add(&cursor, &barrier);

	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		int good_one = 0;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path2);
		if (!err) {
			good_one = path_is_under(&path1, &path2);
			path_put(&path2);
		}

		if (!good_one) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			continue;
		}

		failed = iterate_mounts(tag_mount, tree, tagged);
		if (failed) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			break;
		}

		mutex_lock(&audit_filter_mutex);
		spin_lock(&hash_lock);
		if (!tree->goner) {
			list_del(&tree->list);
			list_add(&tree->list, &tree_list);
		}
		spin_unlock(&hash_lock);
		put_tree(tree);
	}

	while (barrier.prev != &tree_list) {
		struct audit_tree *tree;

		tree = container_of(barrier.prev, struct audit_tree, list);
		get_tree(tree);
		list_del(&tree->list);
		list_add(&tree->list, &barrier);
		mutex_unlock(&audit_filter_mutex);

		if (!failed) {
			struct node *node;
			spin_lock(&hash_lock);
			list_for_each_entry(node, &tree->chunks, list)
				node->index &= ~(1U<<31);
			spin_unlock(&hash_lock);
		} else {
			trim_marked(tree);
		}

		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&barrier);
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
	path_put(&path1);
	drop_collected_mounts(tagged);
	return failed;
}
static void audit_schedule_prune(void)
{
	wake_up_process(prune_thread);
}
/*
 * ... and that one is done if evict_chunk() decides to delay until the end
 * of syscall.  Runs synchronously.
 */
void audit_kill_trees(struct list_head *list)
{
	mutex_lock(&audit_cmd_mutex);
	mutex_lock(&audit_filter_mutex);

	while (!list_empty(list)) {
		struct audit_tree *victim;

		victim = list_entry(list->next, struct audit_tree, list);
		kill_rules(victim);
		list_del_init(&victim->list);

		mutex_unlock(&audit_filter_mutex);

		prune_one(victim);

		mutex_lock(&audit_filter_mutex);
	}

	mutex_unlock(&audit_filter_mutex);
	mutex_unlock(&audit_cmd_mutex);
}
/*
 * Here comes the stuff asynchronous to auditctl operations
 */
static void evict_chunk(struct audit_chunk *chunk)
{
	struct audit_tree *owner;
	struct list_head *postponed = audit_killed_trees();
	int need_prune = 0;
	int n;

	if (chunk->dead)
		return;

	chunk->dead = 1;
	mutex_lock(&audit_filter_mutex);
	spin_lock(&hash_lock);
	while (!list_empty(&chunk->trees)) {
		owner = list_entry(chunk->trees.next,
				   struct audit_tree, same_root);
		owner->goner = 1;
		owner->root = NULL;
		list_del_init(&owner->same_root);
		spin_unlock(&hash_lock);
		if (!postponed) {
			kill_rules(owner);
			list_move(&owner->list, &prune_list);
			need_prune = 1;
		} else {
			list_move(&owner->list, postponed);
		}
		spin_lock(&hash_lock);
	}
	list_del_rcu(&chunk->hash);
	for (n = 0; n < chunk->count; n++)
		list_del_init(&chunk->owners[n].list);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_filter_mutex);
	if (need_prune)
		audit_schedule_prune();
}
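/*
 * Nothing to do for ordinary fsnotify events; all the real work happens
 * when the mark is torn down (freeing_mark -> evict_chunk()).
 */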
static int audit_tree_handle_event(struct fsnotify_group *group,
				   struct inode *to_tell,
				   struct fsnotify_mark *inode_mark,
				   struct fsnotify_mark *vfsmount_mark,
				   u32 mask, const void *data, int data_type,
				   const unsigned char *file_name, u32 cookie)
{
	return 0;
}
static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);

	evict_chunk(chunk);

	/*
	 * We are guaranteed to have at least one reference to the mark from
	 * either the inode or the caller of fsnotify_destroy_mark().
	 */
	BUG_ON(atomic_read(&entry->refcnt) < 1);
}
static const struct fsnotify_ops audit_tree_ops = {
	.handle_event = audit_tree_handle_event,
	.freeing_mark = audit_tree_freeing_mark,
};
static int __init audit_tree_init(void)
{
	int i;

	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
	if (IS_ERR(audit_tree_group))
		audit_panic("cannot initialize fsnotify group for rectree watches");

	for (i = 0; i < HASH_SIZE; i++)
		INIT_LIST_HEAD(&chunk_hash_heads[i]);

	return 0;
}
__initcall(audit_tree_init);