X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=blobdiff_plain;f=kernel%2Ffork.c;h=2508a4f238a3f3b2e41dd78502a5a653f57c221b;hb=baf4dc829574ed9dd8ba33cda0536a50f28937de;hp=55af6931c6ec3f8b817836c7fc78d56638f7de8c;hpb=b7fa0cde29c87a6d3f0a11d24c46f52549e5a5c1;p=linux.git diff --git a/kernel/fork.c b/kernel/fork.c index 55af6931c6ec..2508a4f238a3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -94,6 +93,7 @@ #include #include #include +#include #include #include @@ -224,6 +224,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; + /* Clear the KASAN shadow of the stack. */ + kasan_unpoison_shadow(s->addr, THREAD_SIZE); + /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); @@ -1283,24 +1286,8 @@ static int wait_for_vfork_done(struct task_struct *child, * restoring the old one. . . * Eric Biederman 10 January 1998 */ -void mm_release(struct task_struct *tsk, struct mm_struct *mm) +static void mm_release(struct task_struct *tsk, struct mm_struct *mm) { - /* Get rid of any futexes when releasing the mm */ -#ifdef CONFIG_FUTEX - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); - tsk->robust_list = NULL; - } -#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; - } -#endif - if (unlikely(!list_empty(&tsk->pi_state_list))) - exit_pi_state_list(tsk); -#endif - uprobe_free_utask(tsk); /* Get rid of any cached register state */ @@ -1333,6 +1320,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) complete_vfork_done(tsk); } +void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) +{ + futex_exit_release(tsk); + mm_release(tsk, mm); +} + +void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) +{ + futex_exec_release(tsk); + mm_release(tsk, mm); +} + /** * dup_mm() - duplicates an existing mm structure * @tsk: the task_struct with which the new mm will be associated. @@ -1517,6 +1516,11 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) spin_lock_irq(¤t->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); spin_unlock_irq(¤t->sighand->siglock); + + /* Reset all signal handler not set to SIG_IGN to SIG_DFL. */ + if (clone_flags & CLONE_CLEAR_SIGHAND) + flush_signal_handlers(tsk, 0); + return 0; } @@ -1695,12 +1699,68 @@ static int pidfd_release(struct inode *inode, struct file *file) } #ifdef CONFIG_PROC_FS +/** + * pidfd_show_fdinfo - print information about a pidfd + * @m: proc fdinfo file + * @f: file referencing a pidfd + * + * Pid: + * This function will print the pid that a given pidfd refers to in the + * pid namespace of the procfs instance. + * If the pid namespace of the process is not a descendant of the pid + * namespace of the procfs instance 0 will be shown as its pid. This is + * similar to calling getppid() on a process whose parent is outside of + * its pid namespace. + * + * NSpid: + * If pid namespaces are supported then this function will also print + * the pid of a given pidfd refers to for all descendant pid namespaces + * starting from the current pid namespace of the instance, i.e. the + * Pid field and the first entry in the NSpid field will be identical. + * If the pid namespace of the process is not a descendant of the pid + * namespace of the procfs instance 0 will be shown as its first NSpid + * entry and no others will be shown. + * Note that this differs from the Pid and NSpid fields in + * /proc//status where Pid and NSpid are always shown relative to + * the pid namespace of the procfs instance. The difference becomes + * obvious when sending around a pidfd between pid namespaces from a + * different branch of the tree, i.e. where no ancestoral relation is + * present between the pid namespaces: + * - create two new pid namespaces ns1 and ns2 in the initial pid + * namespace (also take care to create new mount namespaces in the + * new pid namespace and mount procfs) + * - create a process with a pidfd in ns1 + * - send pidfd from ns1 to ns2 + * - read /proc/self/fdinfo/ and observe that both Pid and NSpid + * have exactly one entry, which is 0 + */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { - struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); struct pid *pid = f->private_data; + struct pid_namespace *ns; + pid_t nr = -1; + + if (likely(pid_has_task(pid, PIDTYPE_PID))) { + ns = proc_pid_ns(file_inode(m->file)); + nr = pid_nr_ns(pid, ns); + } + + seq_put_decimal_ll(m, "Pid:\t", nr); - seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); +#ifdef CONFIG_PID_NS + seq_put_decimal_ll(m, "\nNSpid:\t", nr); + if (nr > 0) { + int i; + + /* If nr is non-zero it means that 'pid' is valid and that + * ns, i.e. the pid namespace associated with the procfs + * instance, is in the pid namespace hierarchy of pid. + * Start at one below the already printed level. + */ + for (i = ns->level + 1; i <= pid->level; i++) + seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); + } +#endif seq_putc(m, '\n'); } #endif @@ -1708,11 +1768,11 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) /* * Poll support for process exit notification. */ -static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) +static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct task_struct *task; struct pid *pid = file->private_data; - int poll_flags = 0; + __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); @@ -1724,7 +1784,7 @@ static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) * group, then poll(2) should block, similar to the wait(2) family. */ if (!task || (task->exit_state && thread_group_empty(task))) - poll_flags = POLLIN | POLLRDNORM; + poll_flags = EPOLLIN | EPOLLRDNORM; rcu_read_unlock(); return poll_flags; @@ -2026,7 +2086,8 @@ static __latent_entropy struct task_struct *copy_process( stackleak_task_init(p); if (pid != &init_struct_pid) { - pid = alloc_pid(p->nsproxy->pid_ns_for_children); + pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, + args->set_tid_size); if (IS_ERR(pid)) { retval = PTR_ERR(pid); goto bad_fork_cleanup_thread; @@ -2062,14 +2123,8 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_BLOCK p->plug = NULL; #endif -#ifdef CONFIG_FUTEX - p->robust_list = NULL; -#ifdef CONFIG_COMPAT - p->compat_robust_list = NULL; -#endif - INIT_LIST_HEAD(&p->pi_state_list); - p->pi_state_cache = NULL; -#endif + futex_init_task(p); + /* * sigaltstack should be cleared when sharing the same VM */ @@ -2130,7 +2185,7 @@ static __latent_entropy struct task_struct *copy_process( */ p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boottime_ns(); + p->start_boottime = ktime_get_boottime_ns(); /* * Make it visible to the rest of the system, but dont wake it up yet. @@ -2529,6 +2584,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, { int err; struct clone_args args; + pid_t *kset_tid = kargs->set_tid; if (unlikely(usize > PAGE_SIZE)) return -E2BIG; @@ -2539,6 +2595,15 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, if (err) return err; + if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL)) + return -EINVAL; + + if (unlikely(!args.set_tid && args.set_tid_size > 0)) + return -EINVAL; + + if (unlikely(args.set_tid && args.set_tid_size == 0)) + return -EINVAL; + /* * Verify that higher 32bits of exit_signal are unset and that * it is a valid signal @@ -2556,8 +2621,16 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, .stack = args.stack, .stack_size = args.stack_size, .tls = args.tls, + .set_tid_size = args.set_tid_size, }; + if (args.set_tid && + copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), + (kargs->set_tid_size * sizeof(pid_t)))) + return -EFAULT; + + kargs->set_tid = kset_tid; + return 0; } @@ -2591,11 +2664,8 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) static bool clone3_args_valid(struct kernel_clone_args *kargs) { - /* - * All lower bits of the flag word are taken. - * Verify that no other unknown flags are passed along. - */ - if (kargs->flags & ~CLONE_LEGACY_FLAGS) + /* Verify that no unknown flags are passed along. */ + if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND)) return false; /* @@ -2605,6 +2675,10 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs) if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) return false; + if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == + (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) + return false; + if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && kargs->exit_signal) return false; @@ -2631,6 +2705,9 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) int err; struct kernel_clone_args kargs; + pid_t set_tid[MAX_PID_NS_LEVEL]; + + kargs.set_tid = set_tid; err = copy_clone_args_from_user(&kargs, uargs, size); if (err)