asedeno.scripts.mit.edu Git - linux.git/blobdiff - kernel/fork.c
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux.git] / kernel / fork.c
index fbe9dfcd8680ec9b6d485e65596b228ec77a6da0..5359facf98675d7746e29f3883b07be4e242dca1 100644 (file)
@@ -11,6 +11,7 @@
  * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
  */
 
+#include <linux/anon_inodes.h>
 #include <linux/slab.h>
 #include <linux/sched/autogroup.h>
 #include <linux/sched/mm.h>
@@ -21,6 +22,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/cputime.h>
+#include <linux/seq_file.h>
 #include <linux/rtmutex.h>
 #include <linux/init.h>
 #include <linux/unistd.h>
@@ -1223,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child,
        int killed;
 
        freezer_do_not_count();
+       cgroup_enter_frozen();
        killed = wait_for_completion_killable(vfork);
+       cgroup_leave_frozen(false);
        freezer_count();
 
        if (killed) {
@@ -1670,6 +1674,58 @@ static inline void rcu_copy_process(struct task_struct *p)
 #endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
+/*
+ * ->release handler installed in pidfd_fops.
+ *
+ * Detaches the struct pid pointer kept in file->private_data and drops one
+ * reference on it with put_pid() (presumably the reference taken with
+ * get_pid() in pidfd_create() - confirm against anon_inode_getfd()).
+ *
+ * Always returns 0.
+ */
+static int pidfd_release(struct inode *inode, struct file *file)
+{
+       struct pid *held;
+
+       /* Clear the pointer before dropping the reference it stood for. */
+       held = file->private_data;
+       file->private_data = NULL;
+
+       put_pid(held);
+
+       return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * ->show_fdinfo handler installed in pidfd_fops.
+ *
+ * @m: seq_file receiving the output
+ * @f: the pidfd file; its private_data holds the struct pid
+ *
+ * Writes a single "Pid:\t<nr>" line, where <nr> is the pid number as
+ * reported by pid_nr_ns() for the pid namespace that proc_pid_ns() returns
+ * for the inode backing @m.
+ */
+static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+       struct pid *pid = f->private_data;
+       struct pid_namespace *ns;
+
+       ns = proc_pid_ns(file_inode(m->file));
+
+       seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+       seq_putc(m, '\n');
+}
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * File operations backing pid file descriptors (see pidfd_create()).
+ * The fdinfo hook is only wired up when procfs support is built in.
+ */
+const struct file_operations pidfd_fops = {
+#ifdef CONFIG_PROC_FS
+       .show_fdinfo = pidfd_show_fdinfo,
+#endif
+       .release = pidfd_release,
+};
+
+/**
+ * pidfd_create() - Allocate a pid file descriptor for a struct pid.
+ *
+ * @pid:  struct pid the new pidfd will refer to
+ *
+ * Takes a reference on @pid with get_pid() and hands it to
+ * anon_inode_getfd() together with pidfd_fops. The descriptor is requested
+ * with O_RDWR | O_CLOEXEC. If no descriptor could be set up, the reference
+ * is dropped again before returning.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ *         On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+       int fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+                                 O_RDWR | O_CLOEXEC);
+
+       if (fd >= 0)
+               return fd;
+
+       /* Setup failed: give back the reference taken by get_pid() above. */
+       put_pid(pid);
+       return fd;
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -1682,13 +1738,14 @@ static __latent_entropy struct task_struct *copy_process(
                                        unsigned long clone_flags,
                                        unsigned long stack_start,
                                        unsigned long stack_size,
+                                       int __user *parent_tidptr,
                                        int __user *child_tidptr,
                                        struct pid *pid,
                                        int trace,
                                        unsigned long tls,
                                        int node)
 {
-       int retval;
+       int pidfd = -1, retval;
        struct task_struct *p;
        struct multiprocess_signals delayed;
 
@@ -1738,6 +1795,31 @@ static __latent_entropy struct task_struct *copy_process(
                        return ERR_PTR(-EINVAL);
        }
 
+       if (clone_flags & CLONE_PIDFD) {
+               int reserved;
+
+               /*
+                * - CLONE_PARENT_SETTID is useless for pidfds and also
+                *   parent_tidptr is used to return pidfds.
+                * - CLONE_DETACHED is blocked so that we can potentially
+                *   reuse it later for CLONE_PIDFD.
+                * - CLONE_THREAD is blocked until someone really needs it.
+                */
+               if (clone_flags &
+                   (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
+                       return ERR_PTR(-EINVAL);
+
+               /*
+                * Verify that parent_tidptr is sane so we can potentially
+                * reuse it later.
+                */
+               if (get_user(reserved, parent_tidptr))
+                       return ERR_PTR(-EFAULT);
+
+               if (reserved != 0)
+                       return ERR_PTR(-EINVAL);
+       }
+
        /*
         * Force any signals received before this point to be delivered
         * before the fork happens.  Collect up signals sent to multiple
@@ -1944,6 +2026,22 @@ static __latent_entropy struct task_struct *copy_process(
                }
        }
 
+       /*
+        * This has to happen after we've potentially unshared the file
+        * descriptor table (so that the pidfd doesn't leak into the child
+        * if the fd table isn't shared).
+        */
+       if (clone_flags & CLONE_PIDFD) {
+               retval = pidfd_create(pid);
+               if (retval < 0)
+                       goto bad_fork_free_pid;
+
+               pidfd = retval;
+               retval = put_user(pidfd, parent_tidptr);
+               if (retval)
+                       goto bad_fork_put_pidfd;
+       }
+
 #ifdef CONFIG_BLOCK
        p->plug = NULL;
 #endif
@@ -2004,7 +2102,7 @@ static __latent_entropy struct task_struct *copy_process(
         */
        retval = cgroup_can_fork(p);
        if (retval)
-               goto bad_fork_free_pid;
+               goto bad_fork_put_pidfd;
 
        /*
         * From this point on we must avoid any synchronous user-space
@@ -2119,6 +2217,9 @@ static __latent_entropy struct task_struct *copy_process(
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
        cgroup_cancel_fork(p);
+bad_fork_put_pidfd:
+       if (clone_flags & CLONE_PIDFD)
+               ksys_close(pidfd);
 bad_fork_free_pid:
        cgroup_threadgroup_change_end(current);
        if (pid != &init_struct_pid)
@@ -2184,7 +2285,7 @@ static inline void init_idle_pids(struct task_struct *idle)
 struct task_struct *fork_idle(int cpu)
 {
        struct task_struct *task;
-       task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+       task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
                            cpu_to_node(cpu));
        if (!IS_ERR(task)) {
                init_idle_pids(task);
@@ -2236,7 +2337,7 @@ long _do_fork(unsigned long clone_flags,
                        trace = 0;
        }
 
-       p = copy_process(clone_flags, stack_start, stack_size,
+       p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
                         child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
        add_latent_entropy();