sched/membarrier: Return -ENOMEM to userspace on memory allocation failure

[linux.git] / kernel / exit.c
diff --git a/kernel/exit.c b/kernel/exit.c

index 5b4a5dcce8f8328dd173ead148f4dc6a5fa0d8c4..a46a50d67002d9877b00c8052ba922e02e9740af 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
         put_task_struct(tsk);
  }
  
+void put_task_struct_rcu_user(struct task_struct *task) /* drop one reference on task->rcu_users */
+{
+       if (refcount_dec_and_test(&task->rcu_users)) /* true only when this drop released the last rcu_users reference */
+               call_rcu(&task->rcu, delayed_put_task_struct); /* defer the put_task_struct() to the RCU callback */
+}
  
  void release_task(struct task_struct *p)
  {
@@ -222,76 +227,13 @@ void release_task(struct task_struct *p)
  
         write_unlock_irq(&tasklist_lock);
         release_thread(p);
-       call_rcu(&p->rcu, delayed_put_task_struct);
+       put_task_struct_rcu_user(p);
  
         p = leader;
         if (unlikely(zap_leader))
                 goto repeat;
  }
  
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-       struct sighand_struct *sighand;
-       struct task_struct *task;
-
-       /*
-        * We need to verify that release_task() was not called and thus
-        * delayed_put_task_struct() can't run and drop the last reference
-        * before rcu_read_unlock(). We check task->sighand != NULL,
-        * but we can read the already freed and reused memory.
-        */
-retry:
-       task = rcu_dereference(*ptask);
-       if (!task)
-               return NULL;
-
-       probe_kernel_address(&task->sighand, sighand);
-
-       /*
-        * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-        * was already freed we can not miss the preceding update of this
-        * pointer.
-        */
-       smp_rmb();
-       if (unlikely(task != READ_ONCE(*ptask)))
-               goto retry;
-
-       /*
-        * We've re-checked that "task == *ptask", now we have two different
-        * cases:
-        *
-        * 1. This is actually the same task/task_struct. In this case
-        *    sighand != NULL tells us it is still alive.
-        *
-        * 2. This is another task which got the same memory for task_struct.
-        *    We can't know this of course, and we can not trust
-        *    sighand != NULL.
-        *
-        *    In this case we actually return a random value, but this is
-        *    correct.
-        *
-        *    If we return NULL - we can pretend that we actually noticed that
-        *    *ptask was updated when the previous task has exited. Or pretend
-        *    that probe_slab_address(&sighand) reads NULL.
-        *
-        *    If we return the new task (because sighand is not NULL for any
-        *    reason) - this is fine too. This (new) task can't go away before
-        *    another gp pass.
-        *
-        *    And note: We could even eliminate the false positive if re-read
-        *    task->sighand once again to avoid the falsely NULL. But this case
-        *    is very unlikely so we don't care.
-        */
-       if (!sighand)
-               return NULL;
-
-       return task;
-}
-
  void rcuwait_wake_up(struct rcuwait *w)
  {
         struct task_struct *task;
@@ -311,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
          */
         smp_mb(); /* (B) */
  
-       /*
-        * Avoid using task_rcu_dereference() magic as long as we are careful,
-        * see comment in rcuwait_wait_event() regarding ->exit_state.
-        */
         task = rcu_dereference(w->task);
         if (task)
                 wake_up_process(task);
@@ -1554,6 +1492,23 @@ static long do_wait(struct wait_opts *wo)
         return retval;
  }
  
+static struct pid *pidfd_get_pid(unsigned int fd) /* map fd to its struct pid (with a reference taken) or an ERR_PTR */
+{
+       struct fd f;
+       struct pid *pid;
+
+       f = fdget(fd);
+       if (!f.file)
+               return ERR_PTR(-EBADF); /* fdget() found no file for this descriptor */
+
+       pid = pidfd_pid(f.file);
+       if (!IS_ERR(pid))
+               get_pid(pid); /* take the pid reference while the file is still held */
+
+       fdput(f); /* the file is released on both the success and the error path */
+       return pid; /* may still be the ERR_PTR from pidfd_pid(); callers must check IS_ERR() */
+}
+
  static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
                           int options, struct rusage *ru)
  {
@@ -1576,19 +1531,32 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
                 type = PIDTYPE_PID;
                 if (upid <= 0)
                         return -EINVAL;
+
+               pid = find_get_pid(upid);
                 break;
         case P_PGID:
                 type = PIDTYPE_PGID;
-               if (upid <= 0)
+               if (upid < 0)
                         return -EINVAL;
+
+               if (upid)
+                       pid = find_get_pid(upid);
+               else
+                       pid = get_task_pid(current, PIDTYPE_PGID);
+               break;
+       case P_PIDFD:
+               type = PIDTYPE_PID;
+               if (upid < 0)
+                       return -EINVAL;
+
+               pid = pidfd_get_pid(upid);
+               if (IS_ERR(pid))
+                       return PTR_ERR(pid);
                 break;
         default:
                 return -EINVAL;
         }
  
-       if (type < PIDTYPE_MAX)
-               pid = find_get_pid(upid);
-
         wo.wo_type      = type;
         wo.wo_pid       = pid;
         wo.wo_flags     = options;