futex: Replace PF_EXITPIDONE with a state
diff --git a/kernel/futex.c b/kernel/futex.c
index bd18f60e4c6c635d938cce50fd226150800b359f..41c75277d7d1534bb5c419c374722547a482bdd8 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -325,6 +325,12 @@ static inline bool should_fail_futex(bool fshared)
 }
 #endif /* CONFIG_FAIL_FUTEX */
 
+#ifdef CONFIG_COMPAT
+static void compat_exit_robust_list(struct task_struct *curr);
+#else
+static inline void compat_exit_robust_list(struct task_struct *curr) { }
+#endif
+
 static inline void futex_get_mm(union futex_key *key)
 {
        mmgrab(key->private.mm);
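For context: this series replaces the PF_EXITPIDONE task flag with a
dedicated futex exit state in task_struct. The state values the hunks
below test against are defined outside this file; a sketch following
the naming used by the rest of the series (FUTEX_STATE_EXITING is
assumed from the follow-up patches and does not appear in this diff):

	/* Sketch of the futex exit states, defined elsewhere in the
	 * series (e.g. include/linux/futex.h):
	 */
	enum {
		FUTEX_STATE_OK,		/* Task alive, attach is fine    */
		FUTEX_STATE_EXITING,	/* Exit in progress, retry attach */
		FUTEX_STATE_DEAD,	/* Futex exit cleanup finished   */
	};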
@@ -890,7 +896,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
  * Kernel cleans up PI-state, but userspace is likely hosed.
  * (Robust-futex cleanup is separate and might save the day for userspace.)
  */
-void exit_pi_state_list(struct task_struct *curr)
+static void exit_pi_state_list(struct task_struct *curr)
 {
        struct list_head *next, *head = &curr->pi_state_list;
        struct futex_pi_state *pi_state;
@@ -960,7 +966,8 @@ void exit_pi_state_list(struct task_struct *curr)
        }
        raw_spin_unlock_irq(&curr->pi_lock);
 }
-
+#else
+static inline void exit_pi_state_list(struct task_struct *curr) { }
 #endif
 
 /*
@@ -1175,9 +1182,10 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
        u32 uval2;
 
        /*
-        * If PF_EXITPIDONE is not yet set, then try again.
+        * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
+        * for it to finish.
         */
-       if (tsk && !(tsk->flags & PF_EXITPIDONE))
+       if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
                return -EAGAIN;
 
        /*
@@ -1196,8 +1204,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
         *    *uaddr = 0xC0000000;           tsk = get_task(PID);
         *   }                               if (!tsk->flags & PF_EXITING) {
         *  ...                                attach();
-        *  tsk->flags |= PF_EXITPIDONE;     } else {
-        *                                     if (!(tsk->flags & PF_EXITPIDONE))
+        *  tsk->futex_state =               } else {
+        *      FUTEX_STATE_DEAD;              if (tsk->futex_state !=
+        *                                        FUTEX_STATE_DEAD)
         *                                       return -EAGAIN;
         *                                     return -ESRCH; <--- FAIL
         *                                   }
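For reference, the tail of handle_exit_race() (unchanged by this patch,
hence not shown in the hunk) resolves the diagrammed race by rereading
the user space value; reconstructed from the upstream function, so
treat it as a sketch:

	if (get_futex_value_locked(&uval2, uaddr))
		return -EFAULT;

	/* If the user space value has changed, the waiter must retry. */
	if (uval2 != uval)
		return -EAGAIN;

	return -ESRCH;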
@@ -1253,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
        }
 
        /*
-        * We need to look at the task state flags to figure out,
-        * whether the task is exiting. To protect against the do_exit
-        * change of the task flags, we do this protected by
-        * p->pi_lock:
+        * We need to look at the task state to figure out whether the
+        * task is exiting. To protect against the change of the task state
+        * in futex_exit_release(), we do this protected by p->pi_lock:
         */
        raw_spin_lock_irq(&p->pi_lock);
-       if (unlikely(p->flags & PF_EXITING)) {
+       if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
                /*
-                * The task is on the way out. When PF_EXITPIDONE is
-                * set, we know that the task has finished the
-                * cleanup:
+                * The task is on the way out. When the futex state is
+                * FUTEX_STATE_DEAD, we know that the task has finished
+                * the cleanup:
                 */
                int ret = handle_exit_race(uaddr, uval, p);
 
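The read of p->futex_state above is done under p->pi_lock; the writer
side in the exit path is assumed (per this series) to update the state
under the same lock, so the reader sees either the old or the new
state, never a transition in progress:

	/*
	 * Reader (this hunk):               Writer (exit path, assumed):
	 *
	 * raw_spin_lock_irq(&p->pi_lock);   raw_spin_lock_irq(&tsk->pi_lock);
	 * if (p->futex_state !=             tsk->futex_state =
	 *     FUTEX_STATE_OK)                   FUTEX_STATE_DEAD;
	 *     ret = handle_exit_race(...);  raw_spin_unlock_irq(&tsk->pi_lock);
	 * raw_spin_unlock_irq(&p->pi_lock);
	 */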
@@ -1480,7 +1488,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 
        /*
         * Queue the task for later wakeup for after we've released
-        * the hb->lock. wake_q_add() grabs reference to p.
+        * the hb->lock.
         */
        wake_q_add_safe(wake_q, p);
 }
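The dropped sentence reflects the earlier switch to wake_q_add_safe():
unlike wake_q_add(), the _safe variant consumes a task reference the
caller already holds, even when the task is already queued, so no new
reference is taken here. The calling pattern in mark_wake_futex(),
sketched:

	get_task_struct(p);		/* caller takes its own reference  */
	__unqueue_futex(q);		/* detach q before queueing wakeup */
	...
	wake_q_add_safe(wake_q, p);	/* consumes the reference, even if
					 * p was already on the wake queue */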
@@ -3452,11 +3460,16 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
        return ret;
 }
 
+/* Constants for the pending_op argument of handle_futex_death */
+#define HANDLE_DEATH_PENDING   true
+#define HANDLE_DEATH_LIST      false
+
 /*
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
+                             bool pi, bool pending_op)
 {
        u32 uval, uninitialized_var(nval), mval;
        int err;
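handle_futex_death() is handed addresses derived from the user space
robust list. For reference, the UAPI layout being walked (from
include/uapi/linux/futex.h):

	struct robust_list {
		struct robust_list __user *next;
	};

	struct robust_list_head {
		struct robust_list list;  /* locks held by the task      */
		long futex_offset;	  /* entry -> futex word offset  */
		struct robust_list __user *list_op_pending; /* op in flight */
	};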
@@ -3469,6 +3482,42 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
        if (get_user(uval, uaddr))
                return -1;
 
+       /*
+        * Special case for regular (non PI) futexes. The unlock path in
+        * user space has two race scenarios:
+        *
+        * 1. The unlock path releases the user space futex value and
+        *    before it can execute the futex() syscall to wake up
+        *    waiters it is killed.
+        *
+        * 2. A woken up waiter is killed before it can acquire the
+        *    futex in user space.
+        *
+        * In both cases the TID validation below would prevent the
+        * wakeup of potential waiters, causing these waiters to block
+        * forever.
+        *
+        * In both cases the following conditions are met:
+        *
+        *      1) task->robust_list->list_op_pending != NULL
+        *         @pending_op == true
+        *      2) User space futex value == 0
+        *      3) Regular futex: @pi == false
+        *
+        * If these conditions are met, it is safe to attempt waking up a
+        * potential waiter without touching the user space futex value and
+        * trying to set the OWNER_DIED bit. The user space futex value is
+        * uncontended and the rest of the user space mutex state is
+        * consistent, so a woken waiter will just take over the
+        * uncontended futex. Setting the OWNER_DIED bit would create
+        * inconsistent state and malfunction of the user space owner died
+        * handling.
+        */
+       if (pending_op && !pi && !uval) {
+               futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+               return 0;
+       }
+
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
                return 0;
 
@@ -3547,7 +3596,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
  *
  * We silently return on any sign of list-walking problem.
  */
-void exit_robust_list(struct task_struct *curr)
+static void exit_robust_list(struct task_struct *curr)
 {
        struct robust_list_head __user *head = curr->robust_list;
        struct robust_list __user *entry, *next_entry, *pending;
@@ -3588,10 +3637,11 @@ void exit_robust_list(struct task_struct *curr)
                 * A pending lock might already be on the list, so
                 * don't process it twice:
                 */
-               if (entry != pending)
+               if (entry != pending) {
                        if (handle_futex_death((void __user *)entry + futex_offset,
-                                               curr, pi))
+                                               curr, pi, HANDLE_DEATH_LIST))
                                return;
+               }
                if (rc)
                        return;
                entry = next_entry;
@@ -3605,9 +3655,28 @@ void exit_robust_list(struct task_struct *curr)
                cond_resched();
        }
 
-       if (pending)
+       if (pending) {
                handle_futex_death((void __user *)pending + futex_offset,
-                                  curr, pip);
+                                  curr, pip, HANDLE_DEATH_PENDING);
+       }
+}
+
+void futex_mm_release(struct task_struct *tsk)
+{
+       if (unlikely(tsk->robust_list)) {
+               exit_robust_list(tsk);
+               tsk->robust_list = NULL;
+       }
+
+#ifdef CONFIG_COMPAT
+       if (unlikely(tsk->compat_robust_list)) {
+               compat_exit_robust_list(tsk);
+               tsk->compat_robust_list = NULL;
+       }
+#endif
+
+       if (unlikely(!list_empty(&tsk->pi_state_list)))
+               exit_pi_state_list(tsk);
 }
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
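futex_mm_release() gathers the robust-list, compat and PI-state cleanup
that used to be open coded at its call site; presumably the caller in
kernel/fork.c now reduces to something like the sketch below (the
caller side is not part of this diff):

	/* kernel/fork.c (sketch, not part of this diff): */
	void mm_release(struct task_struct *tsk, struct mm_struct *mm)
	{
		futex_mm_release(tsk);	/* robust lists + PI state */

		/* ... rest of mm_release() unchanged ... */
	}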
@@ -3737,7 +3806,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
  *
  * We silently return on any sign of list-walking problem.
  */
-void compat_exit_robust_list(struct task_struct *curr)
+static void compat_exit_robust_list(struct task_struct *curr)
 {
        struct compat_robust_list_head __user *head = curr->compat_robust_list;
        struct robust_list __user *entry, *next_entry, *pending;
@@ -3784,7 +3853,8 @@ void compat_exit_robust_list(struct task_struct *curr)
                if (entry != pending) {
                        void __user *uaddr = futex_uaddr(entry, futex_offset);
 
-                       if (handle_futex_death(uaddr, curr, pi))
+                       if (handle_futex_death(uaddr, curr, pi,
+                                              HANDLE_DEATH_LIST))
                                return;
                }
                if (rc)
@@ -3803,7 +3873,7 @@ void compat_exit_robust_list(struct task_struct *curr)
        if (pending) {
                void __user *uaddr = futex_uaddr(pending, futex_offset);
 
-               handle_futex_death(uaddr, curr, pip);
+               handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
        }
 }
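The compat walk above differs from exit_robust_list() only in decoding
32-bit user pointers. For reference, the layout it traverses (from
include/linux/compat.h):

	struct compat_robust_list {
		compat_uptr_t			next;
	};

	struct compat_robust_list_head {
		struct compat_robust_list	list;
		compat_long_t			futex_offset;
		compat_uptr_t			list_op_pending;
	};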