Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ddee541ea97aa63863ebcdbee26c437de6b63e42..9657315405de63ebfcfa43fbed8d432f5db30481 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -127,16 +127,16 @@ enum {
  *
  * PL: wq_pool_mutex protected.
  *
- * PR: wq_pool_mutex protected for writes.  Sched-RCU protected for reads.
+ * PR: wq_pool_mutex protected for writes.  RCU protected for reads.
  *
  * PW: wq_pool_mutex and wq->mutex protected for writes.  Either for reads.
  *
  * PWR: wq_pool_mutex and wq->mutex protected for writes.  Either or
- *      sched-RCU for reads.
+ *      RCU for reads.
  *
  * WQ: wq->mutex protected.
  *
- * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
+ * WR: wq->mutex protected for writes.  RCU protected for reads.
  *
  * MD: wq_mayday_lock protected.
  */
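
The table above pairs one writer-side lock with plain RCU on the reader side. A minimal sketch of the PR: rule, with illustrative functions that are not part of this patch:

	/* Hedged sketch of the "PR:" convention. */
	static void pr_field_write(void)
	{
		mutex_lock(&wq_pool_mutex);	/* writers: wq_pool_mutex */
		/* ... update a PR:-annotated field ... */
		mutex_unlock(&wq_pool_mutex);
	}

	static void pr_field_read(void)
	{
		rcu_read_lock();		/* readers: RCU is enough */
		/* ... dereference the PR:-annotated field ... */
		rcu_read_unlock();
	}

The WR: and PWR: rules follow the same shape with wq->mutex (alone or in addition) on the writer side.
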
@@ -183,7 +183,7 @@ struct worker_pool {
        atomic_t                nr_running ____cacheline_aligned_in_smp;
 
        /*
-        * Destruction of pool is sched-RCU protected to allow dereferences
+        * Destruction of pool is RCU protected to allow dereferences
         * from get_work_pool().
         */
        struct rcu_head         rcu;
@@ -212,7 +212,7 @@ struct pool_workqueue {
        /*
         * Release of unbound pwq is punted to system_wq.  See put_pwq()
         * and pwq_unbound_release_workfn() for details.  pool_workqueue
-        * itself is also sched-RCU protected so that the first pwq can be
+        * itself is also RCU protected so that the first pwq can be
         * determined without grabbing wq->mutex.
         */
        struct work_struct      unbound_release_work;
@@ -266,8 +266,8 @@ struct workqueue_struct {
        char                    name[WQ_NAME_LEN]; /* I: workqueue name */
 
        /*
-        * Destruction of workqueue_struct is sched-RCU protected to allow
-        * walking the workqueues list without grabbing wq_pool_mutex.
+        * Destruction of workqueue_struct is RCU protected to allow walking
+        * the workqueues list without grabbing wq_pool_mutex.
         * This is used to dump all workqueues from sysrq.
         */
        struct rcu_head         rcu;
@@ -359,20 +359,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 #include <trace/events/workqueue.h>
 
 #define assert_rcu_or_pool_mutex()                                     \
-       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
+       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
                         !lockdep_is_held(&wq_pool_mutex),              \
-                        "sched RCU or wq_pool_mutex should be held")
+                        "RCU or wq_pool_mutex should be held")
 
 #define assert_rcu_or_wq_mutex(wq)                                     \
-       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
+       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
                         !lockdep_is_held(&wq->mutex),                  \
-                        "sched RCU or wq->mutex should be held")
+                        "RCU or wq->mutex should be held")
 
 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                       \
-       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
+       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
                         !lockdep_is_held(&wq->mutex) &&                \
                         !lockdep_is_held(&wq_pool_mutex),              \
-                        "sched RCU, wq->mutex or wq_pool_mutex should be held")
+                        "RCU, wq->mutex or wq_pool_mutex should be held")
 
 #define for_each_cpu_worker_pool(pool, cpu)                            \
        for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];               \
@@ -384,7 +384,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  * @pool: iteration cursor
  * @pi: integer used for iteration
  *
- * This must be called either with wq_pool_mutex held or sched RCU read
+ * This must be called either with wq_pool_mutex held or RCU read
  * locked.  If the pool needs to be used beyond the locking in effect, the
  * caller is responsible for guaranteeing that the pool stays online.
  *
@@ -416,7 +416,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  * @pwq: iteration cursor
  * @wq: the target workqueue
  *
- * This must be called either with wq->mutex held or sched RCU read locked.
+ * This must be called either with wq->mutex held or RCU read locked.
  * If the pwq needs to be used beyond the locking in effect, the caller is
  * responsible for guaranteeing that the pwq stays online.
  *
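
With the sched-RCU flavour gone, walking the pwqs of a workqueue from a pure reader now looks like the sketch below, which is the same shape the converted freeze_workqueues_busy() uses later in this diff (inspect_pwq() is a hypothetical helper):

	struct pool_workqueue *pwq;

	rcu_read_lock();
	for_each_pwq(pwq, wq) {
		/* pwq may be freed once the RCU read section ends */
		inspect_pwq(pwq);		/* hypothetical helper */
	}
	rcu_read_unlock();
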
@@ -552,7 +552,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
  * @wq: the target workqueue
  * @node: the node ID
  *
- * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
+ * This must be called with any of wq_pool_mutex, wq->mutex or RCU
  * read locked.
  * If the pwq needs to be used beyond the locking in effect, the caller is
  * responsible for guaranteeing that the pwq stays online.
@@ -696,8 +696,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
  * @work: the work item of interest
  *
  * Pools are created and destroyed under wq_pool_mutex, and allows read
- * access under sched-RCU read lock.  As such, this function should be
- * called under wq_pool_mutex or with preemption disabled.
+ * access under RCU read lock.  As such, this function should be
+ * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  *
  * All fields of the returned pool are accessible as long as the above
  * mentioned locking is in effect.  If the returned pool needs to be used
@@ -841,43 +841,32 @@ static void wake_up_worker(struct worker_pool *pool)
 }
 
 /**
- * wq_worker_waking_up - a worker is waking up
+ * wq_worker_running - a worker is running again
  * @task: task waking up
- * @cpu: CPU @task is waking up to
  *
- * This function is called during try_to_wake_up() when a worker is
- * being awoken.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * This function is called when a worker returns from schedule()
  */
-void wq_worker_waking_up(struct task_struct *task, int cpu)
+void wq_worker_running(struct task_struct *task)
 {
        struct worker *worker = kthread_data(task);
 
-       if (!(worker->flags & WORKER_NOT_RUNNING)) {
-               WARN_ON_ONCE(worker->pool->cpu != cpu);
+       if (!worker->sleeping)
+               return;
+       if (!(worker->flags & WORKER_NOT_RUNNING))
                atomic_inc(&worker->pool->nr_running);
-       }
+       worker->sleeping = 0;
 }
 
 /**
  * wq_worker_sleeping - a worker is going to sleep
  * @task: task going to sleep
  *
- * This function is called during schedule() when a busy worker is
- * going to sleep.  Worker on the same cpu can be woken up by
- * returning pointer to its task.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
- *
- * Return:
- * Worker task on @cpu to wake up, %NULL if none.
+ * This function is called from schedule() when a busy worker is
+ * going to sleep.
  */
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
+void wq_worker_sleeping(struct task_struct *task)
 {
-       struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+       struct worker *next, *worker = kthread_data(task);
        struct worker_pool *pool;
 
        /*
@@ -886,13 +875,15 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
         * checking NOT_RUNNING.
         */
        if (worker->flags & WORKER_NOT_RUNNING)
-               return NULL;
+               return;
 
        pool = worker->pool;
 
-       /* this can only happen on the local cpu */
-       if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
-               return NULL;
+       if (WARN_ON_ONCE(worker->sleeping))
+               return;
+
+       worker->sleeping = 1;
+       spin_lock_irq(&pool->lock);
 
        /*
         * The counterpart of the following dec_and_test, implied mb,
@@ -906,13 +897,17 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
         * lock is safe.
         */
        if (atomic_dec_and_test(&pool->nr_running) &&
-           !list_empty(&pool->worklist))
-               to_wakeup = first_idle_worker(pool);
-       return to_wakeup ? to_wakeup->task : NULL;
+           !list_empty(&pool->worklist)) {
+               next = first_idle_worker(pool);
+               if (next)
+                       wake_up_process(next->task);
+       }
+       spin_unlock_irq(&pool->lock);
 }
 
 /**
  * wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
  *
  * Determine the last function a worker executed. This is called from
  * the scheduler to get a worker's last known identity.
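
The reworked wq_worker_sleeping()/wq_worker_running() pair above no longer returns a task for the scheduler to wake; wq_worker_sleeping() takes pool->lock and wakes the next idle worker itself, and wq_worker_running() restores the nr_running count on the way back. A hedged sketch of the calling side, which lives in kernel/sched/core.c and is not part of this diff:

	/* around a blocking context switch of a workqueue worker */
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_sleeping(tsk);	/* may wake an idle worker */

	schedule();

	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_running(tsk);		/* re-account nr_running */
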
@@ -1132,7 +1127,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
 {
        if (pwq) {
                /*
-                * As both pwqs and pools are sched-RCU protected, the
+                * As both pwqs and pools are RCU protected, the
                 * following lock operations are safe.
                 */
                spin_lock_irq(&pwq->pool->lock);
@@ -1260,6 +1255,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
                return 0;
 
+       rcu_read_lock();
        /*
         * The queueing is in progress, or it is already queued. Try to
         * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
@@ -1298,10 +1294,12 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
                set_work_pool_and_keep_pending(work, pool->id);
 
                spin_unlock(&pool->lock);
+               rcu_read_unlock();
                return 1;
        }
        spin_unlock(&pool->lock);
 fail:
+       rcu_read_unlock();
        local_irq_restore(*flags);
        if (work_is_canceling(work))
                return -ENOENT;
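
The hunk above establishes the pattern the rest of the conversion repeats: the pool returned by get_work_pool() is only guaranteed to stay around inside the RCU read-side section, and its lock is still what serializes access to the work item. As a sketch (the same shape appears in start_flush_work() and work_busy() further down):

	struct worker_pool *pool;

	rcu_read_lock();
	pool = get_work_pool(work);
	if (pool) {
		spin_lock_irq(&pool->lock);
		/* ... inspect or steal the work item ... */
		spin_unlock_irq(&pool->lock);
	}
	rcu_read_unlock();
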
@@ -1415,6 +1413,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
        if (unlikely(wq->flags & __WQ_DRAINING) &&
            WARN_ON_ONCE(!is_chained_work(wq)))
                return;
+       rcu_read_lock();
 retry:
        if (req_cpu == WORK_CPU_UNBOUND)
                cpu = wq_select_unbound_cpu(raw_smp_processor_id());
@@ -1471,10 +1470,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
        /* pwq determined, queue */
        trace_workqueue_queue_work(req_cpu, pwq, work);
 
-       if (WARN_ON(!list_empty(&work->entry))) {
-               spin_unlock(&pwq->pool->lock);
-               return;
-       }
+       if (WARN_ON(!list_empty(&work->entry)))
+               goto out;
 
        pwq->nr_in_flight[pwq->work_color]++;
        work_flags = work_color_to_flags(pwq->work_color);
@@ -1492,7 +1489,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 
        insert_work(pwq, work, worklist, work_flags);
 
+out:
        spin_unlock(&pwq->pool->lock);
+       rcu_read_unlock();
 }
 
 /**
@@ -2277,7 +2276,7 @@ __acquires(&pool->lock)
 
        if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
-                      "     last function: %pf\n",
+                      "     last function: %ps\n",
                       current->comm, preempt_count(), task_pid_nr(current),
                       worker->current_func);
                debug_show_held_locks(current);
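
The %pf -> %ps changes in this and the following hunks are format-specifier cleanups: %ps resolves a plain function pointer to its symbol name, %pS additionally prints the offset, while %pf/%pF are the older, since-deprecated spellings left over from function-descriptor handling. For example (output shown is illustrative):

	pr_info("handler: %ps\n", work->func);	/* "handler: vmstat_update" */
	pr_info("handler: %pS\n", work->func);	/* "handler: vmstat_update+0x0/0x90" */
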
@@ -2596,11 +2595,11 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
        worker = current_wq_worker();
 
        WARN_ONCE(current->flags & PF_MEMALLOC,
-                 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+                 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
                  current->pid, current->comm, target_wq->name, target_func);
        WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
                              (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
-                 "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+                 "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
                  worker->current_pwq->wq->name, worker->current_func,
                  target_wq->name, target_func);
 }
@@ -2974,14 +2973,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 
        might_sleep();
 
-       local_irq_disable();
+       rcu_read_lock();
        pool = get_work_pool(work);
        if (!pool) {
-               local_irq_enable();
+               rcu_read_unlock();
                return false;
        }
 
-       spin_lock(&pool->lock);
+       spin_lock_irq(&pool->lock);
        /* see the comment in try_to_grab_pending() with the same code */
        pwq = get_work_pwq(work);
        if (pwq) {
@@ -3013,10 +3012,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
                lock_map_acquire(&pwq->wq->lockdep_map);
                lock_map_release(&pwq->wq->lockdep_map);
        }
-
+       rcu_read_unlock();
        return true;
 already_gone:
        spin_unlock_irq(&pool->lock);
+       rcu_read_unlock();
        return false;
 }
 
@@ -3503,7 +3503,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
  * put_unbound_pool - put a worker_pool
  * @pool: worker_pool to put
  *
- * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
+ * Put @pool.  If its refcnt reaches zero, it gets destroyed in RCU
  * safe manner.  get_unbound_pool() calls this function on its failure path
  * and this function should be able to release pools which went through,
  * successfully or not, init_worker_pool().
@@ -3557,7 +3557,7 @@ static void put_unbound_pool(struct worker_pool *pool)
        del_timer_sync(&pool->idle_timer);
        del_timer_sync(&pool->mayday_timer);
 
-       /* sched-RCU protected to allow dereferences from get_work_pool() */
+       /* RCU protected to allow dereferences from get_work_pool() */
        call_rcu(&pool->rcu, rcu_free_pool);
 }
 
@@ -4208,6 +4208,7 @@ static int init_rescuer(struct workqueue_struct *wq)
        return 0;
 }
 
+__printf(1, 4)
 struct workqueue_struct *alloc_workqueue(const char *fmt,
                                         unsigned int flags,
                                         int max_active, ...)
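
__printf(1, 4) is the kernel wrapper around the compiler's printf format attribute: argument 1 is the format string and the variadic arguments start at position 4, after flags and max_active, so callers of alloc_workqueue() get format/argument type checking. Roughly, as a sketch:

	/* approximately what the annotation expands to */
	__attribute__((format(printf, 1, 4)))
	struct workqueue_struct *alloc_workqueue(const char *fmt,
						 unsigned int flags,
						 int max_active, ...);

	/* now diagnosed by -Wformat: %d vs. a string argument */
	alloc_workqueue("wq-%d", WQ_UNBOUND, 1, "oops");
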
@@ -4471,7 +4472,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
        struct pool_workqueue *pwq;
        bool ret;
 
-       rcu_read_lock_sched();
+       rcu_read_lock();
+       preempt_disable();
 
        if (cpu == WORK_CPU_UNBOUND)
                cpu = smp_processor_id();
@@ -4482,7 +4484,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
                pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
        ret = !list_empty(&pwq->delayed_works);
-       rcu_read_unlock_sched();
+       preempt_enable();
+       rcu_read_unlock();
 
        return ret;
 }
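
After the RCU flavours were consolidated, an rcu_read_lock_sched() section is covered by the same grace period as a plain rcu_read_lock() section; what it still provided here was disabled preemption. workqueue_congested() needs a stable CPU for smp_processor_id(), so the conversion spells both parts out explicitly:

	/* before: one call did double duty */
	rcu_read_lock_sched();		/* RCU reader + preemption off */
	/* ... */
	rcu_read_unlock_sched();

	/* after: the two roles are separate */
	rcu_read_lock();		/* keeps pwqs/pools valid */
	preempt_disable();		/* keeps smp_processor_id() stable */
	/* ... */
	preempt_enable();
	rcu_read_unlock();
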
@@ -4508,15 +4511,15 @@ unsigned int work_busy(struct work_struct *work)
        if (work_pending(work))
                ret |= WORK_BUSY_PENDING;
 
-       local_irq_save(flags);
+       rcu_read_lock();
        pool = get_work_pool(work);
        if (pool) {
-               spin_lock(&pool->lock);
+               spin_lock_irqsave(&pool->lock, flags);
                if (find_worker_executing_work(pool, work))
                        ret |= WORK_BUSY_RUNNING;
-               spin_unlock(&pool->lock);
+               spin_unlock_irqrestore(&pool->lock, flags);
        }
-       local_irq_restore(flags);
+       rcu_read_unlock();
 
        return ret;
 }
@@ -4587,7 +4590,7 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
        probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
 
        if (fn || name[0] || desc[0]) {
-               printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
+               printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
                if (strcmp(name, desc))
                        pr_cont(" (%s)", desc);
                pr_cont("\n");
@@ -4612,7 +4615,7 @@ static void pr_cont_work(bool comma, struct work_struct *work)
                pr_cont("%s BAR(%d)", comma ? "," : "",
                        task_pid_nr(barr->task));
        } else {
-               pr_cont("%s %pf", comma ? "," : "", work->func);
+               pr_cont("%s %ps", comma ? "," : "", work->func);
        }
 }
 
@@ -4644,7 +4647,7 @@ static void show_pwq(struct pool_workqueue *pwq)
                        if (worker->current_pwq != pwq)
                                continue;
 
-                       pr_cont("%s %d%s:%pf", comma ? "," : "",
+                       pr_cont("%s %d%s:%ps", comma ? "," : "",
                                task_pid_nr(worker->task),
                                worker == pwq->wq->rescuer ? "(RESCUER)" : "",
                                worker->current_func);
@@ -4700,7 +4703,7 @@ void show_workqueue_state(void)
        unsigned long flags;
        int pi;
 
-       rcu_read_lock_sched();
+       rcu_read_lock();
 
        pr_info("Showing busy workqueues and worker pools:\n");
 
@@ -4765,7 +4768,7 @@ void show_workqueue_state(void)
                touch_nmi_watchdog();
        }
 
-       rcu_read_unlock_sched();
+       rcu_read_unlock();
 }
 
 /* used to show worker information through /proc/PID/{comm,stat,status} */
@@ -4929,7 +4932,7 @@ static void rebind_workers(struct worker_pool *pool)
                 *
                 * WRITE_ONCE() is necessary because @worker->flags may be
                 * tested without holding any lock in
-                * wq_worker_waking_up().  Without it, NOT_RUNNING test may
+                * wq_worker_running().  Without it, NOT_RUNNING test may
                 * fail incorrectly leading to premature concurrency
                 * management operations.
                 */
@@ -5152,16 +5155,16 @@ bool freeze_workqueues_busy(void)
                 * nr_active is monotonically decreasing.  It's safe
                 * to peek without lock.
                 */
-               rcu_read_lock_sched();
+               rcu_read_lock();
                for_each_pwq(pwq, wq) {
                        WARN_ON_ONCE(pwq->nr_active < 0);
                        if (pwq->nr_active) {
                                busy = true;
-                               rcu_read_unlock_sched();
+                               rcu_read_unlock();
                                goto out_unlock;
                        }
                }
-               rcu_read_unlock_sched();
+               rcu_read_unlock();
        }
 out_unlock:
        mutex_unlock(&wq_pool_mutex);
@@ -5356,7 +5359,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
        const char *delim = "";
        int node, written = 0;
 
-       rcu_read_lock_sched();
+       get_online_cpus();
+       rcu_read_lock();
        for_each_node(node) {
                written += scnprintf(buf + written, PAGE_SIZE - written,
                                     "%s%d:%d", delim, node,
@@ -5364,7 +5368,8 @@ static ssize_t wq_pool_ids_show(struct device *dev,
                delim = " ";
        }
        written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
-       rcu_read_unlock_sched();
+       rcu_read_unlock();
+       put_online_cpus();
 
        return written;
 }
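
wq_pool_ids_show() also brackets the node walk with get_online_cpus()/put_online_cpus() (the older names for cpus_read_lock()/cpus_read_unlock()), presumably so CPU hotplug cannot rearrange the node-to-pool association while the sysfs buffer is filled, now that the read side no longer runs with preemption disabled. The resulting shape:

	get_online_cpus();		/* hold off CPU hotplug */
	rcu_read_lock();		/* unbound pwqs/pools stay valid */
	/* ... walk per-node state ... */
	rcu_read_unlock();
	put_online_cpus();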