From 81c0b3d724f419c0524f432c1ac22b9f518c2899 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 May 2019 07:18:08 -0700 Subject: [PATCH] rcu/nocb: Avoid ->nocb_lock capture by corresponding CPU A given rcu_data structure's ->nocb_lock can be acquired very frequently by the corresponding CPU and occasionally by the corresponding no-CBs grace-period and callbacks kthreads. In particular, these two kthreads will have frequent gaps between ->nocb_lock acquisitions that are roughly a grace period in duration. This means that any excessive ->nocb_lock contention will be due to the CPU's acquisitions, and this in turn enables a very naive contention-avoidance strategy to be quite effective. This commit therefore modifies rcu_nocb_lock() to first attempt a raw_spin_trylock(), and to atomically increment a separate ->nocb_lock_contended across a raw_spin_lock(). This new ->nocb_lock_contended field is checked in __call_rcu_nocb_wake() when interrupts are enabled, with a spin-wait for contending acquisitions to complete, thus allowing the kthreads a chance to acquire the lock. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 18 ++++++++++- kernel/rcu/tree_plugin.h | 68 ++++++++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index c12e85c12310..7062f9d9c053 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -197,6 +197,7 @@ struct rcu_data { struct swait_queue_head nocb_cb_wq; /* For nocb kthreads to sleep on. */ struct task_struct *nocb_gp_kthread; raw_spinlock_t nocb_lock; /* Guard following pair of fields. */ + atomic_t nocb_lock_contended; /* Contention experienced. */ int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ struct timer_list nocb_timer; /* Enforce finite deferral. */ @@ -430,7 +431,22 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, unsigned long flags); #ifdef CONFIG_RCU_NOCB_CPU static void __init rcu_organize_nocb_kthreads(void); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ +#define rcu_nocb_lock_irqsave(rdp, flags) \ +do { \ + if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) { \ + local_irq_save(flags); \ + } else if (!raw_spin_trylock_irqsave(&(rdp)->nocb_lock, (flags))) {\ + atomic_inc(&(rdp)->nocb_lock_contended); \ + smp_mb__after_atomic(); /* atomic_inc() before lock. */ \ + raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags)); \ + smp_mb__before_atomic(); /* atomic_dec() after lock. */ \ + atomic_dec(&(rdp)->nocb_lock_contended); \ + } \ +} while (0) +#else /* #ifdef CONFIG_RCU_NOCB_CPU */ +#define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags) +#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); static void rcu_dynticks_task_enter(void); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c10afe778430..5f0894cec75d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1498,14 +1498,36 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll); /* * Acquire the specified rcu_data structure's ->nocb_lock, but only - * if it corresponds to a no-CBs CPU. + * if it corresponds to a no-CBs CPU. If the lock isn't immediately + * available, increment ->nocb_lock_contended to flag the contention. */ static void rcu_nocb_lock(struct rcu_data *rdp) { - if (rcu_segcblist_is_offloaded(&rdp->cblist)) { - lockdep_assert_irqs_disabled(); - raw_spin_lock(&rdp->nocb_lock); - } + lockdep_assert_irqs_disabled(); + if (!rcu_segcblist_is_offloaded(&rdp->cblist) || + raw_spin_trylock(&rdp->nocb_lock)) + return; + atomic_inc(&rdp->nocb_lock_contended); + smp_mb__after_atomic(); /* atomic_inc() before lock. */ + raw_spin_lock(&rdp->nocb_lock); + smp_mb__before_atomic(); /* atomic_dec() after lock. */ + atomic_dec(&rdp->nocb_lock_contended); +} + +/* + * Spinwait until the specified rcu_data structure's ->nocb_lock is + * not contended. Please note that this is extremely special-purpose, + * relying on the fact that at most two kthreads and one CPU contend for + * this lock, and also that the two kthreads are guaranteed to have frequent + * grace-period-duration time intervals between successive acquisitions + * of the lock. This allows us to use an extremely simple throttling + * mechanism, and further to apply it only to the CPU doing floods of + * call_rcu() invocations. Don't try this at home! + */ +static void rcu_nocb_wait_contended(struct rcu_data *rdp) +{ + while (atomic_read(&rdp->nocb_lock_contended)) + cpu_relax(); } /* @@ -1575,19 +1597,19 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force, lockdep_assert_held(&rdp->nocb_lock); if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) { - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); return; } if (READ_ONCE(rdp_gp->nocb_gp_sleep) || force) { del_timer(&rdp->nocb_timer); - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); smp_mb(); /* enqueue before ->nocb_gp_sleep. */ - raw_spin_lock_irqsave(&rdp_gp->nocb_lock, flags); + rcu_nocb_lock_irqsave(rdp_gp, flags); WRITE_ONCE(rdp_gp->nocb_gp_sleep, false); - raw_spin_unlock_irqrestore(&rdp_gp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp_gp, flags); wake_up_process(rdp_gp->nocb_gp_kthread); } else { - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); } } @@ -1646,23 +1668,23 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, if (!rdp->nocb_cb_sleep && rcu_segcblist_ready_cbs(&rdp->cblist)) { // Already going full tilt, so don't try to rewake. - rcu_nocb_unlock_irqrestore(rdp, flags); } else if (rcu_segcblist_pend_cbs(&rdp->cblist) && raw_spin_trylock_rcu_node(rdp->mynode)) { rcu_advance_cbs_nowake(rdp->mynode, rdp); raw_spin_unlock_rcu_node(rdp->mynode); - rcu_nocb_unlock_irqrestore(rdp, flags); } else { wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE, TPS("WakeOvfIsDeferred")); - rcu_nocb_unlock_irqrestore(rdp, flags); } + rcu_nocb_unlock_irqrestore(rdp, flags); } else { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); rcu_nocb_unlock_irqrestore(rdp, flags); } - if (!irqs_disabled_flags(flags)) + if (!irqs_disabled_flags(flags)) { lockdep_assert_irqs_enabled(); + rcu_nocb_wait_contended(rdp); + } return; } @@ -1692,7 +1714,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) if (rcu_segcblist_empty(&rdp->cblist)) continue; /* No callbacks here, try next. */ rnp = rdp->mynode; - raw_spin_lock_irqsave(&rdp->nocb_lock, flags); + rcu_nocb_lock_irqsave(rdp, flags); WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); del_timer(&my_rdp->nocb_timer); raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ @@ -1712,7 +1734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) } else { needwake = false; } - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); if (needwake) { swake_up_one(&rdp->nocb_cb_wq); gotcbs = true; @@ -1741,9 +1763,9 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait")); } if (!rcu_nocb_poll) { - raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags); + rcu_nocb_lock_irqsave(my_rdp, flags); WRITE_ONCE(my_rdp->nocb_gp_sleep, true); - raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(my_rdp, flags); } WARN_ON(signal_pending(current)); } @@ -1784,12 +1806,12 @@ static void nocb_cb_wait(struct rcu_data *rdp) rcu_do_batch(rdp); local_bh_enable(); lockdep_assert_irqs_enabled(); - raw_spin_lock_irqsave(&rdp->nocb_lock, flags); + rcu_nocb_lock_irqsave(rdp, flags); raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ needwake_gp = rcu_advance_cbs(rdp->mynode, rdp); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ if (rcu_segcblist_ready_cbs(&rdp->cblist)) { - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); if (needwake_gp) rcu_gp_kthread_wake(); return; @@ -1797,7 +1819,7 @@ static void nocb_cb_wait(struct rcu_data *rdp) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep")); WRITE_ONCE(rdp->nocb_cb_sleep, true); - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); if (needwake_gp) rcu_gp_kthread_wake(); swait_event_interruptible_exclusive(rdp->nocb_cb_wq, @@ -1839,9 +1861,9 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) unsigned long flags; int ndw; - raw_spin_lock_irqsave(&rdp->nocb_lock, flags); + rcu_nocb_lock_irqsave(rdp, flags); if (!rcu_nocb_need_deferred_wakeup(rdp)) { - raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); + rcu_nocb_unlock_irqrestore(rdp, flags); return; } ndw = READ_ONCE(rdp->nocb_defer_wakeup); -- 2.45.2