kernel/cpu.c (smp/hotplug: Callback vs state-machine consistency)
1 /* CPU control.
2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
3  *
4  * This code is licenced under the GPL.
5  */
6 #include <linux/proc_fs.h>
7 #include <linux/smp.h>
8 #include <linux/init.h>
9 #include <linux/notifier.h>
10 #include <linux/sched/signal.h>
11 #include <linux/sched/hotplug.h>
12 #include <linux/sched/task.h>
13 #include <linux/unistd.h>
14 #include <linux/cpu.h>
15 #include <linux/oom.h>
16 #include <linux/rcupdate.h>
17 #include <linux/export.h>
18 #include <linux/bug.h>
19 #include <linux/kthread.h>
20 #include <linux/stop_machine.h>
21 #include <linux/mutex.h>
22 #include <linux/gfp.h>
23 #include <linux/suspend.h>
24 #include <linux/lockdep.h>
25 #include <linux/tick.h>
26 #include <linux/irq.h>
27 #include <linux/smpboot.h>
28 #include <linux/relay.h>
29 #include <linux/slab.h>
30 #include <linux/percpu-rwsem.h>
31
32 #include <trace/events/power.h>
33 #define CREATE_TRACE_POINTS
34 #include <trace/events/cpuhp.h>
35
36 #include "smpboot.h"
37
38 /**
39  * cpuhp_cpu_state - Per cpu hotplug state storage
40  * @state:      The current cpu state
41  * @target:     The target state
42  * @thread:     Pointer to the hotplug thread
43  * @should_run: Thread should execute
44  * @rollback:   Perform a rollback
45  * @single:     Single callback invocation
46  * @bringup:    Single callback bringup or teardown selector
47  * @cb_state:   The state for a single callback (install/uninstall)
48  * @result:     Result of the operation
49  * @done:       Signal completion to the issuer of the task
50  */
51 struct cpuhp_cpu_state {
52         enum cpuhp_state        state;
53         enum cpuhp_state        target;
54 #ifdef CONFIG_SMP
55         struct task_struct      *thread;
56         bool                    should_run;
57         bool                    rollback;
58         bool                    single;
59         bool                    bringup;
60         struct hlist_node       *node;
61         struct hlist_node       *last;
62         enum cpuhp_state        cb_state;
63         int                     result;
64         struct completion       done;
65 #endif
66 };
67
68 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
69
70 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
71 static struct lock_class_key cpuhp_state_key;
72 static struct lockdep_map cpuhp_state_lock_map =
73         STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
74 #endif
75
76 /**
77  * cpuhp_step - Hotplug state machine step
78  * @name:       Name of the step
79  * @startup:    Startup function of the step
80  * @teardown:   Teardown function of the step
81  * @skip_onerr: Do not invoke the functions on error rollback
82  *              Will go away once the notifiers are gone
83  * @cant_stop:  Bringup/teardown can't be stopped at this step
84  */
85 struct cpuhp_step {
86         const char              *name;
87         union {
88                 int             (*single)(unsigned int cpu);
89                 int             (*multi)(unsigned int cpu,
90                                          struct hlist_node *node);
91         } startup;
92         union {
93                 int             (*single)(unsigned int cpu);
94                 int             (*multi)(unsigned int cpu,
95                                          struct hlist_node *node);
96         } teardown;
97         struct hlist_head       list;
98         bool                    skip_onerr;
99         bool                    cant_stop;
100         bool                    multi_instance;
101 };
102
103 static DEFINE_MUTEX(cpuhp_state_mutex);
104 static struct cpuhp_step cpuhp_bp_states[];
105 static struct cpuhp_step cpuhp_ap_states[];
106
107 static bool cpuhp_is_ap_state(enum cpuhp_state state)
108 {
109         /*
110          * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
111          * purposes as that state is handled explicitly in cpu_down.
112          */
113         return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
114 }
115
116 /*
117  * The former STARTING/DYING states are run with IRQs disabled and must not fail.
118  */
119 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
120 {
121         return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
122 }
123
124 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
125 {
126         struct cpuhp_step *sp;
127
128         sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
129         return sp + state;
130 }
131
132 /**
133  * cpuhp_invoke_callback - Invoke the callbacks for a given state
134  * @cpu:        The cpu for which the callback should be invoked
135  * @state:      The state to do callbacks for
136  * @bringup:    True if the bringup callback should be invoked
137  * @node:       For multi-instance, do a single entry callback for install/remove
138  * @lastp:      For multi-instance rollback, remember how far we got
139  *
140  * Called from cpu hotplug and from the state register machinery.
141  */
142 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
143                                  bool bringup, struct hlist_node *node,
144                                  struct hlist_node **lastp)
145 {
146         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
147         struct cpuhp_step *step = cpuhp_get_step(state);
148         int (*cbm)(unsigned int cpu, struct hlist_node *node);
149         int (*cb)(unsigned int cpu);
150         int ret, cnt;
151
152         if (!step->multi_instance) {
153                 WARN_ON_ONCE(lastp && *lastp);
154                 cb = bringup ? step->startup.single : step->teardown.single;
155                 if (!cb)
156                         return 0;
157                 trace_cpuhp_enter(cpu, st->target, state, cb);
158                 ret = cb(cpu);
159                 trace_cpuhp_exit(cpu, st->state, state, ret);
160                 return ret;
161         }
162         cbm = bringup ? step->startup.multi : step->teardown.multi;
163         if (!cbm)
164                 return 0;
165
166         /* Single invocation for instance add/remove */
167         if (node) {
168                 WARN_ON_ONCE(lastp && *lastp);
169                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
170                 ret = cbm(cpu, node);
171                 trace_cpuhp_exit(cpu, st->state, state, ret);
172                 return ret;
173         }
174
175         /* State transition. Invoke on all instances */
176         cnt = 0;
177         hlist_for_each(node, &step->list) {
178                 if (lastp && node == *lastp)
179                         break;
180
181                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
182                 ret = cbm(cpu, node);
183                 trace_cpuhp_exit(cpu, st->state, state, ret);
184                 if (ret) {
185                         if (!lastp)
186                                 goto err;
187
188                         *lastp = node;
189                         return ret;
190                 }
191                 cnt++;
192         }
193         if (lastp)
194                 *lastp = NULL;
195         return 0;
196 err:
197         /* Rollback the instances if one failed */
198         cbm = !bringup ? step->startup.multi : step->teardown.multi;
199         if (!cbm)
200                 return ret;
201
202         hlist_for_each(node, &step->list) {
203                 if (!cnt--)
204                         break;
205
206                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
207                 ret = cbm(cpu, node);
208                 trace_cpuhp_exit(cpu, st->state, state, ret);
209                 /*
210                  * Rollback must not fail!
211                  */
212                 WARN_ON_ONCE(ret);
213         }
214         return ret;
215 }
216
217 #ifdef CONFIG_SMP
218 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
219 static DEFINE_MUTEX(cpu_add_remove_lock);
220 bool cpuhp_tasks_frozen;
221 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
222
223 /*
224  * The following two APIs (cpu_maps_update_begin/done) must be used when
225  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
226  */
227 void cpu_maps_update_begin(void)
228 {
229         mutex_lock(&cpu_add_remove_lock);
230 }
231
232 void cpu_maps_update_done(void)
233 {
234         mutex_unlock(&cpu_add_remove_lock);
235 }
236
237 /*
238  * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
239  * Should always be manipulated under cpu_add_remove_lock
240  */
241 static int cpu_hotplug_disabled;
242
243 #ifdef CONFIG_HOTPLUG_CPU
244
245 DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
246
247 void cpus_read_lock(void)
248 {
249         percpu_down_read(&cpu_hotplug_lock);
250 }
251 EXPORT_SYMBOL_GPL(cpus_read_lock);
252
253 void cpus_read_unlock(void)
254 {
255         percpu_up_read(&cpu_hotplug_lock);
256 }
257 EXPORT_SYMBOL_GPL(cpus_read_unlock);
258
259 void cpus_write_lock(void)
260 {
261         percpu_down_write(&cpu_hotplug_lock);
262 }
263
264 void cpus_write_unlock(void)
265 {
266         percpu_up_write(&cpu_hotplug_lock);
267 }
268
269 void lockdep_assert_cpus_held(void)
270 {
271         percpu_rwsem_assert_held(&cpu_hotplug_lock);
272 }
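/*
 * Illustrative sketch (not part of the hotplug core): code that needs a
 * stable view of the online CPUs, e.g. while iterating cpu_online_mask,
 * brackets the region with the read side of cpu_hotplug_lock via the
 * helpers above. The function name below is hypothetical.
 *
 *	static void walk_online_cpus_example(void)
 *	{
 *		int cpu;
 *
 *		cpus_read_lock();
 *		for_each_online_cpu(cpu)
 *			pr_info("cpu%d is online\n", cpu);
 *		cpus_read_unlock();
 *	}
 */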
273
274 /*
275  * Wait for currently running CPU hotplug operations to complete (if any) and
276  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
277  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
278  * hotplug path before performing hotplug operations. So acquiring that lock
279  * guarantees mutual exclusion from any currently running hotplug operations.
280  */
281 void cpu_hotplug_disable(void)
282 {
283         cpu_maps_update_begin();
284         cpu_hotplug_disabled++;
285         cpu_maps_update_done();
286 }
287 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
288
289 static void __cpu_hotplug_enable(void)
290 {
291         if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
292                 return;
293         cpu_hotplug_disabled--;
294 }
295
296 void cpu_hotplug_enable(void)
297 {
298         cpu_maps_update_begin();
299         __cpu_hotplug_enable();
300         cpu_maps_update_done();
301 }
302 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
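/*
 * Illustrative sketch: callers that must temporarily fend off CPU hotplug
 * initiated from sysfs pair cpu_hotplug_disable() with cpu_hotplug_enable().
 * While disabled, cpu_up()/cpu_down() return -EBUSY. The function names
 * below are hypothetical.
 *
 *	static void quiesce_example(void)
 *	{
 *		cpu_hotplug_disable();
 *		do_work_that_must_not_race_with_hotplug();
 *		cpu_hotplug_enable();
 *	}
 */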
303 #endif  /* CONFIG_HOTPLUG_CPU */
304
305 static inline enum cpuhp_state
306 cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
307 {
308         enum cpuhp_state prev_state = st->state;
309
310         st->rollback = false;
311         st->last = NULL;
312
313         st->target = target;
314         st->single = false;
315         st->bringup = st->state < target;
316
317         return prev_state;
318 }
319
320 static inline void
321 cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
322 {
323         st->rollback = true;
324
325         /*
326          * If we have st->last we need to undo partial multi_instance of this
327          * state first. Otherwise start undo at the previous state.
328          */
329         if (!st->last) {
330                 if (st->bringup)
331                         st->state--;
332                 else
333                         st->state++;
334         }
335
336         st->target = prev_state;
337         st->bringup = !st->bringup;
338 }
339
340 /* Regular hotplug invocation of the AP hotplug thread */
341 static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
342 {
343         if (!st->single && st->state == st->target)
344                 return;
345
346         st->result = 0;
347         /*
348          * Make sure the above stores are visible before should_run becomes
349          * true. Paired with the smp_mb() in cpuhp_thread_fun().
350          */
351         smp_mb();
352         st->should_run = true;
353         wake_up_process(st->thread);
354         wait_for_completion(&st->done);
355 }
356
357 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
358 {
359         enum cpuhp_state prev_state;
360         int ret;
361
362         prev_state = cpuhp_set_state(st, target);
363         __cpuhp_kick_ap(st);
364         if ((ret = st->result)) {
365                 cpuhp_reset_state(st, prev_state);
366                 __cpuhp_kick_ap(st);
367         }
368
369         return ret;
370 }
371
372 static int bringup_wait_for_ap(unsigned int cpu)
373 {
374         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
375
376         /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
377         wait_for_completion(&st->done);
378         if (WARN_ON_ONCE((!cpu_online(cpu))))
379                 return -ECANCELED;
380
381         /* Unpark the stopper thread and the hotplug thread of the target cpu */
382         stop_machine_unpark(cpu);
383         kthread_unpark(st->thread);
384
385         if (st->target <= CPUHP_AP_ONLINE_IDLE)
386                 return 0;
387
388         return cpuhp_kick_ap(st, st->target);
389 }
390
391 static int bringup_cpu(unsigned int cpu)
392 {
393         struct task_struct *idle = idle_thread_get(cpu);
394         int ret;
395
396         /*
397          * Some architectures have to walk the irq descriptors to
398          * setup the vector space for the cpu which comes online.
399          * Prevent irq alloc/free across the bringup.
400          */
401         irq_lock_sparse();
402
403         /* Arch-specific enabling code. */
404         ret = __cpu_up(cpu, idle);
405         irq_unlock_sparse();
406         if (ret)
407                 return ret;
408         return bringup_wait_for_ap(cpu);
409 }
410
411 /*
412  * Hotplug state machine related functions
413  */
414
415 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
416 {
417         for (st->state--; st->state > st->target; st->state--) {
418                 struct cpuhp_step *step = cpuhp_get_step(st->state);
419
420                 if (!step->skip_onerr)
421                         cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
422         }
423 }
424
425 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
426                               enum cpuhp_state target)
427 {
428         enum cpuhp_state prev_state = st->state;
429         int ret = 0;
430
431         while (st->state < target) {
432                 st->state++;
433                 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
434                 if (ret) {
435                         st->target = prev_state;
436                         undo_cpu_up(cpu, st);
437                         break;
438                 }
439         }
440         return ret;
441 }
442
443 /*
444  * The cpu hotplug threads manage the bringup and teardown of the cpus
445  */
446 static void cpuhp_create(unsigned int cpu)
447 {
448         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
449
450         init_completion(&st->done);
451 }
452
453 static int cpuhp_should_run(unsigned int cpu)
454 {
455         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
456
457         return st->should_run;
458 }
459
460 /*
461  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
462  * callbacks when a state gets [un]installed at runtime.
463  *
464  * Each invocation of this function by the smpboot thread does a single AP
465  * state callback.
466  *
467  * It has 3 modes of operation:
468  *  - single: runs st->cb_state
469  *  - up:     runs ++st->state, while st->state < st->target
470  *  - down:   runs st->state--, while st->state > st->target
471  *
472  * When complete or on error, should_run is cleared and the completion is fired.
473  */
474 static void cpuhp_thread_fun(unsigned int cpu)
475 {
476         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
477         bool bringup = st->bringup;
478         enum cpuhp_state state;
479
480         /*
481          * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
482          * that if we see ->should_run we also see the rest of the state.
483          */
484         smp_mb();
485
486         if (WARN_ON_ONCE(!st->should_run))
487                 return;
488
489         lock_map_acquire(&cpuhp_state_lock_map);
490
491         if (st->single) {
492                 state = st->cb_state;
493                 st->should_run = false;
494         } else {
495                 if (bringup) {
496                         st->state++;
497                         state = st->state;
498                         st->should_run = (st->state < st->target);
499                         WARN_ON_ONCE(st->state > st->target);
500                 } else {
501                         state = st->state;
502                         st->state--;
503                         st->should_run = (st->state > st->target);
504                         WARN_ON_ONCE(st->state < st->target);
505                 }
506         }
507
508         WARN_ON_ONCE(!cpuhp_is_ap_state(state));
509
510         if (st->rollback) {
511                 struct cpuhp_step *step = cpuhp_get_step(state);
512                 if (step->skip_onerr)
513                         goto next;
514         }
515
516         if (cpuhp_is_atomic_state(state)) {
517                 local_irq_disable();
518                 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
519                 local_irq_enable();
520
521                 /*
522                  * STARTING/DYING must not fail!
523                  */
524                 WARN_ON_ONCE(st->result);
525         } else {
526                 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
527         }
528
529         if (st->result) {
530                 /*
531                  * If we fail on a rollback, we're up a creek without a
532                  * paddle: no way forward, no way back. We lose, thanks
533                  * for playing.
534                  */
535                 WARN_ON_ONCE(st->rollback);
536                 st->should_run = false;
537         }
538
539 next:
540         lock_map_release(&cpuhp_state_lock_map);
541
542         if (!st->should_run)
543                 complete(&st->done);
544 }
545
546 /* Invoke a single callback on a remote cpu */
547 static int
548 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
549                          struct hlist_node *node)
550 {
551         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
552         int ret;
553
554         if (!cpu_online(cpu))
555                 return 0;
556
557         lock_map_acquire(&cpuhp_state_lock_map);
558         lock_map_release(&cpuhp_state_lock_map);
559
560         /*
561          * If we are up and running, use the hotplug thread. For early calls
562          * we invoke the thread function directly.
563          */
564         if (!st->thread)
565                 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
566
567         st->rollback = false;
568         st->last = NULL;
569
570         st->node = node;
571         st->bringup = bringup;
572         st->cb_state = state;
573         st->single = true;
574
575         __cpuhp_kick_ap(st);
576
577         /*
578          * If we failed and did a partial, do a rollback.
579          */
580         if ((ret = st->result) && st->last) {
581                 st->rollback = true;
582                 st->bringup = !bringup;
583
584                 __cpuhp_kick_ap(st);
585         }
586
587         return ret;
588 }
589
590 static int cpuhp_kick_ap_work(unsigned int cpu)
591 {
592         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
593         enum cpuhp_state prev_state = st->state;
594         int ret;
595
596         lock_map_acquire(&cpuhp_state_lock_map);
597         lock_map_release(&cpuhp_state_lock_map);
598
599         trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
600         ret = cpuhp_kick_ap(st, st->target);
601         trace_cpuhp_exit(cpu, st->state, prev_state, ret);
602
603         return ret;
604 }
605
606 static struct smp_hotplug_thread cpuhp_threads = {
607         .store                  = &cpuhp_state.thread,
608         .create                 = &cpuhp_create,
609         .thread_should_run      = cpuhp_should_run,
610         .thread_fn              = cpuhp_thread_fun,
611         .thread_comm            = "cpuhp/%u",
612         .selfparking            = true,
613 };
614
615 void __init cpuhp_threads_init(void)
616 {
617         BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
618         kthread_unpark(this_cpu_read(cpuhp_state.thread));
619 }
620
621 #ifdef CONFIG_HOTPLUG_CPU
622 /**
623  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
624  * @cpu: a CPU id
625  *
626  * This function walks all processes, finds a valid mm struct for each one and
627  * then clears a corresponding bit in mm's cpumask.  While this all sounds
628  * trivial, there are various non-obvious corner cases, which this function
629  * tries to solve in a safe manner.
630  *
631  * Also note that the function uses a somewhat relaxed locking scheme, so it may
632  * be called only for an already offlined CPU.
633  */
634 void clear_tasks_mm_cpumask(int cpu)
635 {
636         struct task_struct *p;
637
638         /*
639          * This function is called after the cpu is taken down and marked
640          * offline, so it's not like new tasks will ever get this cpu set in
641          * their mm mask. -- Peter Zijlstra
642          * Thus, we may use rcu_read_lock() here, instead of grabbing
643          * full-fledged tasklist_lock.
644          */
645         WARN_ON(cpu_online(cpu));
646         rcu_read_lock();
647         for_each_process(p) {
648                 struct task_struct *t;
649
650                 /*
651                  * Main thread might exit, but other threads may still have
652                  * a valid mm. Find one.
653                  */
654                 t = find_lock_task_mm(p);
655                 if (!t)
656                         continue;
657                 cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
658                 task_unlock(t);
659         }
660         rcu_read_unlock();
661 }
662
663 /* Take this CPU down. */
664 static int take_cpu_down(void *_param)
665 {
666         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
667         enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
668         int err, cpu = smp_processor_id();
669         int ret;
670
671         /* Ensure this CPU doesn't handle any more interrupts. */
672         err = __cpu_disable();
673         if (err < 0)
674                 return err;
675
676         /*
677          * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
678          * do this step again.
679          */
680         WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
681         st->state--;
682         /* Invoke the former CPU_DYING callbacks */
683         for (; st->state > target; st->state--) {
684                 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
685                 /*
686                  * DYING must not fail!
687                  */
688                 WARN_ON_ONCE(ret);
689         }
690
691         /* Give up timekeeping duties */
692         tick_handover_do_timer();
693         /* Park the stopper thread */
694         stop_machine_park(cpu);
695         return 0;
696 }
697
698 static int takedown_cpu(unsigned int cpu)
699 {
700         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
701         int err;
702
703         /* Park the smpboot threads */
704         kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
705         smpboot_park_threads(cpu);
706
707         /*
708          * Prevent irq alloc/free while the dying cpu reorganizes the
709          * interrupt affinities.
710          */
711         irq_lock_sparse();
712
713         /*
714          * So now all preempt/rcu users must observe !cpu_active().
715          */
716         err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
717         if (err) {
718                 /* CPU refused to die */
719                 irq_unlock_sparse();
720                 /* Unpark the hotplug thread so we can rollback there */
721                 kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
722                 return err;
723         }
724         BUG_ON(cpu_online(cpu));
725
726         /*
727          * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
728          * runnable tasks from the cpu, there's only the idle task left now
729          * that the migration thread is done doing the stop_machine thing.
730          *
731          * Wait for the stop thread to go away.
732          */
733         wait_for_completion(&st->done);
734         BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
735
736         /* Interrupts are moved away from the dying cpu, reenable alloc/free */
737         irq_unlock_sparse();
738
739         hotplug_cpu__broadcast_tick_pull(cpu);
740         /* This actually kills the CPU. */
741         __cpu_die(cpu);
742
743         tick_cleanup_dead_cpu(cpu);
744         rcutree_migrate_callbacks(cpu);
745         return 0;
746 }
747
748 static void cpuhp_complete_idle_dead(void *arg)
749 {
750         struct cpuhp_cpu_state *st = arg;
751
752         complete(&st->done);
753 }
754
755 void cpuhp_report_idle_dead(void)
756 {
757         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
758
759         BUG_ON(st->state != CPUHP_AP_OFFLINE);
760         rcu_report_dead(smp_processor_id());
761         st->state = CPUHP_AP_IDLE_DEAD;
762         /*
763          * We cannot call complete after rcu_report_dead() so we delegate it
764          * to an online cpu.
765          */
766         smp_call_function_single(cpumask_first(cpu_online_mask),
767                                  cpuhp_complete_idle_dead, st, 0);
768 }
769
770 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
771 {
772         for (st->state++; st->state < st->target; st->state++) {
773                 struct cpuhp_step *step = cpuhp_get_step(st->state);
774
775                 if (!step->skip_onerr)
776                         cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
777         }
778 }
779
780 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
781                                 enum cpuhp_state target)
782 {
783         enum cpuhp_state prev_state = st->state;
784         int ret = 0;
785
786         for (; st->state > target; st->state--) {
787                 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
788                 if (ret) {
789                         st->target = prev_state;
790                         undo_cpu_down(cpu, st);
791                         break;
792                 }
793         }
794         return ret;
795 }
796
797 /* Requires cpu_add_remove_lock to be held */
798 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
799                            enum cpuhp_state target)
800 {
801         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
802         int prev_state, ret = 0;
803
804         if (num_online_cpus() == 1)
805                 return -EBUSY;
806
807         if (!cpu_present(cpu))
808                 return -EINVAL;
809
810         cpus_write_lock();
811
812         cpuhp_tasks_frozen = tasks_frozen;
813
814         prev_state = cpuhp_set_state(st, target);
815         /*
816          * If the current CPU state is in the range of the AP hotplug thread,
817          * then we need to kick the thread.
818          */
819         if (st->state > CPUHP_TEARDOWN_CPU) {
820                 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
821                 ret = cpuhp_kick_ap_work(cpu);
822                 /*
823                  * The AP side has done the error rollback already. Just
824          * return the error code.
825                  */
826                 if (ret)
827                         goto out;
828
829                 /*
830                  * We might have stopped still in the range of the AP hotplug
831                  * thread. Nothing to do anymore.
832                  */
833                 if (st->state > CPUHP_TEARDOWN_CPU)
834                         goto out;
835
836                 st->target = target;
837         }
838         /*
839          * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
840          * to do the further cleanups.
841          */
842         ret = cpuhp_down_callbacks(cpu, st, target);
843         if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
844                 cpuhp_reset_state(st, prev_state);
845                 __cpuhp_kick_ap(st);
846         }
847
848 out:
849         cpus_write_unlock();
850         return ret;
851 }
852
853 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
854 {
855         int err;
856
857         cpu_maps_update_begin();
858
859         if (cpu_hotplug_disabled) {
860                 err = -EBUSY;
861                 goto out;
862         }
863
864         err = _cpu_down(cpu, 0, target);
865
866 out:
867         cpu_maps_update_done();
868         return err;
869 }
870
871 int cpu_down(unsigned int cpu)
872 {
873         return do_cpu_down(cpu, CPUHP_OFFLINE);
874 }
875 EXPORT_SYMBOL(cpu_down);
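/*
 * Illustrative note: cpu_down() is what the sysfs "online" attribute ends up
 * invoking, so the user space equivalent of an in-kernel cpu_down(3) is
 * roughly:
 *
 *	echo 0 > /sys/devices/system/cpu/cpu3/online
 *
 * Callers must be prepared for -EBUSY (hotplug disabled or last online CPU)
 * and -EINVAL (CPU not present), as implemented in do_cpu_down()/_cpu_down()
 * above.
 */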
876
877 #else
878 #define takedown_cpu            NULL
879 #endif /*CONFIG_HOTPLUG_CPU*/
880
881 /**
882  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
883  * @cpu: cpu that just started
884  *
885  * It must be called by the arch code on the new cpu, before the new cpu
886  * enables interrupts and before the "boot" cpu returns from __cpu_up().
887  */
888 void notify_cpu_starting(unsigned int cpu)
889 {
890         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
891         enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
892         int ret;
893
894         rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
895         while (st->state < target) {
896                 st->state++;
897                 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
898                 /*
899                  * STARTING must not fail!
900                  */
901                 WARN_ON_ONCE(ret);
902         }
903 }
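/*
 * Illustrative sketch of the calling convention documented above, as seen
 * from a hypothetical architecture's secondary startup path (the function
 * name is made up, the ordering is the contract):
 *
 *	void secondary_start_kernel_example(void)
 *	{
 *		unsigned int cpu = smp_processor_id();
 *
 *		... low level CPU and MMU bringup, IRQs still disabled ...
 *		notify_cpu_starting(cpu);	runs the STARTING callbacks
 *		set_cpu_online(cpu, true);	lets the control CPU see us online
 *		local_irq_enable();
 *		cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 *	}
 */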
904
905 /*
906  * Called from the idle task. Wake up the controlling task which brings the
907  * stopper and the hotplug thread of the upcoming CPU up and then delegates
908  * the rest of the online bringup to the hotplug thread.
909  */
910 void cpuhp_online_idle(enum cpuhp_state state)
911 {
912         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
913
914         /* Happens for the boot cpu */
915         if (state != CPUHP_AP_ONLINE_IDLE)
916                 return;
917
918         st->state = CPUHP_AP_ONLINE_IDLE;
919         complete(&st->done);
920 }
921
922 /* Requires cpu_add_remove_lock to be held */
923 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
924 {
925         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
926         struct task_struct *idle;
927         int ret = 0;
928
929         cpus_write_lock();
930
931         if (!cpu_present(cpu)) {
932                 ret = -EINVAL;
933                 goto out;
934         }
935
936         /*
937          * The caller of do_cpu_up might have raced with another
938          * caller. Ignore it for now.
939          */
940         if (st->state >= target)
941                 goto out;
942
943         if (st->state == CPUHP_OFFLINE) {
944                 /* Let it fail before we try to bring the cpu up */
945                 idle = idle_thread_get(cpu);
946                 if (IS_ERR(idle)) {
947                         ret = PTR_ERR(idle);
948                         goto out;
949                 }
950         }
951
952         cpuhp_tasks_frozen = tasks_frozen;
953
954         cpuhp_set_state(st, target);
955         /*
956          * If the current CPU state is in the range of the AP hotplug thread,
957          * then we need to kick the thread once more.
958          */
959         if (st->state > CPUHP_BRINGUP_CPU) {
960                 ret = cpuhp_kick_ap_work(cpu);
961                 /*
962                  * The AP side has done the error rollback already. Just
963          * return the error code.
964                  */
965                 if (ret)
966                         goto out;
967         }
968
969         /*
970          * Try to reach the target state. We max out on the BP at
971          * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
972          * responsible for bringing it up to the target state.
973          */
974         target = min((int)target, CPUHP_BRINGUP_CPU);
975         ret = cpuhp_up_callbacks(cpu, st, target);
976 out:
977         cpus_write_unlock();
978         return ret;
979 }
980
981 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
982 {
983         int err = 0;
984
985         if (!cpu_possible(cpu)) {
986                 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
987                        cpu);
988 #if defined(CONFIG_IA64)
989                 pr_err("please check additional_cpus= boot parameter\n");
990 #endif
991                 return -EINVAL;
992         }
993
994         err = try_online_node(cpu_to_node(cpu));
995         if (err)
996                 return err;
997
998         cpu_maps_update_begin();
999
1000         if (cpu_hotplug_disabled) {
1001                 err = -EBUSY;
1002                 goto out;
1003         }
1004
1005         err = _cpu_up(cpu, 0, target);
1006 out:
1007         cpu_maps_update_done();
1008         return err;
1009 }
1010
1011 int cpu_up(unsigned int cpu)
1012 {
1013         return do_cpu_up(cpu, CPUHP_ONLINE);
1014 }
1015 EXPORT_SYMBOL_GPL(cpu_up);
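/*
 * Illustrative note: the user space counterpart of cpu_up(3) is roughly:
 *
 *	echo 1 > /sys/devices/system/cpu/cpu3/online
 *
 * do_cpu_up() above rejects CPUs that are not possible (-EINVAL) and returns
 * -EBUSY while hotplug is disabled; _cpu_up() additionally requires the CPU
 * to be present.
 */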
1016
1017 #ifdef CONFIG_PM_SLEEP_SMP
1018 static cpumask_var_t frozen_cpus;
1019
1020 int freeze_secondary_cpus(int primary)
1021 {
1022         int cpu, error = 0;
1023
1024         cpu_maps_update_begin();
1025         if (!cpu_online(primary))
1026                 primary = cpumask_first(cpu_online_mask);
1027         /*
1028          * We take down all of the non-boot CPUs in one shot to avoid races
1029          * with userspace trying to use CPU hotplug at the same time.
1030          */
1031         cpumask_clear(frozen_cpus);
1032
1033         pr_info("Disabling non-boot CPUs ...\n");
1034         for_each_online_cpu(cpu) {
1035                 if (cpu == primary)
1036                         continue;
1037                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1038                 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1039                 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1040                 if (!error)
1041                         cpumask_set_cpu(cpu, frozen_cpus);
1042                 else {
1043                         pr_err("Error taking CPU%d down: %d\n", cpu, error);
1044                         break;
1045                 }
1046         }
1047
1048         if (!error)
1049                 BUG_ON(num_online_cpus() > 1);
1050         else
1051                 pr_err("Non-boot CPUs are not disabled\n");
1052
1053         /*
1054          * Make sure the CPUs won't be enabled by someone else. We need to do
1055          * this even in case of failure as all disable_nonboot_cpus() users are
1056          * supposed to do enable_nonboot_cpus() on the failure path.
1057          */
1058         cpu_hotplug_disabled++;
1059
1060         cpu_maps_update_done();
1061         return error;
1062 }
1063
1064 void __weak arch_enable_nonboot_cpus_begin(void)
1065 {
1066 }
1067
1068 void __weak arch_enable_nonboot_cpus_end(void)
1069 {
1070 }
1071
1072 void enable_nonboot_cpus(void)
1073 {
1074         int cpu, error;
1075
1076         /* Allow everyone to use the CPU hotplug again */
1077         cpu_maps_update_begin();
1078         __cpu_hotplug_enable();
1079         if (cpumask_empty(frozen_cpus))
1080                 goto out;
1081
1082         pr_info("Enabling non-boot CPUs ...\n");
1083
1084         arch_enable_nonboot_cpus_begin();
1085
1086         for_each_cpu(cpu, frozen_cpus) {
1087                 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1088                 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1089                 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1090                 if (!error) {
1091                         pr_info("CPU%d is up\n", cpu);
1092                         continue;
1093                 }
1094                 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1095         }
1096
1097         arch_enable_nonboot_cpus_end();
1098
1099         cpumask_clear(frozen_cpus);
1100 out:
1101         cpu_maps_update_done();
1102 }
1103
1104 static int __init alloc_frozen_cpus(void)
1105 {
1106         if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1107                 return -ENOMEM;
1108         return 0;
1109 }
1110 core_initcall(alloc_frozen_cpus);
1111
1112 /*
1113  * When callbacks for CPU hotplug notifications are being executed, we must
1114  * ensure that the state of the system with respect to the tasks being frozen
1115  * or not, as reported by the notification, remains unchanged *throughout the
1116  * duration* of the execution of the callbacks.
1117  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1118  *
1119  * This synchronization is implemented by mutually excluding regular CPU
1120  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1121  * Hibernate notifications.
1122  */
1123 static int
1124 cpu_hotplug_pm_callback(struct notifier_block *nb,
1125                         unsigned long action, void *ptr)
1126 {
1127         switch (action) {
1128
1129         case PM_SUSPEND_PREPARE:
1130         case PM_HIBERNATION_PREPARE:
1131                 cpu_hotplug_disable();
1132                 break;
1133
1134         case PM_POST_SUSPEND:
1135         case PM_POST_HIBERNATION:
1136                 cpu_hotplug_enable();
1137                 break;
1138
1139         default:
1140                 return NOTIFY_DONE;
1141         }
1142
1143         return NOTIFY_OK;
1144 }
1145
1146
1147 static int __init cpu_hotplug_pm_sync_init(void)
1148 {
1149         /*
1150          * cpu_hotplug_pm_callback has higher priority than the x86
1151          * bsp_pm_callback, which relies on cpu_hotplug_pm_callback having
1152          * disabled cpu hotplug already, to avoid a hotplug race.
1153          */
1154         pm_notifier(cpu_hotplug_pm_callback, 0);
1155         return 0;
1156 }
1157 core_initcall(cpu_hotplug_pm_sync_init);
1158
1159 #endif /* CONFIG_PM_SLEEP_SMP */
1160
1161 int __boot_cpu_id;
1162
1163 #endif /* CONFIG_SMP */
1164
1165 /* Boot processor state steps */
1166 static struct cpuhp_step cpuhp_bp_states[] = {
1167         [CPUHP_OFFLINE] = {
1168                 .name                   = "offline",
1169                 .startup.single         = NULL,
1170                 .teardown.single        = NULL,
1171         },
1172 #ifdef CONFIG_SMP
1173         [CPUHP_CREATE_THREADS]= {
1174                 .name                   = "threads:prepare",
1175                 .startup.single         = smpboot_create_threads,
1176                 .teardown.single        = NULL,
1177                 .cant_stop              = true,
1178         },
1179         [CPUHP_PERF_PREPARE] = {
1180                 .name                   = "perf:prepare",
1181                 .startup.single         = perf_event_init_cpu,
1182                 .teardown.single        = perf_event_exit_cpu,
1183         },
1184         [CPUHP_WORKQUEUE_PREP] = {
1185                 .name                   = "workqueue:prepare",
1186                 .startup.single         = workqueue_prepare_cpu,
1187                 .teardown.single        = NULL,
1188         },
1189         [CPUHP_HRTIMERS_PREPARE] = {
1190                 .name                   = "hrtimers:prepare",
1191                 .startup.single         = hrtimers_prepare_cpu,
1192                 .teardown.single        = hrtimers_dead_cpu,
1193         },
1194         [CPUHP_SMPCFD_PREPARE] = {
1195                 .name                   = "smpcfd:prepare",
1196                 .startup.single         = smpcfd_prepare_cpu,
1197                 .teardown.single        = smpcfd_dead_cpu,
1198         },
1199         [CPUHP_RELAY_PREPARE] = {
1200                 .name                   = "relay:prepare",
1201                 .startup.single         = relay_prepare_cpu,
1202                 .teardown.single        = NULL,
1203         },
1204         [CPUHP_SLAB_PREPARE] = {
1205                 .name                   = "slab:prepare",
1206                 .startup.single         = slab_prepare_cpu,
1207                 .teardown.single        = slab_dead_cpu,
1208         },
1209         [CPUHP_RCUTREE_PREP] = {
1210                 .name                   = "RCU/tree:prepare",
1211                 .startup.single         = rcutree_prepare_cpu,
1212                 .teardown.single        = rcutree_dead_cpu,
1213         },
1214         /*
1215          * On the tear-down path, timers_dead_cpu() must be invoked
1216          * before blk_mq_queue_reinit_notify() from notify_dead(),
1217          * otherwise an RCU stall occurs.
1218          */
1219         [CPUHP_TIMERS_DEAD] = {
1220                 .name                   = "timers:dead",
1221                 .startup.single         = NULL,
1222                 .teardown.single        = timers_dead_cpu,
1223         },
1224         /* Kicks the plugged cpu into life */
1225         [CPUHP_BRINGUP_CPU] = {
1226                 .name                   = "cpu:bringup",
1227                 .startup.single         = bringup_cpu,
1228                 .teardown.single        = NULL,
1229                 .cant_stop              = true,
1230         },
1231         [CPUHP_AP_SMPCFD_DYING] = {
1232                 .name                   = "smpcfd:dying",
1233                 .startup.single         = NULL,
1234                 .teardown.single        = smpcfd_dying_cpu,
1235         },
1236         /*
1237          * Handled on the control processor until the plugged processor manages
1238          * this itself.
1239          */
1240         [CPUHP_TEARDOWN_CPU] = {
1241                 .name                   = "cpu:teardown",
1242                 .startup.single         = NULL,
1243                 .teardown.single        = takedown_cpu,
1244                 .cant_stop              = true,
1245         },
1246 #else
1247         [CPUHP_BRINGUP_CPU] = { },
1248 #endif
1249 };
1250
1251 /* Application processor state steps */
1252 static struct cpuhp_step cpuhp_ap_states[] = {
1253 #ifdef CONFIG_SMP
1254         /* Final state before CPU kills itself */
1255         [CPUHP_AP_IDLE_DEAD] = {
1256                 .name                   = "idle:dead",
1257         },
1258         /*
1259          * Last state before CPU enters the idle loop to die. Transient state
1260          * for synchronization.
1261          */
1262         [CPUHP_AP_OFFLINE] = {
1263                 .name                   = "ap:offline",
1264                 .cant_stop              = true,
1265         },
1266         /* First state is scheduler control. Interrupts are disabled */
1267         [CPUHP_AP_SCHED_STARTING] = {
1268                 .name                   = "sched:starting",
1269                 .startup.single         = sched_cpu_starting,
1270                 .teardown.single        = sched_cpu_dying,
1271         },
1272         [CPUHP_AP_RCUTREE_DYING] = {
1273                 .name                   = "RCU/tree:dying",
1274                 .startup.single         = NULL,
1275                 .teardown.single        = rcutree_dying_cpu,
1276         },
1277         /* Entry state on starting. Interrupts enabled from here on. Transient
1278          * state for synchronization */
1279         [CPUHP_AP_ONLINE] = {
1280                 .name                   = "ap:online",
1281         },
1282         /* Handle smpboot threads park/unpark */
1283         [CPUHP_AP_SMPBOOT_THREADS] = {
1284                 .name                   = "smpboot/threads:online",
1285                 .startup.single         = smpboot_unpark_threads,
1286                 .teardown.single        = NULL,
1287         },
1288         [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1289                 .name                   = "irq/affinity:online",
1290                 .startup.single         = irq_affinity_online_cpu,
1291                 .teardown.single        = NULL,
1292         },
1293         [CPUHP_AP_PERF_ONLINE] = {
1294                 .name                   = "perf:online",
1295                 .startup.single         = perf_event_init_cpu,
1296                 .teardown.single        = perf_event_exit_cpu,
1297         },
1298         [CPUHP_AP_WORKQUEUE_ONLINE] = {
1299                 .name                   = "workqueue:online",
1300                 .startup.single         = workqueue_online_cpu,
1301                 .teardown.single        = workqueue_offline_cpu,
1302         },
1303         [CPUHP_AP_RCUTREE_ONLINE] = {
1304                 .name                   = "RCU/tree:online",
1305                 .startup.single         = rcutree_online_cpu,
1306                 .teardown.single        = rcutree_offline_cpu,
1307         },
1308 #endif
1309         /*
1310          * The dynamically registered state space is here
1311          */
1312
1313 #ifdef CONFIG_SMP
1314         /* Last state is scheduler control setting the cpu active */
1315         [CPUHP_AP_ACTIVE] = {
1316                 .name                   = "sched:active",
1317                 .startup.single         = sched_cpu_activate,
1318                 .teardown.single        = sched_cpu_deactivate,
1319         },
1320 #endif
1321
1322         /* CPU is fully up and running. */
1323         [CPUHP_ONLINE] = {
1324                 .name                   = "online",
1325                 .startup.single         = NULL,
1326                 .teardown.single        = NULL,
1327         },
1328 };
1329
1330 /* Sanity check for callbacks */
1331 static int cpuhp_cb_check(enum cpuhp_state state)
1332 {
1333         if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1334                 return -EINVAL;
1335         return 0;
1336 }
1337
1338 /*
1339  * Returns a free slot for dynamic state assignment in the requested range. The
1340  * states are protected by the cpuhp_state_mutex and an empty slot is identified
1341  * by having no name assigned.
1342  */
1343 static int cpuhp_reserve_state(enum cpuhp_state state)
1344 {
1345         enum cpuhp_state i, end;
1346         struct cpuhp_step *step;
1347
1348         switch (state) {
1349         case CPUHP_AP_ONLINE_DYN:
1350                 step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN;
1351                 end = CPUHP_AP_ONLINE_DYN_END;
1352                 break;
1353         case CPUHP_BP_PREPARE_DYN:
1354                 step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN;
1355                 end = CPUHP_BP_PREPARE_DYN_END;
1356                 break;
1357         default:
1358                 return -EINVAL;
1359         }
1360
1361         for (i = state; i <= end; i++, step++) {
1362                 if (!step->name)
1363                         return i;
1364         }
1365         WARN(1, "No more dynamic states available for CPU hotplug\n");
1366         return -ENOSPC;
1367 }
1368
1369 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1370                                  int (*startup)(unsigned int cpu),
1371                                  int (*teardown)(unsigned int cpu),
1372                                  bool multi_instance)
1373 {
1374         /* (Un)Install the callbacks for further cpu hotplug operations */
1375         struct cpuhp_step *sp;
1376         int ret = 0;
1377
1378         /*
1379          * If name is NULL, then the state gets removed.
1380          *
1381          * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1382          * the first allocation from these dynamic ranges, so the removal
1383          * would trigger a new allocation and clear the wrong (already
1384          * empty) state, leaving the callbacks of the to be cleared state
1385          * dangling, which causes wreckage on the next hotplug operation.
1386          */
1387         if (name && (state == CPUHP_AP_ONLINE_DYN ||
1388                      state == CPUHP_BP_PREPARE_DYN)) {
1389                 ret = cpuhp_reserve_state(state);
1390                 if (ret < 0)
1391                         return ret;
1392                 state = ret;
1393         }
1394         sp = cpuhp_get_step(state);
1395         if (name && sp->name)
1396                 return -EBUSY;
1397
1398         sp->startup.single = startup;
1399         sp->teardown.single = teardown;
1400         sp->name = name;
1401         sp->multi_instance = multi_instance;
1402         INIT_HLIST_HEAD(&sp->list);
1403         return ret;
1404 }
1405
1406 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1407 {
1408         return cpuhp_get_step(state)->teardown.single;
1409 }
1410
1411 /*
1412  * Call the startup/teardown function for a step either on the AP or
1413  * on the current CPU.
1414  */
1415 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1416                             struct hlist_node *node)
1417 {
1418         struct cpuhp_step *sp = cpuhp_get_step(state);
1419         int ret;
1420
1421         /*
1422          * If there's nothing to do, we're done.
1423          * Relies on the union for multi_instance.
1424          */
1425         if ((bringup && !sp->startup.single) ||
1426             (!bringup && !sp->teardown.single))
1427                 return 0;
1428         /*
1429          * The non-AP-bound callbacks can fail on bringup. On teardown,
1430          * e.g. module removal, we crash for now.
1431          */
1432 #ifdef CONFIG_SMP
1433         if (cpuhp_is_ap_state(state))
1434                 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1435         else
1436                 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1437 #else
1438         ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1439 #endif
1440         BUG_ON(ret && !bringup);
1441         return ret;
1442 }
1443
1444 /*
1445  * Called from __cpuhp_setup_state on a recoverable failure.
1446  *
1447  * Note: The teardown callbacks for rollback are not allowed to fail!
1448  */
1449 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1450                                    struct hlist_node *node)
1451 {
1452         int cpu;
1453
1454         /* Roll back the already executed steps on the other cpus */
1455         for_each_present_cpu(cpu) {
1456                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1457                 int cpustate = st->state;
1458
1459                 if (cpu >= failedcpu)
1460                         break;
1461
1462                 /* Did we invoke the startup call on that cpu? */
1463                 if (cpustate >= state)
1464                         cpuhp_issue_call(cpu, state, false, node);
1465         }
1466 }
1467
1468 int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1469                                           struct hlist_node *node,
1470                                           bool invoke)
1471 {
1472         struct cpuhp_step *sp;
1473         int cpu;
1474         int ret;
1475
1476         lockdep_assert_cpus_held();
1477
1478         sp = cpuhp_get_step(state);
1479         if (sp->multi_instance == false)
1480                 return -EINVAL;
1481
1482         mutex_lock(&cpuhp_state_mutex);
1483
1484         if (!invoke || !sp->startup.multi)
1485                 goto add_node;
1486
1487         /*
1488          * Try to call the startup callback for each present cpu
1489          * depending on the hotplug state of the cpu.
1490          */
1491         for_each_present_cpu(cpu) {
1492                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1493                 int cpustate = st->state;
1494
1495                 if (cpustate < state)
1496                         continue;
1497
1498                 ret = cpuhp_issue_call(cpu, state, true, node);
1499                 if (ret) {
1500                         if (sp->teardown.multi)
1501                                 cpuhp_rollback_install(cpu, state, node);
1502                         goto unlock;
1503                 }
1504         }
1505 add_node:
1506         ret = 0;
1507         hlist_add_head(node, &sp->list);
1508 unlock:
1509         mutex_unlock(&cpuhp_state_mutex);
1510         return ret;
1511 }
1512
1513 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1514                                bool invoke)
1515 {
1516         int ret;
1517
1518         cpus_read_lock();
1519         ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
1520         cpus_read_unlock();
1521         return ret;
1522 }
1523 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
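/*
 * Illustrative sketch: a multi-instance user embeds a struct hlist_node in
 * its per-instance data and registers each instance against a previously
 * set up multi-instance state. The structure, field and state variable
 * below are hypothetical; cpuhp_state_add_instance() is the usual wrapper
 * around __cpuhp_state_add_instance().
 *
 *	struct foo_instance {
 *		struct hlist_node node;
 *	};
 *
 *	static int foo_register_instance(struct foo_instance *foo)
 *	{
 *		return cpuhp_state_add_instance(foo_hp_online, &foo->node);
 *	}
 */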
1524
1525 /**
1526  * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
1527  * @state:              The state to setup
1528  * @invoke:             If true, the startup function is invoked for cpus where
1529  *                      cpu state >= @state
1530  * @startup:            startup callback function
1531  * @teardown:           teardown callback function
1532  * @multi_instance:     State is set up for multiple instances which get
1533  *                      added afterwards.
1534  *
1535  * The caller needs to hold cpus read locked while calling this function.
1536  * Returns:
1537  *   On success:
1538  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
1539  *      0 for all other states
1540  *   On failure: proper (negative) error code
1541  */
1542 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1543                                    const char *name, bool invoke,
1544                                    int (*startup)(unsigned int cpu),
1545                                    int (*teardown)(unsigned int cpu),
1546                                    bool multi_instance)
1547 {
1548         int cpu, ret = 0;
1549         bool dynstate;
1550
1551         lockdep_assert_cpus_held();
1552
1553         if (cpuhp_cb_check(state) || !name)
1554                 return -EINVAL;
1555
1556         mutex_lock(&cpuhp_state_mutex);
1557
1558         ret = cpuhp_store_callbacks(state, name, startup, teardown,
1559                                     multi_instance);
1560
1561         dynstate = state == CPUHP_AP_ONLINE_DYN;
1562         if (ret > 0 && dynstate) {
1563                 state = ret;
1564                 ret = 0;
1565         }
1566
1567         if (ret || !invoke || !startup)
1568                 goto out;
1569
1570         /*
1571          * Try to call the startup callback for each present cpu
1572          * depending on the hotplug state of the cpu.
1573          */
1574         for_each_present_cpu(cpu) {
1575                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1576                 int cpustate = st->state;
1577
1578                 if (cpustate < state)
1579                         continue;
1580
1581                 ret = cpuhp_issue_call(cpu, state, true, NULL);
1582                 if (ret) {
1583                         if (teardown)
1584                                 cpuhp_rollback_install(cpu, state, NULL);
1585                         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1586                         goto out;
1587                 }
1588         }
1589 out:
1590         mutex_unlock(&cpuhp_state_mutex);
1591         /*
1592          * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1593          * dynamically allocated state in case of success.
1594          */
1595         if (!ret && dynstate)
1596                 return state;
1597         return ret;
1598 }
1599 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1600
1601 int __cpuhp_setup_state(enum cpuhp_state state,
1602                         const char *name, bool invoke,
1603                         int (*startup)(unsigned int cpu),
1604                         int (*teardown)(unsigned int cpu),
1605                         bool multi_instance)
1606 {
1607         int ret;
1608
1609         cpus_read_lock();
1610         ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
1611                                              teardown, multi_instance);
1612         cpus_read_unlock();
1613         return ret;
1614 }
1615 EXPORT_SYMBOL(__cpuhp_setup_state);
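/*
 * Illustrative sketch (not part of this file): most callers reach the two
 * functions above through the cpuhp_setup_state() wrapper declared in
 * <linux/cpuhotplug.h>, which takes the CPU read lock and passes
 * multi_instance == false.  The driver name, callbacks and foo_hp_state
 * below are hypothetical.  With CPUHP_AP_ONLINE_DYN the positive return
 * value is the dynamically allocated state and has to be kept for later
 * removal.
 */
static enum cpuhp_state foo_hp_state;

static int foo_cpu_online(unsigned int cpu)
{
        /* Set up the per-CPU resources of the (hypothetical) foo driver */
        return 0;
}

static int foo_cpu_offline(unsigned int cpu)
{
        /* Quiesce and release the per-CPU resources again; must not fail */
        return 0;
}

static int __init foo_init(void)
{
        int ret;

        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo/driver:online",
                                foo_cpu_online, foo_cpu_offline);
        if (ret < 0)
                return ret;
        /* Remember the dynamically allocated state for later removal */
        foo_hp_state = ret;
        return 0;
}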
1616
1617 int __cpuhp_state_remove_instance(enum cpuhp_state state,
1618                                   struct hlist_node *node, bool invoke)
1619 {
1620         struct cpuhp_step *sp = cpuhp_get_step(state);
1621         int cpu;
1622
1623         BUG_ON(cpuhp_cb_check(state));
1624
1625         if (!sp->multi_instance)
1626                 return -EINVAL;
1627
1628         cpus_read_lock();
1629         mutex_lock(&cpuhp_state_mutex);
1630
1631         if (!invoke || !cpuhp_get_teardown_cb(state))
1632                 goto remove;
1633         /*
1634          * Call the teardown callback for each present cpu depending
1635          * on the hotplug state of the cpu. This function is not
1636          * allowed to fail currently!
1637          */
1638         for_each_present_cpu(cpu) {
1639                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1640                 int cpustate = st->state;
1641
1642                 if (cpustate >= state)
1643                         cpuhp_issue_call(cpu, state, false, node);
1644         }
1645
1646 remove:
1647         hlist_del(node);
1648         mutex_unlock(&cpuhp_state_mutex);
1649         cpus_read_unlock();
1650
1651         return 0;
1652 }
1653 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
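/*
 * Illustrative sketch (not part of this file): the multi-instance variant
 * pairs cpuhp_setup_state_multi() with cpuhp_state_add_instance() and
 * cpuhp_state_remove_instance() from <linux/cpuhotplug.h>, which wrap the
 * __cpuhp_* functions above with invoke == true.  struct foo_device, the
 * callbacks and foo_multi_state are hypothetical; the hlist_node embedded
 * in each instance is what gets linked into the state's instance list.
 */
static enum cpuhp_state foo_multi_state;

struct foo_device {
        struct hlist_node node;
        /* per-device data */
};

static int foo_inst_online(unsigned int cpu, struct hlist_node *node)
{
        struct foo_device *fdev = hlist_entry(node, struct foo_device, node);

        pr_debug("foo: bringing up instance %p on CPU%u\n", fdev, cpu);
        return 0;
}

static int foo_inst_offline(unsigned int cpu, struct hlist_node *node)
{
        struct foo_device *fdev = hlist_entry(node, struct foo_device, node);

        pr_debug("foo: tearing down instance %p on CPU%u\n", fdev, cpu);
        return 0;       /* the teardown path must not fail */
}

static int __init foo_multi_init(void)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo/multi:online",
                                      foo_inst_online, foo_inst_offline);
        if (ret < 0)
                return ret;
        foo_multi_state = ret;
        return 0;
}

static int foo_probe(struct foo_device *fdev)
{
        /* Runs foo_inst_online() for this instance on all suitable CPUs */
        return cpuhp_state_add_instance(foo_multi_state, &fdev->node);
}

static void foo_remove_dev(struct foo_device *fdev)
{
        /* Runs foo_inst_offline() for this instance, then unlinks it */
        cpuhp_state_remove_instance(foo_multi_state, &fdev->node);
}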
1654
1655 /**
1656  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
1657  * @state:      The state to remove
1658  * @invoke:     If true, the teardown function is invoked for cpus where
1659  *              cpu state >= @state
1660  *
1661  * The caller needs to hold cpus read locked while calling this function.
1662  * The teardown callback is currently not allowed to fail. Think
1663  * about module removal!
1664  */
1665 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
1666 {
1667         struct cpuhp_step *sp = cpuhp_get_step(state);
1668         int cpu;
1669
1670         BUG_ON(cpuhp_cb_check(state));
1671
1672         lockdep_assert_cpus_held();
1673
1674         mutex_lock(&cpuhp_state_mutex);
1675         if (sp->multi_instance) {
1676                 WARN(!hlist_empty(&sp->list),
1677                      "Error: Removing state %d which has instances left.\n",
1678                      state);
1679                 goto remove;
1680         }
1681
1682         if (!invoke || !cpuhp_get_teardown_cb(state))
1683                 goto remove;
1684
1685         /*
1686          * Call the teardown callback for each present cpu depending
1687          * on the hotplug state of the cpu. This function is not
1688          * allowed to fail currently!
1689          */
1690         for_each_present_cpu(cpu) {
1691                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1692                 int cpustate = st->state;
1693
1694                 if (cpustate >= state)
1695                         cpuhp_issue_call(cpu, state, false, NULL);
1696         }
1697 remove:
1698         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1699         mutex_unlock(&cpuhp_state_mutex);
1700 }
1701 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
1702
1703 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1704 {
1705         cpus_read_lock();
1706         __cpuhp_remove_state_cpuslocked(state, invoke);
1707         cpus_read_unlock();
1708 }
1709 EXPORT_SYMBOL(__cpuhp_remove_state);
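/*
 * Illustrative sketch (not part of this file): as the kernel-doc above
 * stresses, state removal typically sits on a module's unload path, which
 * is why the teardown callbacks may not fail.  Continuing the hypothetical
 * foo driver from the earlier sketch, the exit path hands back the
 * dynamically allocated state; cpuhp_remove_state() passes invoke == true,
 * so foo_cpu_offline() runs on every CPU whose state is >= foo_hp_state.
 */
static void __exit foo_exit(void)
{
        cpuhp_remove_state(foo_hp_state);
}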
1710
1711 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1712 static ssize_t show_cpuhp_state(struct device *dev,
1713                                 struct device_attribute *attr, char *buf)
1714 {
1715         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1716
1717         return sprintf(buf, "%d\n", st->state);
1718 }
1719 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1720
1721 static ssize_t write_cpuhp_target(struct device *dev,
1722                                   struct device_attribute *attr,
1723                                   const char *buf, size_t count)
1724 {
1725         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1726         struct cpuhp_step *sp;
1727         int target, ret;
1728
1729         ret = kstrtoint(buf, 10, &target);
1730         if (ret)
1731                 return ret;
1732
1733 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1734         if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1735                 return -EINVAL;
1736 #else
1737         if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1738                 return -EINVAL;
1739 #endif
1740
1741         ret = lock_device_hotplug_sysfs();
1742         if (ret)
1743                 return ret;
1744
1745         mutex_lock(&cpuhp_state_mutex);
1746         sp = cpuhp_get_step(target);
1747         ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1748         mutex_unlock(&cpuhp_state_mutex);
1749         if (ret)
1750                 goto out;
1751
1752         if (st->state < target)
1753                 ret = do_cpu_up(dev->id, target);
1754         else
1755                 ret = do_cpu_down(dev->id, target);
1756 out:
1757         unlock_device_hotplug();
1758         return ret ? ret : count;
1759 }
1760
1761 static ssize_t show_cpuhp_target(struct device *dev,
1762                                  struct device_attribute *attr, char *buf)
1763 {
1764         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1765
1766         return sprintf(buf, "%d\n", st->target);
1767 }
1768 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1769
1770 static struct attribute *cpuhp_cpu_attrs[] = {
1771         &dev_attr_state.attr,
1772         &dev_attr_target.attr,
1773         NULL
1774 };
1775
1776 static const struct attribute_group cpuhp_cpu_attr_group = {
1777         .attrs = cpuhp_cpu_attrs,
1778         .name = "hotplug",
1780 };
1781
1782 static ssize_t show_cpuhp_states(struct device *dev,
1783                                  struct device_attribute *attr, char *buf)
1784 {
1785         ssize_t cur, res = 0;
1786         int i;
1787
1788         mutex_lock(&cpuhp_state_mutex);
1789         for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
1790                 struct cpuhp_step *sp = cpuhp_get_step(i);
1791
1792                 if (sp->name) {
1793                         cur = sprintf(buf, "%3d: %s\n", i, sp->name);
1794                         buf += cur;
1795                         res += cur;
1796                 }
1797         }
1798         mutex_unlock(&cpuhp_state_mutex);
1799         return res;
1800 }
1801 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
1802
1803 static struct attribute *cpuhp_cpu_root_attrs[] = {
1804         &dev_attr_states.attr,
1805         NULL
1806 };
1807
1808 static const struct attribute_group cpuhp_cpu_root_attr_group = {
1809         .attrs = cpuhp_cpu_root_attrs,
1810         .name = "hotplug",
1812 };
1813
1814 static int __init cpuhp_sysfs_init(void)
1815 {
1816         int cpu, ret;
1817
1818         ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
1819                                  &cpuhp_cpu_root_attr_group);
1820         if (ret)
1821                 return ret;
1822
1823         for_each_possible_cpu(cpu) {
1824                 struct device *dev = get_cpu_device(cpu);
1825
1826                 if (!dev)
1827                         continue;
1828                 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
1829                 if (ret)
1830                         return ret;
1831         }
1832         return 0;
1833 }
1834 device_initcall(cpuhp_sysfs_init);
1835 #endif
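/*
 * Illustrative note (not part of this file): the attribute groups above are
 * visible as /sys/devices/system/cpu/hotplug/states (the named state table)
 * and /sys/devices/system/cpu/cpuN/hotplug/{state,target} per CPU.  A
 * minimal userspace sketch that takes CPU1 down via the "target" file by
 * writing the numeric value of CPUHP_OFFLINE (0); the path assumes the
 * usual sysfs mount and is only for illustration:
 *
 *      #include <stdio.h>
 *
 *      int main(void)
 *      {
 *              FILE *f = fopen("/sys/devices/system/cpu/cpu1/hotplug/target", "w");
 *
 *              if (!f)
 *                      return 1;
 *              fprintf(f, "0\n");      /. CPUHP_OFFLINE ./
 *              return fclose(f) ? 1 : 0;
 *      }
 */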
1836
1837 /*
1838  * cpu_bit_bitmap[] is a special, "compressed" data structure: for each
1839  * bit number nr it encodes the NR_CPUS-bit value 1<<nr.
1840  *
1841  * It is used by cpumask_of() to get a constant address to a CPU
1842  * mask value that has a single bit set only.
1843  */
1844
1845 /* cpu_bit_bitmap[0] is empty - so we can back into it */
1846 #define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
1847 #define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
1848 #define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
1849 #define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1850
1851 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
1852
1853         MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
1854         MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
1855 #if BITS_PER_LONG > 32
1856         MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
1857         MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
1858 #endif
1859 };
1860 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
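/*
 * Illustrative sketch (not part of this file) of how the table is consumed,
 * mirroring get_cpu_mask() in <linux/cpumask.h>: row "1 + cpu % BITS_PER_LONG"
 * carries the single word 1UL << (cpu % BITS_PER_LONG) in column 0, and
 * stepping the pointer back by "cpu / BITS_PER_LONG" words places that word
 * at the position matching @cpu, while the all-zero row 0 provides the
 * padding that gets "backed into".  The helper name below is made up.
 */
static inline const struct cpumask *cpu_bit_bitmap_lookup(unsigned int cpu)
{
        const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

        p -= cpu / BITS_PER_LONG;
        return to_cpumask(p);
}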
1861
1862 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
1863 EXPORT_SYMBOL(cpu_all_bits);
1864
1865 #ifdef CONFIG_INIT_ALL_POSSIBLE
1866 struct cpumask __cpu_possible_mask __read_mostly
1867         = {CPU_BITS_ALL};
1868 #else
1869 struct cpumask __cpu_possible_mask __read_mostly;
1870 #endif
1871 EXPORT_SYMBOL(__cpu_possible_mask);
1872
1873 struct cpumask __cpu_online_mask __read_mostly;
1874 EXPORT_SYMBOL(__cpu_online_mask);
1875
1876 struct cpumask __cpu_present_mask __read_mostly;
1877 EXPORT_SYMBOL(__cpu_present_mask);
1878
1879 struct cpumask __cpu_active_mask __read_mostly;
1880 EXPORT_SYMBOL(__cpu_active_mask);
1881
1882 void init_cpu_present(const struct cpumask *src)
1883 {
1884         cpumask_copy(&__cpu_present_mask, src);
1885 }
1886
1887 void init_cpu_possible(const struct cpumask *src)
1888 {
1889         cpumask_copy(&__cpu_possible_mask, src);
1890 }
1891
1892 void init_cpu_online(const struct cpumask *src)
1893 {
1894         cpumask_copy(&__cpu_online_mask, src);
1895 }
1896
1897 /*
1898  * Activate the first processor.
1899  */
1900 void __init boot_cpu_init(void)
1901 {
1902         int cpu = smp_processor_id();
1903
1904         /* Mark the boot cpu "present", "online" etc for SMP and UP case */
1905         set_cpu_online(cpu, true);
1906         set_cpu_active(cpu, true);
1907         set_cpu_present(cpu, true);
1908         set_cpu_possible(cpu, true);
1909
1910 #ifdef CONFIG_SMP
1911         __boot_cpu_id = cpu;
1912 #endif
1913 }
1914
1915 /*
1916  * Must be called _AFTER_ setting up the per_cpu areas
1917  */
1918 void __init boot_cpu_state_init(void)
1919 {
1920         per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
1921 }