]> asedeno.scripts.mit.edu Git - linux.git/blob - arch/powerpc/platforms/powernv/idle.c
Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
[linux.git] / arch / powerpc / platforms / powernv / idle.c
1 /*
2  * PowerNV cpuidle code
3  *
4  * Copyright 2015 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/types.h>
13 #include <linux/mm.h>
14 #include <linux/slab.h>
15 #include <linux/of.h>
16 #include <linux/device.h>
17 #include <linux/cpu.h>
18
19 #include <asm/firmware.h>
20 #include <asm/machdep.h>
21 #include <asm/opal.h>
22 #include <asm/cputhreads.h>
23 #include <asm/cpuidle.h>
24 #include <asm/code-patching.h>
25 #include <asm/smp.h>
26 #include <asm/runlatch.h>
27 #include <asm/dbell.h>
28
29 #include "powernv.h"
30 #include "subcore.h"
31
32 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */
33 #define MAX_STOP_STATE  0xF
34
35 #define P9_STOP_SPR_MSR 2000
36 #define P9_STOP_SPR_PSSCR      855
37
38 static u32 supported_cpuidle_states;
39
40 /*
41  * The default stop state that will be used by ppc_md.power_save
42  * function on platforms that support stop instruction.
43  */
44 static u64 pnv_default_stop_val;
45 static u64 pnv_default_stop_mask;
46 static bool default_stop_found;
47
48 /*
49  * First deep stop state. Used to figure out when to save/restore
50  * hypervisor context.
51  */
52 u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
53
54 /*
55  * psscr value and mask of the deepest stop idle state.
56  * Used when a cpu is offlined.
57  */
58 static u64 pnv_deepest_stop_psscr_val;
59 static u64 pnv_deepest_stop_psscr_mask;
60 static u64 pnv_deepest_stop_flag;
61 static bool deepest_stop_found;
62
63 static int pnv_save_sprs_for_deep_states(void)
64 {
65         int cpu;
66         int rc;
67
68         /*
69          * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
70          * all cpus at boot. Get these reg values of current cpu and use the
71          * same across all cpus.
72          */
73         uint64_t lpcr_val = mfspr(SPRN_LPCR);
74         uint64_t hid0_val = mfspr(SPRN_HID0);
75         uint64_t hid1_val = mfspr(SPRN_HID1);
76         uint64_t hid4_val = mfspr(SPRN_HID4);
77         uint64_t hid5_val = mfspr(SPRN_HID5);
78         uint64_t hmeer_val = mfspr(SPRN_HMEER);
79         uint64_t msr_val = MSR_IDLE;
80         uint64_t psscr_val = pnv_deepest_stop_psscr_val;
81
82         for_each_possible_cpu(cpu) {
83                 uint64_t pir = get_hard_smp_processor_id(cpu);
84                 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
85
86                 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
87                 if (rc != 0)
88                         return rc;
89
90                 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
91                 if (rc != 0)
92                         return rc;
93
94                 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
95                         rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
96                         if (rc)
97                                 return rc;
98
99                         rc = opal_slw_set_reg(pir,
100                                               P9_STOP_SPR_PSSCR, psscr_val);
101
102                         if (rc)
103                                 return rc;
104                 }
105
106                 /* HIDs are per core registers */
107                 if (cpu_thread_in_core(cpu) == 0) {
108
109                         rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
110                         if (rc != 0)
111                                 return rc;
112
113                         rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
114                         if (rc != 0)
115                                 return rc;
116
117                         /* Only p8 needs to set extra HID regiters */
118                         if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
119
120                                 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
121                                 if (rc != 0)
122                                         return rc;
123
124                                 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
125                                 if (rc != 0)
126                                         return rc;
127
128                                 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
129                                 if (rc != 0)
130                                         return rc;
131                         }
132                 }
133         }
134
135         return 0;
136 }
137
138 static void pnv_alloc_idle_core_states(void)
139 {
140         int i, j;
141         int nr_cores = cpu_nr_cores();
142         u32 *core_idle_state;
143
144         /*
145          * core_idle_state - The lower 8 bits track the idle state of
146          * each thread of the core.
147          *
148          * The most significant bit is the lock bit.
149          *
150          * Initially all the bits corresponding to threads_per_core
151          * are set. They are cleared when the thread enters deep idle
152          * state like sleep and winkle/stop.
153          *
154          * Initially the lock bit is cleared.  The lock bit has 2
155          * purposes:
156          *      a. While the first thread in the core waking up from
157          *         idle is restoring core state, it prevents other
158          *         threads in the core from switching to process
159          *         context.
160          *      b. While the last thread in the core is saving the
161          *         core state, it prevents a different thread from
162          *         waking up.
163          */
164         for (i = 0; i < nr_cores; i++) {
165                 int first_cpu = i * threads_per_core;
166                 int node = cpu_to_node(first_cpu);
167                 size_t paca_ptr_array_size;
168
169                 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
170                 *core_idle_state = (1 << threads_per_core) - 1;
171                 paca_ptr_array_size = (threads_per_core *
172                                        sizeof(struct paca_struct *));
173
174                 for (j = 0; j < threads_per_core; j++) {
175                         int cpu = first_cpu + j;
176
177                         paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
178                         paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
179                         paca_ptrs[cpu]->thread_mask = 1 << j;
180                         if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
181                                 continue;
182                         paca_ptrs[cpu]->thread_sibling_pacas =
183                                 kmalloc_node(paca_ptr_array_size,
184                                              GFP_KERNEL, node);
185                 }
186         }
187
188         update_subcore_sibling_mask();
189
190         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
191                 int rc = pnv_save_sprs_for_deep_states();
192
193                 if (likely(!rc))
194                         return;
195
196                 /*
197                  * The stop-api is unable to restore hypervisor
198                  * resources on wakeup from platform idle states which
199                  * lose full context. So disable such states.
200                  */
201                 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
202                 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
203                 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
204
205                 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
206                     (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
207                         /*
208                          * Use the default stop state for CPU-Hotplug
209                          * if available.
210                          */
211                         if (default_stop_found) {
212                                 pnv_deepest_stop_psscr_val =
213                                         pnv_default_stop_val;
214                                 pnv_deepest_stop_psscr_mask =
215                                         pnv_default_stop_mask;
216                                 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
217                                         pnv_deepest_stop_psscr_val);
218                         } else { /* Fallback to snooze loop for CPU-Hotplug */
219                                 deepest_stop_found = false;
220                                 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
221                         }
222                 }
223         }
224 }
225
226 u32 pnv_get_supported_cpuidle_states(void)
227 {
228         return supported_cpuidle_states;
229 }
230 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
231
232 static void pnv_fastsleep_workaround_apply(void *info)
233
234 {
235         int rc;
236         int *err = info;
237
238         rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
239                                         OPAL_CONFIG_IDLE_APPLY);
240         if (rc)
241                 *err = 1;
242 }
243
244 /*
245  * Used to store fastsleep workaround state
246  * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
247  * 1 - Workaround applied once, never undone.
248  */
249 static u8 fastsleep_workaround_applyonce;
250
251 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
252                 struct device_attribute *attr, char *buf)
253 {
254         return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
255 }
256
257 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
258                 struct device_attribute *attr, const char *buf,
259                 size_t count)
260 {
261         cpumask_t primary_thread_mask;
262         int err;
263         u8 val;
264
265         if (kstrtou8(buf, 0, &val) || val != 1)
266                 return -EINVAL;
267
268         if (fastsleep_workaround_applyonce == 1)
269                 return count;
270
271         /*
272          * fastsleep_workaround_applyonce = 1 implies
273          * fastsleep workaround needs to be left in 'applied' state on all
274          * the cores. Do this by-
275          * 1. Patching out the call to 'undo' workaround in fastsleep exit path
276          * 2. Sending ipi to all the cores which have at least one online thread
277          * 3. Patching out the call to 'apply' workaround in fastsleep entry
278          * path
279          * There is no need to send ipi to cores which have all threads
280          * offlined, as last thread of the core entering fastsleep or deeper
281          * state would have applied workaround.
282          */
283         err = patch_instruction(
284                 (unsigned int *)pnv_fastsleep_workaround_at_exit,
285                 PPC_INST_NOP);
286         if (err) {
287                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
288                 goto fail;
289         }
290
291         get_online_cpus();
292         primary_thread_mask = cpu_online_cores_map();
293         on_each_cpu_mask(&primary_thread_mask,
294                                 pnv_fastsleep_workaround_apply,
295                                 &err, 1);
296         put_online_cpus();
297         if (err) {
298                 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
299                 goto fail;
300         }
301
302         err = patch_instruction(
303                 (unsigned int *)pnv_fastsleep_workaround_at_entry,
304                 PPC_INST_NOP);
305         if (err) {
306                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
307                 goto fail;
308         }
309
310         fastsleep_workaround_applyonce = 1;
311
312         return count;
313 fail:
314         return -EIO;
315 }
316
317 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
318                         show_fastsleep_workaround_applyonce,
319                         store_fastsleep_workaround_applyonce);
320
/*
 * __power7_idle_type: Enter the POWER7-style idle state @type with the
 * runlatch turned off around the idle instruction.
 *
 * Returns the value handed back by power7_idle_insn(), or 0 without
 * idling when prep_irq_for_idle_irqsoff() refuses.
 */
static unsigned long __power7_idle_type(unsigned long type)
{
	unsigned long wake_srr1 = 0;

	if (prep_irq_for_idle_irqsoff()) {
		__ppc64_runlatch_off();
		wake_srr1 = power7_idle_insn(type);
		__ppc64_runlatch_on();

		fini_irq_for_idle_irqsoff();
	}

	return wake_srr1;
}
336
/*
 * power7_idle_type: Enter the idle state @type and pass the resulting
 * srr1 value to irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
	irq_set_pending_from_srr1(__power7_idle_type(type));
}
344
345 void power7_idle(void)
346 {
347         if (!powersave_nap)
348                 return;
349
350         power7_idle_type(PNV_THREAD_NAP);
351 }
352
353 static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
354                                       unsigned long stop_psscr_mask)
355 {
356         unsigned long psscr;
357         unsigned long srr1;
358
359         if (!prep_irq_for_idle_irqsoff())
360                 return 0;
361
362         psscr = mfspr(SPRN_PSSCR);
363         psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
364
365         __ppc64_runlatch_off();
366         srr1 = power9_idle_stop(psscr);
367         __ppc64_runlatch_on();
368
369         fini_irq_for_idle_irqsoff();
370
371         return srr1;
372 }
373
/*
 * power9_idle_type: Enter the stop state described by
 * @stop_psscr_val/@stop_psscr_mask and pass the resulting srr1 value to
 * irq_set_pending_from_srr1().
 */
void power9_idle_type(unsigned long stop_psscr_val,
				      unsigned long stop_psscr_mask)
{
	irq_set_pending_from_srr1(__power9_idle_type(stop_psscr_val,
						     stop_psscr_mask));
}
382
383 /*
384  * Used for ppc_md.power_save which needs a function with no parameters
385  */
386 void power9_idle(void)
387 {
388         power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
389 }
390
391 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
392 /*
393  * This is used in working around bugs in thread reconfiguration
394  * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
395  * memory and the way that XER[SO] is checkpointed.
396  * This function forces the core into SMT4 in order by asking
397  * all other threads not to stop, and sending a message to any
398  * that are in a stop state.
399  * Must be called with preemption disabled.
400  */
401 void pnv_power9_force_smt4_catch(void)
402 {
403         int cpu, cpu0, thr;
404         int awake_threads = 1;          /* this thread is awake */
405         int poke_threads = 0;
406         int need_awake = threads_per_core;
407
408         cpu = smp_processor_id();
409         cpu0 = cpu & ~(threads_per_core - 1);
410         for (thr = 0; thr < threads_per_core; ++thr) {
411                 if (cpu != cpu0 + thr)
412                         atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
413         }
414         /* order setting dont_stop vs testing requested_psscr */
415         mb();
416         for (thr = 0; thr < threads_per_core; ++thr) {
417                 if (!paca_ptrs[cpu0+thr]->requested_psscr)
418                         ++awake_threads;
419                 else
420                         poke_threads |= (1 << thr);
421         }
422
423         /* If at least 3 threads are awake, the core is in SMT4 already */
424         if (awake_threads < need_awake) {
425                 /* We have to wake some threads; we'll use msgsnd */
426                 for (thr = 0; thr < threads_per_core; ++thr) {
427                         if (poke_threads & (1 << thr)) {
428                                 ppc_msgsnd_sync();
429                                 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
430                                            paca_ptrs[cpu0+thr]->hw_cpu_id);
431                         }
432                 }
433                 /* now spin until at least 3 threads are awake */
434                 do {
435                         for (thr = 0; thr < threads_per_core; ++thr) {
436                                 if ((poke_threads & (1 << thr)) &&
437                                     !paca_ptrs[cpu0+thr]->requested_psscr) {
438                                         ++awake_threads;
439                                         poke_threads &= ~(1 << thr);
440                                 }
441                         }
442                 } while (awake_threads < need_awake);
443         }
444 }
445 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
446
447 void pnv_power9_force_smt4_release(void)
448 {
449         int cpu, cpu0, thr;
450
451         cpu = smp_processor_id();
452         cpu0 = cpu & ~(threads_per_core - 1);
453
454         /* clear all the dont_stop flags */
455         for (thr = 0; thr < threads_per_core; ++thr) {
456                 if (cpu != cpu0 + thr)
457                         atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
458         }
459 }
460 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
461 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
462
463 #ifdef CONFIG_HOTPLUG_CPU
464 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
465 {
466         u64 pir = get_hard_smp_processor_id(cpu);
467
468         mtspr(SPRN_LPCR, lpcr_val);
469
470         /*
471          * Program the LPCR via stop-api only if the deepest stop state
472          * can lose hypervisor context.
473          */
474         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
475                 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
476 }
477
478 /*
479  * pnv_cpu_offline: A function that puts the CPU into the deepest
480  * available platform idle state on a CPU-Offline.
481  * interrupts hard disabled and no lazy irq pending.
482  */
483 unsigned long pnv_cpu_offline(unsigned int cpu)
484 {
485         unsigned long srr1;
486         u32 idle_states = pnv_get_supported_cpuidle_states();
487         u64 lpcr_val;
488
489         /*
490          * We don't want to take decrementer interrupts while we are
491          * offline, so clear LPCR:PECE1. We keep PECE2 (and
492          * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
493          *
494          * If the CPU gets woken up by a special wakeup, ensure that
495          * the SLW engine sets LPCR with decrementer bit cleared, else
496          * the CPU will come back to the kernel due to a spurious
497          * wakeup.
498          */
499         lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
500         pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
501
502         __ppc64_runlatch_off();
503
504         if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
505                 unsigned long psscr;
506
507                 psscr = mfspr(SPRN_PSSCR);
508                 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
509                                                 pnv_deepest_stop_psscr_val;
510                 srr1 = power9_offline_stop(psscr);
511
512         } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
513                    (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
514                 srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
515         } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
516                    (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
517                 srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
518         } else if (idle_states & OPAL_PM_NAP_ENABLED) {
519                 srr1 = power7_idle_insn(PNV_THREAD_NAP);
520         } else {
521                 /* This is the fallback method. We emulate snooze */
522                 while (!generic_check_cpu_restart(cpu)) {
523                         HMT_low();
524                         HMT_very_low();
525                 }
526                 srr1 = 0;
527                 HMT_medium();
528         }
529
530         __ppc64_runlatch_on();
531
532         /*
533          * Re-enable decrementer interrupts in LPCR.
534          *
535          * Further, we want stop states to be woken up by decrementer
536          * for non-hotplug cases. So program the LPCR via stop api as
537          * well.
538          */
539         lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
540         pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
541
542         return srr1;
543 }
544 #endif
545
546 /*
547  * Power ISA 3.0 idle initialization.
548  *
549  * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
550  * Register (PSSCR) to control idle behavior.
551  *
552  * PSSCR layout:
553  * ----------------------------------------------------------
554  * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
555  * ----------------------------------------------------------
556  * 0      4     41   42    43   44     48    54   56    60
557  *
558  * PSSCR key fields:
559  *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
560  *      lowest power-saving state the thread entered since stop instruction was
561  *      last executed.
562  *
563  *      Bit 41 - Status Disable(SD)
564  *      0 - Shows PLS entries
565  *      1 - PLS entries are all 0
566  *
567  *      Bit 42 - Enable State Loss
568  *      0 - No state is lost irrespective of other fields
569  *      1 - Allows state loss
570  *
571  *      Bit 43 - Exit Criterion
572  *      0 - Exit from power-save mode on any interrupt
573  *      1 - Exit from power-save mode controlled by LPCR's PECE bits
574  *
575  *      Bits 44:47 - Power-Saving Level Limit
576  *      This limits the power-saving level that can be entered into.
577  *
578  *      Bits 60:63 - Requested Level
579  *      Used to specify which power-saving level must be entered on executing
580  *      stop instruction
581  */
582
583 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
584 {
585         int err = 0;
586
587         /*
588          * psscr_mask == 0xf indicates an older firmware.
589          * Set remaining fields of psscr to the default values.
590          * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
591          */
592         if (*psscr_mask == 0xf) {
593                 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
594                 *psscr_mask = PSSCR_HV_DEFAULT_MASK;
595                 return err;
596         }
597
598         /*
599          * New firmware is expected to set the psscr_val bits correctly.
600          * Validate that the following invariants are correctly maintained by
601          * the new firmware.
602          * - ESL bit value matches the EC bit value.
603          * - ESL bit is set for all the deep stop states.
604          */
605         if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
606                 err = ERR_EC_ESL_MISMATCH;
607         } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
608                 GET_PSSCR_ESL(*psscr_val) == 0) {
609                 err = ERR_DEEP_STATE_ESL_MISMATCH;
610         }
611
612         return err;
613 }
614
615 /*
616  * pnv_arch300_idle_init: Initializes the default idle state, first
617  *                        deep idle state and deepest idle state on
618  *                        ISA 3.0 CPUs.
619  *
620  * @np: /ibm,opal/power-mgt device node
621  * @flags: cpu-idle-state-flags array
622  * @dt_idle_states: Number of idle state entries
623  * Returns 0 on success
624  */
625 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
626                                         int dt_idle_states)
627 {
628         u64 *psscr_val = NULL;
629         u64 *psscr_mask = NULL;
630         u32 *residency_ns = NULL;
631         u64 max_residency_ns = 0;
632         int rc = 0, i;
633
634         psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
635         psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
636         residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
637                                GFP_KERNEL);
638
639         if (!psscr_val || !psscr_mask || !residency_ns) {
640                 rc = -1;
641                 goto out;
642         }
643
644         if (of_property_read_u64_array(np,
645                 "ibm,cpu-idle-state-psscr",
646                 psscr_val, dt_idle_states)) {
647                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
648                 rc = -1;
649                 goto out;
650         }
651
652         if (of_property_read_u64_array(np,
653                                        "ibm,cpu-idle-state-psscr-mask",
654                                        psscr_mask, dt_idle_states)) {
655                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
656                 rc = -1;
657                 goto out;
658         }
659
660         if (of_property_read_u32_array(np,
661                                        "ibm,cpu-idle-state-residency-ns",
662                                         residency_ns, dt_idle_states)) {
663                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
664                 rc = -1;
665                 goto out;
666         }
667
668         /*
669          * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
670          * and the pnv_default_stop_{val,mask}.
671          *
672          * pnv_first_deep_stop_state should be set to the first stop
673          * level to cause hypervisor state loss.
674          *
675          * pnv_deepest_stop_{val,mask} should be set to values corresponding to
676          * the deepest stop state.
677          *
678          * pnv_default_stop_{val,mask} should be set to values corresponding to
679          * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
680          */
681         pnv_first_deep_stop_state = MAX_STOP_STATE;
682         for (i = 0; i < dt_idle_states; i++) {
683                 int err;
684                 u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
685
686                 if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
687                      (pnv_first_deep_stop_state > psscr_rl))
688                         pnv_first_deep_stop_state = psscr_rl;
689
690                 err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
691                                               flags[i]);
692                 if (err) {
693                         report_invalid_psscr_val(psscr_val[i], err);
694                         continue;
695                 }
696
697                 if (max_residency_ns < residency_ns[i]) {
698                         max_residency_ns = residency_ns[i];
699                         pnv_deepest_stop_psscr_val = psscr_val[i];
700                         pnv_deepest_stop_psscr_mask = psscr_mask[i];
701                         pnv_deepest_stop_flag = flags[i];
702                         deepest_stop_found = true;
703                 }
704
705                 if (!default_stop_found &&
706                     (flags[i] & OPAL_PM_STOP_INST_FAST)) {
707                         pnv_default_stop_val = psscr_val[i];
708                         pnv_default_stop_mask = psscr_mask[i];
709                         default_stop_found = true;
710                 }
711         }
712
713         if (unlikely(!default_stop_found)) {
714                 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
715         } else {
716                 ppc_md.power_save = power9_idle;
717                 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
718                         pnv_default_stop_val, pnv_default_stop_mask);
719         }
720
721         if (unlikely(!deepest_stop_found)) {
722                 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
723         } else {
724                 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
725                         pnv_deepest_stop_psscr_val,
726                         pnv_deepest_stop_psscr_mask);
727         }
728
729         pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
730                 pnv_first_deep_stop_state);
731 out:
732         kfree(psscr_val);
733         kfree(psscr_mask);
734         kfree(residency_ns);
735         return rc;
736 }
737
738 /*
739  * Probe device tree for supported idle states
740  */
741 static void __init pnv_probe_idle_states(void)
742 {
743         struct device_node *np;
744         int dt_idle_states;
745         u32 *flags = NULL;
746         int i;
747
748         np = of_find_node_by_path("/ibm,opal/power-mgt");
749         if (!np) {
750                 pr_warn("opal: PowerMgmt Node not found\n");
751                 goto out;
752         }
753         dt_idle_states = of_property_count_u32_elems(np,
754                         "ibm,cpu-idle-state-flags");
755         if (dt_idle_states < 0) {
756                 pr_warn("cpuidle-powernv: no idle states found in the DT\n");
757                 goto out;
758         }
759
760         flags = kcalloc(dt_idle_states, sizeof(*flags),  GFP_KERNEL);
761
762         if (of_property_read_u32_array(np,
763                         "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
764                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
765                 goto out;
766         }
767
768         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
769                 if (pnv_power9_idle_init(np, flags, dt_idle_states))
770                         goto out;
771         }
772
773         for (i = 0; i < dt_idle_states; i++)
774                 supported_cpuidle_states |= flags[i];
775
776 out:
777         kfree(flags);
778 }
779 static int __init pnv_init_idle_states(void)
780 {
781
782         supported_cpuidle_states = 0;
783
784         if (cpuidle_disable != IDLE_NO_OVERRIDE)
785                 goto out;
786
787         pnv_probe_idle_states();
788
789         if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
790                 patch_instruction(
791                         (unsigned int *)pnv_fastsleep_workaround_at_entry,
792                         PPC_INST_NOP);
793                 patch_instruction(
794                         (unsigned int *)pnv_fastsleep_workaround_at_exit,
795                         PPC_INST_NOP);
796         } else {
797                 /*
798                  * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
799                  * workaround is needed to use fastsleep. Provide sysfs
800                  * control to choose how this workaround has to be applied.
801                  */
802                 device_create_file(cpu_subsys.dev_root,
803                                 &dev_attr_fastsleep_workaround_applyonce);
804         }
805
806         pnv_alloc_idle_core_states();
807
808         /*
809          * For each CPU, record its PACA address in each of it's
810          * sibling thread's PACA at the slot corresponding to this
811          * CPU's index in the core.
812          */
813         if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
814                 int cpu;
815
816                 pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
817                 for_each_possible_cpu(cpu) {
818                         int base_cpu = cpu_first_thread_sibling(cpu);
819                         int idx = cpu_thread_in_core(cpu);
820                         int i;
821
822                         for (i = 0; i < threads_per_core; i++) {
823                                 int j = base_cpu + i;
824
825                                 paca_ptrs[j]->thread_sibling_pacas[idx] =
826                                         paca_ptrs[cpu];
827                         }
828                 }
829         }
830
831         if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
832                 ppc_md.power_save = power7_idle;
833
834 out:
835         return 0;
836 }
837 machine_subsys_initcall(powernv, pnv_init_idle_states);