2 * This file contains idle entry/exit functions for POWER7,
3 * POWER8 and POWER9 CPUs.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
11 #include <linux/threads.h>
12 #include <asm/processor.h>
14 #include <asm/cputable.h>
15 #include <asm/thread_info.h>
16 #include <asm/ppc_asm.h>
17 #include <asm/asm-offsets.h>
18 #include <asm/ppc-opcode.h>
19 #include <asm/hw_irq.h>
20 #include <asm/kvm_book3s_asm.h>
22 #include <asm/cpuidle.h>
23 #include <asm/exception-64s.h>
24 #include <asm/book3s/64/mmu-hash.h>
30 * Use unused space in the interrupt stack to save and restore
31 * registers for winkle support.
/*
 * PSSCR[EC|ESL] mask, shifted right 16 so it can be tested with `andis.`
 * (which compares against the upper halfword of the immediate operand).
 * Fully parenthesized so the macro expands safely inside any expression.
 */
45 #define PSSCR_EC_ESL_MASK_SHIFTED ((PSSCR_EC | PSSCR_ESL) >> 16)
/*
 * NOTE(review): fragment of the SPR-save routine (save_sprs_to_stack per the
 * callers below) — the routine label and most of its body are not visible in
 * this excerpt; confirm against the full file.
 */
50 * Used by threads before entering deep idle states. Saves SPRs
51 * in interrupt stack frame
55 * Note all register i.e per-core, per-subcore or per-thread is saved
56 * here since any thread in the core might wake up first
60 * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
/* Closes a feature-section begun on a line not visible here: the SDR1 save is
 * skipped on CPUs with CPU_FTR_ARCH_300 (ISA v3.0, where SDR1 is removed). */
70 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
/*
 * NOTE(review): the core_idle_lock_held label itself and the HMT_LOW spin
 * body are not visible in this excerpt; only the lock tests remain.
 */
91 * Used by threads when the lock bit of core_idle_state is set.
92 * Threads will spin in HMT_LOW until the lock bit is cleared.
93 * r14 - pointer to core_idle_state
94 * r15 - used to load contents of core_idle_state
95 - r9 - used as a temporary variable
101 andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
/* Re-test the lock bit after re-reading core_idle_state (intervening loads
 * not visible here); loop back while another thread still holds the lock. */
105 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
106 bne- core_idle_lock_held
110 * Pass requested state in r3:
111 * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
112 * - Requested PSSCR value in POWER9
114 * Address of idle handler to branch to in realmode in r4
116 pnv_powersave_common:
117 /* Use r3 to pass state nap/sleep/winkle */
118 /* NAP is a state loss, we create a regs frame on the
119 * stack, fill it up with the state we care about and
120 * stick a pointer to it in PACAR1. We really only
121 * need to save PC, some CR bits and the NV GPRs,
122 * but for now an interrupt frame will do.
/* Allocate an interrupt-sized stack frame; state is saved into it on lines
 * not visible in this excerpt. */
128 stdu r1,-INT_FRAME_SIZE(r1)
132 /* We haven't lost state ... yet */
/* r0 is set up on a line not visible here; record "no NVGPR loss yet". */
134 stb r0,PACA_NAPSTATELOST(r13)
136 /* Continue saving state */
144 * Go to real mode to do the nap, as required by the architecture.
145 * Also, we need to be in real mode before setting hwthread_state,
146 * because as soon as we do that, another thread can switch
147 * the MMU context to the guest.
/* Build the MSR value used while idle; the rfid-style transition to real
 * mode using r7 happens on lines not visible in this excerpt. */
149 LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
/*
 * ISA 2.07 (POWER7/POWER8) idle entry, reached in real mode from
 * pnv_powersave_common. r3 = PNV_THREAD_NAP/SLEEP/WINKLE.
 * NOTE(review): the matching #endif and the branch taken for sleep/winkle
 * (based on cr3 below) are not visible in this excerpt.
 */
153 .globl pnv_enter_arch207_idle_mode
154 pnv_enter_arch207_idle_mode:
155 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
156 /* Tell KVM we're entering idle */
157 li r4,KVM_HWTHREAD_IN_IDLE
158 /******************************************************/
159 /* N O T E W E L L ! ! ! N O T E W E L L */
160 /* The following store to HSTATE_HWTHREAD_STATE(r13) */
161 /* MUST occur in real mode, i.e. with the MMU off, */
162 /* and the MMU must stay off until we clear this flag */
163 /* and test HSTATE_HWTHREAD_REQ(r13) in */
164 /* pnv_powersave_wakeup in this file. */
165 /* The reason is that another thread can switch the */
166 /* MMU to a guest context whenever this flag is set */
167 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
168 /* that would potentially cause this thread to start */
169 /* executing instructions from guest memory in */
170 /* hypervisor mode, leading to a host crash or data */
171 /* corruption, or worse. */
172 /******************************************************/
173 stb r4,HSTATE_HWTHREAD_STATE(r13)
/* Record the requested idle state so the wakeup path can tell how deep
 * this thread slept. */
175 stb r3,PACA_THREAD_IDLE_STATE(r13)
/* cr3 = (requested state vs SLEEP); sleep/winkle take a branch that is not
 * visible in this excerpt, nap falls through. */
176 cmpwi cr3,r3,PNV_THREAD_SLEEP
178 IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
181 /* Sleep or winkle */
/* r7 = this thread's bit in core_idle_state; r14 = &core_idle_state. */
182 lbz r7,PACA_THREAD_MASK(r13)
183 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
/* r5 = winkle-count increment (upper halfword); added only for winkle. */
186 lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
/* Spin (via core_idle_lock_held) while another thread holds the core lock;
 * the lwarx that loads r15 is on a line not visible in this excerpt. */
191 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
192 bnel- core_idle_lock_held
194 add r15,r15,r5 /* Add if winkle */
195 andc r15,r15,r7 /* Clear thread bit */
/* cr0 eq => this is the last thread of the core going to sleep/winkle. */
197 andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
200 * If cr0 = 0, then current thread is the last thread of the core entering
201 * sleep. Last thread needs to execute the hardware bug workaround code if
202 * required by the platform.
203 * Make the workaround call unconditionally here. The below branch call is
204 * patched out when the idle states are discovered if the platform does not
/* Global label so the idle-state discovery code can patch the branch out. */
207 .global pnv_fastsleep_workaround_at_entry
208 pnv_fastsleep_workaround_at_entry:
209 beq fastsleep_workaround_at_entry
215 common_enter: /* common code for all the threads entering sleep or winkle */
217 IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
/*
 * Last thread entering fastsleep: take the core lock, apply the platform
 * fastsleep workaround via OPAL, then release the lock.
 * NOTE(review): the store-conditional loops around the lock set/clear and
 * the enter_winkle label (owning the save_sprs_to_stack call below) are on
 * lines not visible in this excerpt.
 */
219 fastsleep_workaround_at_entry:
220 oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
225 /* Fast sleep workaround */
/* OPAL call arguments are set up on lines not visible here. */
228 bl opal_config_cpu_idle_state
/* Clear the lock bit (xoris toggles it off, as it is known to be set). */
231 xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
/* Winkle loses per-thread SPRs: save them to the stack before stopping. */
237 bl save_sprs_to_stack
239 IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
/*
 * NOTE(review): this is the POWER9 stop entry path (power_enter_stop per the
 * LOAD_REG_ADDR in power9_idle_stop below); the label itself is not visible
 * in this excerpt.
 */
242 * r3 - PSSCR value corresponding to the requested stop state.
245 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
246 /* Tell KVM we're entering idle */
247 li r4,KVM_HWTHREAD_IN_IDLE
248 /* DO THIS IN REAL MODE! See comment above. */
249 stb r4,HSTATE_HWTHREAD_STATE(r13)
252 * Check if we are executing the lite variant with ESL=EC=0
/* Test PSSCR[EC|ESL] (shifted mask matches andis.'s upper-halfword form). */
254 andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
255 clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
256 bne .Lhandle_esl_ec_set
/* ESL=EC=0: stop returns inline at the next instruction, no state loss. */
257 IDLE_STATE_ENTER_SEQ(PPC_STOP)
258 li r3,0 /* Since we didn't lose state, return 0 */
261 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
262 * it can determine if the wakeup reason is an HMI in
263 * CHECK_HMI_INTERRUPT.
265 * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
266 * reason, so there is no point setting r12 to SRR1.
268 * Further, we clear r12 here, so that we don't accidentally enter the
269 * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
276 * Check if the requested state is a deep idle state.
/* r4 = pnv_first_deep_stop_state; compared against the RL in r3 (the cmpd
 * is on a line not visible in this excerpt). */
278 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
279 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
281 bge .Lhandle_deep_stop
/* Shallow stop with ESL=EC=1: no extra bookkeeping needed. */
282 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
285 * Entering deep idle state.
286 * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
287 * stack and enter stop
289 lbz r7,PACA_THREAD_MASK(r13)
290 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
/* Spin while another thread holds the core lock, then drop our thread bit
 * (lwarx/stwcx. loop partly on lines not visible in this excerpt). */
294 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
295 bnel- core_idle_lock_held
296 andc r15,r15,r7 /* Clear thread bit */
/* Deep stop loses SPRs: save them before stopping. */
302 bl save_sprs_to_stack
304 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
307 * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
308 * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
/* ISA 2.07 entry point: hand the real-mode handler address to the common
 * powersave path in r4, state stays in r3. */
310 _GLOBAL(power7_idle_insn)
311 /* Now check if user or arch enabled NAP mode */
312 LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
313 b pnv_powersave_common
/*
 * Decode the SRR1 wake-reason field from r12 and, if it indicates a
 * Hypervisor Maintenance Interrupt, call hmi_exception_realmode (preserving
 * the caller's r3 around the call).
 * NOTE(review): the macro body is truncated in this excerpt (the skip branch
 * after the cmpwi and the closing lines are not visible); no comments are
 * inserted inside it because every body line carries a `\` continuation.
 */
315 #define CHECK_HMI_INTERRUPT \
316 BEGIN_FTR_SECTION_NESTED(66); \
317 rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
318 FTR_SECTION_ELSE_NESTED(66); \
319 rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
320 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
321 cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
323 /* Invoke opal call to handle hmi */ \
324 ld r2,PACATOC(r13); \
326 std r3,ORIG_GPR3(r1); /* Save original r3 */ \
327 li r3,0; /* NULL argument */ \
328 bl hmi_exception_realmode; \
330 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
334 * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
335 * r3 contains desired PSSCR register value.
/* POWER9 entry point: stash the requested PSSCR so the wakeup path can
 * recover the requested level, then take the common powersave path with
 * power_enter_stop as the real-mode handler. */
337 _GLOBAL(power9_idle_stop)
338 std r3, PACA_REQ_PSSCR(r13)
340 LOAD_REG_ADDR(r4,power_enter_stop)
341 b pnv_powersave_common
345 * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
346 * HSPRG0 will be set to the HSPRG0 value of one of the
347 * threads in this core. Thus the value we have in r13
348 * may not be this thread's paca pointer.
350 * Fortunately, the TIR remains invariant. Since this thread's
351 * paca pointer is recorded in all its sibling's paca, we can
352 * correctly recover this thread's paca pointer if we
353 * know the index of this thread in the core.
355 * This index can be obtained from the TIR.
357 * i.e, thread's position in the core = TIR.
358 * If this value is i, then this thread's paca is
359 * paca->thread_sibling_pacas[i].
361 power9_dd1_recover_paca:
/* NOTE(review): the mfspr of TIR into r4 is on a line not visible here. */
364 * Since each entry in thread_sibling_pacas is 8 bytes
365 * we need to left-shift by 3 bits. Thus r4 = i * 8
368 /* Get &paca->thread_sibling_pacas[0] in r5 */
369 ld r5, PACA_SIBLING_PACA_PTRS(r13)
370 /* Load paca->thread_sibling_pacas[i] into r13 */
/* The ldx that rewrites r13 is on a line not visible in this excerpt. */
374 * Indicate that we have lost NVGPR state
375 * which needs to be restored from the stack.
/* r3 is loaded with a nonzero flag on a line not visible here. */
378 stb r3,PACA_NAPSTATELOST(r13)
382 * Called from machine check handler for powersave wakeups.
383 * Low level machine check processing has already been done. Now just
384 * go through the wake up path to get everything in order.
386 * r3 - The original SRR1 value.
387 * Original SRR[01] have been clobbered.
390 .global pnv_powersave_wakeup_mce
391 pnv_powersave_wakeup_mce:
392 /* Set cr3 for pnv_powersave_wakeup */
/* Extract SRR1[46:47] (state-loss field) into r11; the cmpwi that actually
 * sets cr3 is on a line not visible in this excerpt. */
393 rlwinm r11,r3,47-31,30,31
397 * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
398 * reason into r12, which allows reuse of the system reset wakeup
399 * code without being mistaken for another type of wakeup.
401 oris r12,r3,SRR1_WAKEMCE_RESVD@h
403 b pnv_powersave_wakeup
406 * Called from reset vector for powersave wakeups.
407 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
410 .global pnv_powersave_wakeup
411 pnv_powersave_wakeup:
/* POWER9 DD1 may wake with the wrong HSPRG0/r13; recover it first. */
415 BEGIN_FTR_SECTION_NESTED(70)
416 bl power9_dd1_recover_paca
417 END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
418 bl pnv_restore_hyp_resource_arch300
/* FTR_SECTION_ELSE for pre-ISA-3.0 parts (section begin not visible here). */
420 bl pnv_restore_hyp_resource_arch207
421 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
423 li r0,PNV_THREAD_RUNNING
424 stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
428 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
429 li r0,KVM_HWTHREAD_IN_KERNEL
430 stb r0,HSTATE_HWTHREAD_STATE(r13)
431 /* Order setting hwthread_state vs. testing hwthread_req */
/* The sync barrier and the branch to guest entry when a guest is requested
 * are on lines not visible in this excerpt; matching #endif also elided. */
433 lbz r0,HSTATE_HWTHREAD_REQ(r13)
440 /* Return SRR1 from power7_nap() */
/* cr3 lt => no hypervisor state loss: fast path restores nothing extra. */
441 blt cr3,pnv_wakeup_noloss
445 * Check whether we have woken up with hypervisor state loss.
446 * If yes, restore hypervisor state and return back to link.
448 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
450 pnv_restore_hyp_resource_arch300:
452 * Workaround for POWER9, if we lost resources, the ERAT
453 * might have been mixed up and needs flushing.
459 * POWER ISA 3. Use PSSCR to determine if we
460 * are waking up from deep idle state
462 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
463 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
/* POWER9 DD1 cannot trust PSSCR's PLS after wakeup; fall back to the PSSCR
 * value we requested (saved in power9_idle_stop). */
465 BEGIN_FTR_SECTION_NESTED(71)
467 * Assume that we are waking up from the state
468 * same as the Requested Level (RL) in the PSSCR
469 * which are Bits 60-63
471 ld r5,PACA_REQ_PSSCR(r13)
473 FTR_SECTION_ELSE_NESTED(71)
475 * 0-3 bits correspond to Power-Saving Level Status
476 * which indicates the idle state we are waking up from
480 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
/* cr4 = (wakeup level vs first deep stop state); deep states need the full
 * timebase/hypervisor-state restore path. */
482 bge cr4,pnv_wakeup_tb_loss /* returns to caller */
484 blr /* Waking up without hypervisor state loss. */
486 /* Same calling convention as arch300 */
487 pnv_restore_hyp_resource_arch207:
489 * POWER ISA 2.07 or less.
490 * Check if we slept with sleep or winkle.
/* PNV_THREAD_NAP < SLEEP < WINKLE, so gt means sleep or winkle. */
492 lbz r4,PACA_THREAD_IDLE_STATE(r13)
493 cmpwi cr2,r4,PNV_THREAD_NAP
494 bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
497 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
498 * up from nap. At this stage CR3 shouldn't contain 'gt' since that
499 * indicates we are waking with hypervisor state loss from nap.
503 blr /* Waking up without hypervisor state loss */
/*
 * NOTE(review): the routine label (pnv_wakeup_tb_loss, per the callers
 * above) is not visible in this excerpt; several instruction runs inside
 * are also elided.
 */
506 * Called if waking up from idle state which can cause either partial or
507 * complete hyp state loss.
508 * In POWER8, called if waking up from fastsleep or winkle
509 * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
512 * cr3 - gt if waking up with partial/complete hypervisor state loss
515 * cr4 - gt or eq if waking up from complete hypervisor state loss.
518 * r4 - PACA_THREAD_IDLE_STATE
523 * Before entering any idle state, the NVGPRs are saved in the stack.
524 * If there was a state loss, or PACA_NAPSTATELOST was set, then the
525 * NVGPRs are restored. If we are here, it is likely that state is lost,
526 * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
527 * here are the same as the test to restore NVGPRS:
528 * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
529 * and SRR1 test for restoring NVGPRs.
531 * We are about to clobber NVGPRs now, so set NAPSTATELOST to
532 * guarantee they will always be restored. This might be tightened
533 * with careful reading of specs (particularly for ISA300) but this
534 * is already a slow wakeup path and it's simpler to be safe.
/* r0 is loaded with a nonzero flag on a line not visible here. */
537 stb r0,PACA_NAPSTATELOST(r13)
541 * Save SRR1 and LR in NVGPRs as they might be clobbered in
542 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
543 * to determine the wakeup reason if we branch to kvm_start_guest. LR
544 * is required to return back to reset vector after hypervisor state
545 * restore is complete.
/* Closes an HMI-check feature section begun on a line not visible here. */
552 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
554 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
555 lbz r7,PACA_THREAD_MASK(r13)
558 * Take the core lock to synchronize against other threads.
560 * Lock bit is set in one of the 2 cases-
561 * a. In the sleep/winkle enter path, the last thread is executing
562 * fastsleep workaround code.
563 * b. In the wake up path, another thread is executing fastsleep
564 * workaround undo code or resyncing timebase or restoring context
565 * In either case loop until the lock bit is cleared.
/* lwarx/stwcx. loop around the lock acquisition partly elided here. */
569 andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
570 bnel- core_idle_lock_held
571 oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
/* cr2 eq => we are the first thread of the core to wake. */
576 andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
581 * cr2 - eq if first thread to wakeup in core
582 * cr3- gt if waking up with partial/complete hypervisor state loss
584 * cr4 - gt or eq if waking up from complete hypervisor state loss.
590 * If yes, check if all threads were in winkle, decrement our
591 * winkle count, set all thread winkle bits if all were in winkle.
592 * Check if our thread has a winkle bit set, and set cr4 accordingly
593 * (to match ISA300, above). Pseudo-code for core idle state
594 * transitions for ISA207 is as follows (everything happens atomically
595 * due to store conditional and/or lock bit):
602 * core_idle_state &= ~thread_in_core
607 * bool first_in_core, first_in_subcore;
609 * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
610 * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
612 * core_idle_state |= thread_in_core;
617 * core_idle_state &= ~thread_in_core;
618 * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
623 * bool first_in_core, first_in_subcore, winkle_state_lost;
625 * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
626 * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
628 * core_idle_state |= thread_in_core;
630 * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SHIFT))
631 * core_idle_state |= THREAD_WINKLE_BITS;
632 * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
634 * winkle_state_lost = core_idle_state &
635 * (thread_in_core << WINKLE_THREAD_SHIFT);
636 * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
/* r18 holds PACA_THREAD_IDLE_STATE (loaded on a line not visible here). */
640 cmpwi r18,PNV_THREAD_WINKLE
642 andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
643 subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
645 ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
647 /* Shift thread bit to winkle mask, then test if this thread is set,
648 * and remove it from the winkle bits */
652 cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
654 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
/* cr0 eq => first thread of the subcore to wake. */
656 cmpwi r4,0 /* Check if first in subcore */
658 or r15,r15,r7 /* Set thread bit */
659 beq first_thread_in_subcore
660 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
662 or r15,r15,r7 /* Set thread bit */
663 beq cr2,first_thread_in_core
665 /* Not first thread in core or subcore to wake up */
/* Falls through to clear_lock (label on a line not visible here). */
668 first_thread_in_subcore:
670 * If waking up from sleep, subcore state is not lost. Hence
671 * skip subcore state restore
673 blt cr4,subcore_state_restored
675 /* Restore per-subcore state */
684 subcore_state_restored:
686 * Check if the thread is also the first thread in the core. If not,
687 * skip to clear_lock.
691 first_thread_in_core:
694 * First thread in the core waking up from any state which can cause
695 * partial or complete hypervisor state loss. It needs to
696 * call the fastsleep workaround code if the platform requires it.
697 * Call it unconditionally here. The below branch instruction will
698 * be patched out if the platform does not have fastsleep or does not
699 * require the workaround. Patching will be performed during the
700 * discovery of idle-states.
702 .global pnv_fastsleep_workaround_at_exit
703 pnv_fastsleep_workaround_at_exit:
704 b fastsleep_workaround_at_exit
708 * Use cr3 which indicates that we are waking up with at least partial
709 * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
711 ble cr3,.Ltb_resynced
712 /* Time base re-sync */
713 bl opal_resync_timebase;
715 * If waking up from sleep (POWER8), per core state
716 * is not lost, skip to clear_lock.
722 * First thread in the core to wake up and its waking up with
723 * complete hypervisor state loss. Restore per core hypervisor
731 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/* Release the core lock (xoris toggles the known-set lock bit off). */
739 xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
745 * Common to all threads.
747 * If waking up from sleep, hypervisor state is not lost. Hence
748 * skip hypervisor state restore.
750 blt cr4,hypervisor_state_restored
752 /* Waking up from winkle */
/* Skip SLB restore on radix MMU (no SLB to restore). */
754 BEGIN_MMU_FTR_SECTION
756 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
757 /* Restore SLB from PACA */
758 ld r8,PACA_SLBSHADOWPTR(r13)
761 li r3, SLBSHADOW_SAVEAREA
/* Only reinstall SLB entries whose valid bit is set; the slbmte loop body
 * is partly on lines not visible in this excerpt. */
765 andis. r7,r5,SLB_ESID_V@h
772 /* Restore per thread state */
783 /* Call cur_cpu_spec->cpu_restore() */
784 LOAD_REG_ADDR(r4, cur_cpu_spec)
786 ld r12,CPU_SPEC_RESTORE(r4)
/* ELFv1 function descriptors: the entry address must be loaded from the
 * descriptor (deref on a line not visible here); matching #endif elided. */
787 #ifdef PPC64_ELF_ABI_v1
796 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
797 hypervisor_state_restored:
801 blr /* return to pnv_powersave_wakeup */
/*
 * Undo the platform fastsleep workaround via OPAL on the exit path.
 * NOTE(review): argument setup for the OPAL call and the branch back to
 * timebase_resync are on lines not visible in this excerpt.
 */
803 fastsleep_workaround_at_exit:
806 bl opal_config_cpu_idle_state
810 * R3 here contains the value that will be returned to the caller
812 * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
/* Full-state-loss return path: restores NVGPRs/CR/LR from the interrupt
 * frame (restore sequence on lines not visible in this excerpt). */
814 .global pnv_wakeup_loss
819 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
/* Pop the frame allocated in pnv_powersave_common. */
825 addi r1,r1,INT_FRAME_SIZE
832 * R3 here contains the value that will be returned to the caller
834 * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
/* No-loss path (pnv_wakeup_noloss; label not visible here): if NAPSTATELOST
 * was set anyway, the branch to pnv_wakeup_loss is on an elided line. */
837 lbz r0,PACA_NAPSTATELOST(r13)
843 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
847 addi r1,r1,INT_FRAME_SIZE