]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/idle/intel_idle.c
intel_idle: Use ACPI _CST for processor models without C-state tables
[linux.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  */
8
9 /*
10  * intel_idle is a cpuidle driver that loads on specific Intel processors
11  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
12  * make Linux more efficient on these processors, as intel_idle knows
13  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
14  */
15
16 /*
17  * Design Assumptions
18  *
19  * All CPUs have same idle states as boot CPU
20  *
21  * Chipset BM_STS (bus master status) bit is a NOP
22  *      for preventing entry into deep C-states
23  */
24
25 /*
26  * Known limitations
27  *
28  * The driver currently initializes for_each_online_cpu() upon modprobe.
29  * It is unaware of subsequent processors hot-added to the system.
30  * This means that if you boot with maxcpus=n and later online
31  * processors above n, those processors will use C1 only.
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* DEBUG is defined to enable pr_debug() statements; comment it out to disable them */
40 #define DEBUG
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57
58 #define INTEL_IDLE_VERSION "0.4.1"
59
60 static struct cpuidle_driver intel_idle_driver = {
61         .name = "intel_idle",
62         .owner = THIS_MODULE,
63 };
64 /* intel_idle.max_cstate=0 disables driver */
65 static int max_cstate = CPUIDLE_STATE_MAX - 1;
66
67 static unsigned int mwait_substates;
68
/* Mask value meaning the LAPIC timer is reliable in every C-state. */
#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF

/*
 * Bitmask of C-states in which the LAPIC timer is reliable:
 * bit 1 stands for C1, and so on.  Only C1 is set by default.
 */
static unsigned int lapic_timer_reliable_states = 1 << 1;
72
/*
 * Per-model idle description: the C-state table to use plus a few
 * model-specific tweak flags.
 */
struct idle_cpu {
	/* Table of idle states for this model. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * These are the enable bits that should be cleared.
	 */
	unsigned long auto_demotion_disable_flags;
	/* Boolean counterpart of the above, named for "byt". */
	bool byt_auto_demotion_disable_flag;
	/* When true, promotion to C1E is to be disabled. */
	bool disable_promotion_to_c1e;
};
84
85 static const struct idle_cpu *icpu;
86 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
87 static int intel_idle(struct cpuidle_device *dev,
88                         struct cpuidle_driver *drv, int index);
89 static void intel_idle_s2idle(struct cpuidle_device *dev,
90                               struct cpuidle_driver *drv, int index);
91 static struct cpuidle_state *cpuidle_state_table;
92
/*
 * Flag for states in which the hardware flushes the TLB for us, so no
 * cross-calls are needed to keep it consistent.
 * When the flag is set, software flushes the TLB as well, which makes
 * the flag safe to use even if the hardware does not do the flushing.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	(1 << 16)
100
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 * MWAIT2flg() puts the low 8 bits of the hint into flag bits 31:24 and
 * flg2MWAIT() extracts them again.
 *
 * Both macros parenthesize their argument so that an expression such as
 * "a | b" is masked as a whole rather than only its last operand.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
110
111 /*
112  * Each table lists its states in order and ends with an entry whose
113  * .enter is NULL.  A state's table index is not its C-state number
114  * (there is no C0 dummy); the MWAIT hint is kept in the state's flags.
115  */
116 static struct cpuidle_state nehalem_cstates[] = {
117         {
118                 .name = "C1",
119                 .desc = "MWAIT 0x00",
120                 .flags = MWAIT2flg(0x00),
121                 .exit_latency = 3,
122                 .target_residency = 6,
123                 .enter = &intel_idle,
124                 .enter_s2idle = intel_idle_s2idle, },
125         {
126                 .name = "C1E",
127                 .desc = "MWAIT 0x01",
128                 .flags = MWAIT2flg(0x01),
129                 .exit_latency = 10,
130                 .target_residency = 20,
131                 .enter = &intel_idle,
132                 .enter_s2idle = intel_idle_s2idle, },
133         {
134                 .name = "C3",
135                 .desc = "MWAIT 0x10",
136                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
137                 .exit_latency = 20,
138                 .target_residency = 80,
139                 .enter = &intel_idle,
140                 .enter_s2idle = intel_idle_s2idle, },
141         {
142                 .name = "C6",
143                 .desc = "MWAIT 0x20",
144                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
145                 .exit_latency = 200,
146                 .target_residency = 800,
147                 .enter = &intel_idle,
148                 .enter_s2idle = intel_idle_s2idle, },
149         {
150                 .enter = NULL }
151 };
152
153 static struct cpuidle_state snb_cstates[] = {
154         {
155                 .name = "C1",
156                 .desc = "MWAIT 0x00",
157                 .flags = MWAIT2flg(0x00),
158                 .exit_latency = 2,
159                 .target_residency = 2,
160                 .enter = &intel_idle,
161                 .enter_s2idle = intel_idle_s2idle, },
162         {
163                 .name = "C1E",
164                 .desc = "MWAIT 0x01",
165                 .flags = MWAIT2flg(0x01),
166                 .exit_latency = 10,
167                 .target_residency = 20,
168                 .enter = &intel_idle,
169                 .enter_s2idle = intel_idle_s2idle, },
170         {
171                 .name = "C3",
172                 .desc = "MWAIT 0x10",
173                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
174                 .exit_latency = 80,
175                 .target_residency = 211,
176                 .enter = &intel_idle,
177                 .enter_s2idle = intel_idle_s2idle, },
178         {
179                 .name = "C6",
180                 .desc = "MWAIT 0x20",
181                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
182                 .exit_latency = 104,
183                 .target_residency = 345,
184                 .enter = &intel_idle,
185                 .enter_s2idle = intel_idle_s2idle, },
186         {
187                 .name = "C7",
188                 .desc = "MWAIT 0x30",
189                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
190                 .exit_latency = 109,
191                 .target_residency = 345,
192                 .enter = &intel_idle,
193                 .enter_s2idle = intel_idle_s2idle, },
194         {
195                 .enter = NULL }
196 };
197
198 static struct cpuidle_state byt_cstates[] = {
199         {
200                 .name = "C1",
201                 .desc = "MWAIT 0x00",
202                 .flags = MWAIT2flg(0x00),
203                 .exit_latency = 1,
204                 .target_residency = 1,
205                 .enter = &intel_idle,
206                 .enter_s2idle = intel_idle_s2idle, },
207         {
208                 .name = "C6N",
209                 .desc = "MWAIT 0x58",
210                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
211                 .exit_latency = 300,
212                 .target_residency = 275,
213                 .enter = &intel_idle,
214                 .enter_s2idle = intel_idle_s2idle, },
215         {
216                 .name = "C6S",
217                 .desc = "MWAIT 0x52",
218                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
219                 .exit_latency = 500,
220                 .target_residency = 560,
221                 .enter = &intel_idle,
222                 .enter_s2idle = intel_idle_s2idle, },
223         {
224                 .name = "C7",
225                 .desc = "MWAIT 0x60",
226                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
227                 .exit_latency = 1200,
228                 .target_residency = 4000,
229                 .enter = &intel_idle,
230                 .enter_s2idle = intel_idle_s2idle, },
231         {
232                 .name = "C7S",
233                 .desc = "MWAIT 0x64",
234                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
235                 .exit_latency = 10000,
236                 .target_residency = 20000,
237                 .enter = &intel_idle,
238                 .enter_s2idle = intel_idle_s2idle, },
239         {
240                 .enter = NULL }
241 };
242
243 static struct cpuidle_state cht_cstates[] = {
244         {
245                 .name = "C1",
246                 .desc = "MWAIT 0x00",
247                 .flags = MWAIT2flg(0x00),
248                 .exit_latency = 1,
249                 .target_residency = 1,
250                 .enter = &intel_idle,
251                 .enter_s2idle = intel_idle_s2idle, },
252         {
253                 .name = "C6N",
254                 .desc = "MWAIT 0x58",
255                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
256                 .exit_latency = 80,
257                 .target_residency = 275,
258                 .enter = &intel_idle,
259                 .enter_s2idle = intel_idle_s2idle, },
260         {
261                 .name = "C6S",
262                 .desc = "MWAIT 0x52",
263                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
264                 .exit_latency = 200,
265                 .target_residency = 560,
266                 .enter = &intel_idle,
267                 .enter_s2idle = intel_idle_s2idle, },
268         {
269                 .name = "C7",
270                 .desc = "MWAIT 0x60",
271                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
272                 .exit_latency = 1200,
273                 .target_residency = 4000,
274                 .enter = &intel_idle,
275                 .enter_s2idle = intel_idle_s2idle, },
276         {
277                 .name = "C7S",
278                 .desc = "MWAIT 0x64",
279                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
280                 .exit_latency = 10000,
281                 .target_residency = 20000,
282                 .enter = &intel_idle,
283                 .enter_s2idle = intel_idle_s2idle, },
284         {
285                 .enter = NULL }
286 };
287
288 static struct cpuidle_state ivb_cstates[] = {
289         {
290                 .name = "C1",
291                 .desc = "MWAIT 0x00",
292                 .flags = MWAIT2flg(0x00),
293                 .exit_latency = 1,
294                 .target_residency = 1,
295                 .enter = &intel_idle,
296                 .enter_s2idle = intel_idle_s2idle, },
297         {
298                 .name = "C1E",
299                 .desc = "MWAIT 0x01",
300                 .flags = MWAIT2flg(0x01),
301                 .exit_latency = 10,
302                 .target_residency = 20,
303                 .enter = &intel_idle,
304                 .enter_s2idle = intel_idle_s2idle, },
305         {
306                 .name = "C3",
307                 .desc = "MWAIT 0x10",
308                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
309                 .exit_latency = 59,
310                 .target_residency = 156,
311                 .enter = &intel_idle,
312                 .enter_s2idle = intel_idle_s2idle, },
313         {
314                 .name = "C6",
315                 .desc = "MWAIT 0x20",
316                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
317                 .exit_latency = 80,
318                 .target_residency = 300,
319                 .enter = &intel_idle,
320                 .enter_s2idle = intel_idle_s2idle, },
321         {
322                 .name = "C7",
323                 .desc = "MWAIT 0x30",
324                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
325                 .exit_latency = 87,
326                 .target_residency = 300,
327                 .enter = &intel_idle,
328                 .enter_s2idle = intel_idle_s2idle, },
329         {
330                 .enter = NULL }
331 };
332
333 static struct cpuidle_state ivt_cstates[] = {
334         {
335                 .name = "C1",
336                 .desc = "MWAIT 0x00",
337                 .flags = MWAIT2flg(0x00),
338                 .exit_latency = 1,
339                 .target_residency = 1,
340                 .enter = &intel_idle,
341                 .enter_s2idle = intel_idle_s2idle, },
342         {
343                 .name = "C1E",
344                 .desc = "MWAIT 0x01",
345                 .flags = MWAIT2flg(0x01),
346                 .exit_latency = 10,
347                 .target_residency = 80,
348                 .enter = &intel_idle,
349                 .enter_s2idle = intel_idle_s2idle, },
350         {
351                 .name = "C3",
352                 .desc = "MWAIT 0x10",
353                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
354                 .exit_latency = 59,
355                 .target_residency = 156,
356                 .enter = &intel_idle,
357                 .enter_s2idle = intel_idle_s2idle, },
358         {
359                 .name = "C6",
360                 .desc = "MWAIT 0x20",
361                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
362                 .exit_latency = 82,
363                 .target_residency = 300,
364                 .enter = &intel_idle,
365                 .enter_s2idle = intel_idle_s2idle, },
366         {
367                 .enter = NULL }
368 };
369
370 static struct cpuidle_state ivt_cstates_4s[] = {
371         {
372                 .name = "C1",
373                 .desc = "MWAIT 0x00",
374                 .flags = MWAIT2flg(0x00),
375                 .exit_latency = 1,
376                 .target_residency = 1,
377                 .enter = &intel_idle,
378                 .enter_s2idle = intel_idle_s2idle, },
379         {
380                 .name = "C1E",
381                 .desc = "MWAIT 0x01",
382                 .flags = MWAIT2flg(0x01),
383                 .exit_latency = 10,
384                 .target_residency = 250,
385                 .enter = &intel_idle,
386                 .enter_s2idle = intel_idle_s2idle, },
387         {
388                 .name = "C3",
389                 .desc = "MWAIT 0x10",
390                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
391                 .exit_latency = 59,
392                 .target_residency = 300,
393                 .enter = &intel_idle,
394                 .enter_s2idle = intel_idle_s2idle, },
395         {
396                 .name = "C6",
397                 .desc = "MWAIT 0x20",
398                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
399                 .exit_latency = 84,
400                 .target_residency = 400,
401                 .enter = &intel_idle,
402                 .enter_s2idle = intel_idle_s2idle, },
403         {
404                 .enter = NULL }
405 };
406
407 static struct cpuidle_state ivt_cstates_8s[] = {
408         {
409                 .name = "C1",
410                 .desc = "MWAIT 0x00",
411                 .flags = MWAIT2flg(0x00),
412                 .exit_latency = 1,
413                 .target_residency = 1,
414                 .enter = &intel_idle,
415                 .enter_s2idle = intel_idle_s2idle, },
416         {
417                 .name = "C1E",
418                 .desc = "MWAIT 0x01",
419                 .flags = MWAIT2flg(0x01),
420                 .exit_latency = 10,
421                 .target_residency = 500,
422                 .enter = &intel_idle,
423                 .enter_s2idle = intel_idle_s2idle, },
424         {
425                 .name = "C3",
426                 .desc = "MWAIT 0x10",
427                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
428                 .exit_latency = 59,
429                 .target_residency = 600,
430                 .enter = &intel_idle,
431                 .enter_s2idle = intel_idle_s2idle, },
432         {
433                 .name = "C6",
434                 .desc = "MWAIT 0x20",
435                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
436                 .exit_latency = 88,
437                 .target_residency = 700,
438                 .enter = &intel_idle,
439                 .enter_s2idle = intel_idle_s2idle, },
440         {
441                 .enter = NULL }
442 };
443
444 static struct cpuidle_state hsw_cstates[] = {
445         {
446                 .name = "C1",
447                 .desc = "MWAIT 0x00",
448                 .flags = MWAIT2flg(0x00),
449                 .exit_latency = 2,
450                 .target_residency = 2,
451                 .enter = &intel_idle,
452                 .enter_s2idle = intel_idle_s2idle, },
453         {
454                 .name = "C1E",
455                 .desc = "MWAIT 0x01",
456                 .flags = MWAIT2flg(0x01),
457                 .exit_latency = 10,
458                 .target_residency = 20,
459                 .enter = &intel_idle,
460                 .enter_s2idle = intel_idle_s2idle, },
461         {
462                 .name = "C3",
463                 .desc = "MWAIT 0x10",
464                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
465                 .exit_latency = 33,
466                 .target_residency = 100,
467                 .enter = &intel_idle,
468                 .enter_s2idle = intel_idle_s2idle, },
469         {
470                 .name = "C6",
471                 .desc = "MWAIT 0x20",
472                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
473                 .exit_latency = 133,
474                 .target_residency = 400,
475                 .enter = &intel_idle,
476                 .enter_s2idle = intel_idle_s2idle, },
477         {
478                 .name = "C7s",
479                 .desc = "MWAIT 0x32",
480                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
481                 .exit_latency = 166,
482                 .target_residency = 500,
483                 .enter = &intel_idle,
484                 .enter_s2idle = intel_idle_s2idle, },
485         {
486                 .name = "C8",
487                 .desc = "MWAIT 0x40",
488                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
489                 .exit_latency = 300,
490                 .target_residency = 900,
491                 .enter = &intel_idle,
492                 .enter_s2idle = intel_idle_s2idle, },
493         {
494                 .name = "C9",
495                 .desc = "MWAIT 0x50",
496                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
497                 .exit_latency = 600,
498                 .target_residency = 1800,
499                 .enter = &intel_idle,
500                 .enter_s2idle = intel_idle_s2idle, },
501         {
502                 .name = "C10",
503                 .desc = "MWAIT 0x60",
504                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
505                 .exit_latency = 2600,
506                 .target_residency = 7700,
507                 .enter = &intel_idle,
508                 .enter_s2idle = intel_idle_s2idle, },
509         {
510                 .enter = NULL }
511 };
512 static struct cpuidle_state bdw_cstates[] = {
513         {
514                 .name = "C1",
515                 .desc = "MWAIT 0x00",
516                 .flags = MWAIT2flg(0x00),
517                 .exit_latency = 2,
518                 .target_residency = 2,
519                 .enter = &intel_idle,
520                 .enter_s2idle = intel_idle_s2idle, },
521         {
522                 .name = "C1E",
523                 .desc = "MWAIT 0x01",
524                 .flags = MWAIT2flg(0x01),
525                 .exit_latency = 10,
526                 .target_residency = 20,
527                 .enter = &intel_idle,
528                 .enter_s2idle = intel_idle_s2idle, },
529         {
530                 .name = "C3",
531                 .desc = "MWAIT 0x10",
532                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
533                 .exit_latency = 40,
534                 .target_residency = 100,
535                 .enter = &intel_idle,
536                 .enter_s2idle = intel_idle_s2idle, },
537         {
538                 .name = "C6",
539                 .desc = "MWAIT 0x20",
540                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
541                 .exit_latency = 133,
542                 .target_residency = 400,
543                 .enter = &intel_idle,
544                 .enter_s2idle = intel_idle_s2idle, },
545         {
546                 .name = "C7s",
547                 .desc = "MWAIT 0x32",
548                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
549                 .exit_latency = 166,
550                 .target_residency = 500,
551                 .enter = &intel_idle,
552                 .enter_s2idle = intel_idle_s2idle, },
553         {
554                 .name = "C8",
555                 .desc = "MWAIT 0x40",
556                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
557                 .exit_latency = 300,
558                 .target_residency = 900,
559                 .enter = &intel_idle,
560                 .enter_s2idle = intel_idle_s2idle, },
561         {
562                 .name = "C9",
563                 .desc = "MWAIT 0x50",
564                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
565                 .exit_latency = 600,
566                 .target_residency = 1800,
567                 .enter = &intel_idle,
568                 .enter_s2idle = intel_idle_s2idle, },
569         {
570                 .name = "C10",
571                 .desc = "MWAIT 0x60",
572                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
573                 .exit_latency = 2600,
574                 .target_residency = 7700,
575                 .enter = &intel_idle,
576                 .enter_s2idle = intel_idle_s2idle, },
577         {
578                 .enter = NULL }
579 };
580
581 static struct cpuidle_state skl_cstates[] = {
582         {
583                 .name = "C1",
584                 .desc = "MWAIT 0x00",
585                 .flags = MWAIT2flg(0x00),
586                 .exit_latency = 2,
587                 .target_residency = 2,
588                 .enter = &intel_idle,
589                 .enter_s2idle = intel_idle_s2idle, },
590         {
591                 .name = "C1E",
592                 .desc = "MWAIT 0x01",
593                 .flags = MWAIT2flg(0x01),
594                 .exit_latency = 10,
595                 .target_residency = 20,
596                 .enter = &intel_idle,
597                 .enter_s2idle = intel_idle_s2idle, },
598         {
599                 .name = "C3",
600                 .desc = "MWAIT 0x10",
601                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
602                 .exit_latency = 70,
603                 .target_residency = 100,
604                 .enter = &intel_idle,
605                 .enter_s2idle = intel_idle_s2idle, },
606         {
607                 .name = "C6",
608                 .desc = "MWAIT 0x20",
609                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
610                 .exit_latency = 85,
611                 .target_residency = 200,
612                 .enter = &intel_idle,
613                 .enter_s2idle = intel_idle_s2idle, },
614         {
615                 .name = "C7s",
616                 .desc = "MWAIT 0x33",
617                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
618                 .exit_latency = 124,
619                 .target_residency = 800,
620                 .enter = &intel_idle,
621                 .enter_s2idle = intel_idle_s2idle, },
622         {
623                 .name = "C8",
624                 .desc = "MWAIT 0x40",
625                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
626                 .exit_latency = 200,
627                 .target_residency = 800,
628                 .enter = &intel_idle,
629                 .enter_s2idle = intel_idle_s2idle, },
630         {
631                 .name = "C9",
632                 .desc = "MWAIT 0x50",
633                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
634                 .exit_latency = 480,
635                 .target_residency = 5000,
636                 .enter = &intel_idle,
637                 .enter_s2idle = intel_idle_s2idle, },
638         {
639                 .name = "C10",
640                 .desc = "MWAIT 0x60",
641                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
642                 .exit_latency = 890,
643                 .target_residency = 5000,
644                 .enter = &intel_idle,
645                 .enter_s2idle = intel_idle_s2idle, },
646         {
647                 .enter = NULL }
648 };
649
650 static struct cpuidle_state skx_cstates[] = {
651         {
652                 .name = "C1",
653                 .desc = "MWAIT 0x00",
654                 .flags = MWAIT2flg(0x00),
655                 .exit_latency = 2,
656                 .target_residency = 2,
657                 .enter = &intel_idle,
658                 .enter_s2idle = intel_idle_s2idle, },
659         {
660                 .name = "C1E",
661                 .desc = "MWAIT 0x01",
662                 .flags = MWAIT2flg(0x01),
663                 .exit_latency = 10,
664                 .target_residency = 20,
665                 .enter = &intel_idle,
666                 .enter_s2idle = intel_idle_s2idle, },
667         {
668                 .name = "C6",
669                 .desc = "MWAIT 0x20",
670                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
671                 .exit_latency = 133,
672                 .target_residency = 600,
673                 .enter = &intel_idle,
674                 .enter_s2idle = intel_idle_s2idle, },
675         {
676                 .enter = NULL }
677 };
678
679 static struct cpuidle_state atom_cstates[] = {
680         {
681                 .name = "C1E",
682                 .desc = "MWAIT 0x00",
683                 .flags = MWAIT2flg(0x00),
684                 .exit_latency = 10,
685                 .target_residency = 20,
686                 .enter = &intel_idle,
687                 .enter_s2idle = intel_idle_s2idle, },
688         {
689                 .name = "C2",
690                 .desc = "MWAIT 0x10",
691                 .flags = MWAIT2flg(0x10),
692                 .exit_latency = 20,
693                 .target_residency = 80,
694                 .enter = &intel_idle,
695                 .enter_s2idle = intel_idle_s2idle, },
696         {
697                 .name = "C4",
698                 .desc = "MWAIT 0x30",
699                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
700                 .exit_latency = 100,
701                 .target_residency = 400,
702                 .enter = &intel_idle,
703                 .enter_s2idle = intel_idle_s2idle, },
704         {
705                 .name = "C6",
706                 .desc = "MWAIT 0x52",
707                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
708                 .exit_latency = 140,
709                 .target_residency = 560,
710                 .enter = &intel_idle,
711                 .enter_s2idle = intel_idle_s2idle, },
712         {
713                 .enter = NULL }
714 };
715 static struct cpuidle_state tangier_cstates[] = {
716         {
717                 .name = "C1",
718                 .desc = "MWAIT 0x00",
719                 .flags = MWAIT2flg(0x00),
720                 .exit_latency = 1,
721                 .target_residency = 4,
722                 .enter = &intel_idle,
723                 .enter_s2idle = intel_idle_s2idle, },
724         {
725                 .name = "C4",
726                 .desc = "MWAIT 0x30",
727                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
728                 .exit_latency = 100,
729                 .target_residency = 400,
730                 .enter = &intel_idle,
731                 .enter_s2idle = intel_idle_s2idle, },
732         {
733                 .name = "C6",
734                 .desc = "MWAIT 0x52",
735                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
736                 .exit_latency = 140,
737                 .target_residency = 560,
738                 .enter = &intel_idle,
739                 .enter_s2idle = intel_idle_s2idle, },
740         {
741                 .name = "C7",
742                 .desc = "MWAIT 0x60",
743                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
744                 .exit_latency = 1200,
745                 .target_residency = 4000,
746                 .enter = &intel_idle,
747                 .enter_s2idle = intel_idle_s2idle, },
748         {
749                 .name = "C9",
750                 .desc = "MWAIT 0x64",
751                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
752                 .exit_latency = 10000,
753                 .target_residency = 20000,
754                 .enter = &intel_idle,
755                 .enter_s2idle = intel_idle_s2idle, },
756         {
757                 .enter = NULL }
758 };
759 static struct cpuidle_state avn_cstates[] = {
760         {
761                 .name = "C1",
762                 .desc = "MWAIT 0x00",
763                 .flags = MWAIT2flg(0x00),
764                 .exit_latency = 2,
765                 .target_residency = 2,
766                 .enter = &intel_idle,
767                 .enter_s2idle = intel_idle_s2idle, },
768         {
769                 .name = "C6",
770                 .desc = "MWAIT 0x51",
771                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
772                 .exit_latency = 15,
773                 .target_residency = 45,
774                 .enter = &intel_idle,
775                 .enter_s2idle = intel_idle_s2idle, },
776         {
777                 .enter = NULL }
778 };
779 static struct cpuidle_state knl_cstates[] = {
780         {
781                 .name = "C1",
782                 .desc = "MWAIT 0x00",
783                 .flags = MWAIT2flg(0x00),
784                 .exit_latency = 1,
785                 .target_residency = 2,
786                 .enter = &intel_idle,
787                 .enter_s2idle = intel_idle_s2idle },
788         {
789                 .name = "C6",
790                 .desc = "MWAIT 0x10",
791                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
792                 .exit_latency = 120,
793                 .target_residency = 500,
794                 .enter = &intel_idle,
795                 .enter_s2idle = intel_idle_s2idle },
796         {
797                 .enter = NULL }
798 };
799
800 static struct cpuidle_state bxt_cstates[] = {
801         {
802                 .name = "C1",
803                 .desc = "MWAIT 0x00",
804                 .flags = MWAIT2flg(0x00),
805                 .exit_latency = 2,
806                 .target_residency = 2,
807                 .enter = &intel_idle,
808                 .enter_s2idle = intel_idle_s2idle, },
809         {
810                 .name = "C1E",
811                 .desc = "MWAIT 0x01",
812                 .flags = MWAIT2flg(0x01),
813                 .exit_latency = 10,
814                 .target_residency = 20,
815                 .enter = &intel_idle,
816                 .enter_s2idle = intel_idle_s2idle, },
817         {
818                 .name = "C6",
819                 .desc = "MWAIT 0x20",
820                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
821                 .exit_latency = 133,
822                 .target_residency = 133,
823                 .enter = &intel_idle,
824                 .enter_s2idle = intel_idle_s2idle, },
825         {
826                 .name = "C7s",
827                 .desc = "MWAIT 0x31",
828                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
829                 .exit_latency = 155,
830                 .target_residency = 155,
831                 .enter = &intel_idle,
832                 .enter_s2idle = intel_idle_s2idle, },
833         {
834                 .name = "C8",
835                 .desc = "MWAIT 0x40",
836                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
837                 .exit_latency = 1000,
838                 .target_residency = 1000,
839                 .enter = &intel_idle,
840                 .enter_s2idle = intel_idle_s2idle, },
841         {
842                 .name = "C9",
843                 .desc = "MWAIT 0x50",
844                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
845                 .exit_latency = 2000,
846                 .target_residency = 2000,
847                 .enter = &intel_idle,
848                 .enter_s2idle = intel_idle_s2idle, },
849         {
850                 .name = "C10",
851                 .desc = "MWAIT 0x60",
852                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
853                 .exit_latency = 10000,
854                 .target_residency = 10000,
855                 .enter = &intel_idle,
856                 .enter_s2idle = intel_idle_s2idle, },
857         {
858                 .enter = NULL }
859 };
860
861 static struct cpuidle_state dnv_cstates[] = {
862         {
863                 .name = "C1",
864                 .desc = "MWAIT 0x00",
865                 .flags = MWAIT2flg(0x00),
866                 .exit_latency = 2,
867                 .target_residency = 2,
868                 .enter = &intel_idle,
869                 .enter_s2idle = intel_idle_s2idle, },
870         {
871                 .name = "C1E",
872                 .desc = "MWAIT 0x01",
873                 .flags = MWAIT2flg(0x01),
874                 .exit_latency = 10,
875                 .target_residency = 20,
876                 .enter = &intel_idle,
877                 .enter_s2idle = intel_idle_s2idle, },
878         {
879                 .name = "C6",
880                 .desc = "MWAIT 0x20",
881                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
882                 .exit_latency = 50,
883                 .target_residency = 500,
884                 .enter = &intel_idle,
885                 .enter_s2idle = intel_idle_s2idle, },
886         {
887                 .enter = NULL }
888 };
889
890 /**
891  * intel_idle
892  * @dev: cpuidle_device
893  * @drv: cpuidle driver
894  * @index: index of cpuidle state
895  *
896  * Must be called under local_irq_disable().
897  */
898 static __cpuidle int intel_idle(struct cpuidle_device *dev,
899                                 struct cpuidle_driver *drv, int index)
900 {
901         unsigned long ecx = 1; /* break on interrupt flag */
902         struct cpuidle_state *state = &drv->states[index];
903         unsigned long eax = flg2MWAIT(state->flags);
904         unsigned int cstate;
905         bool uninitialized_var(tick);
906         int cpu = smp_processor_id();
907
908         /*
909          * leave_mm() to avoid costly and often unnecessary wakeups
910          * for flushing the user TLB's associated with the active mm.
911          */
912         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
913                 leave_mm(cpu);
914
915         if (!static_cpu_has(X86_FEATURE_ARAT)) {
916                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
917                                 MWAIT_CSTATE_MASK) + 1;
918                 tick = false;
919                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
920                         tick = true;
921                         tick_broadcast_enter();
922                 }
923         }
924
925         mwait_idle_with_hints(eax, ecx);
926
927         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
928                 tick_broadcast_exit();
929
930         return index;
931 }
932
933 /**
934  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
935  * @dev: cpuidle_device
936  * @drv: cpuidle driver
937  * @index: state index
938  */
939 static void intel_idle_s2idle(struct cpuidle_device *dev,
940                              struct cpuidle_driver *drv, int index)
941 {
942         unsigned long ecx = 1; /* break on interrupt flag */
943         unsigned long eax = flg2MWAIT(drv->states[index].flags);
944
945         mwait_idle_with_hints(eax, ecx);
946 }
947
948 static bool intel_idle_verify_cstate(unsigned int mwait_hint)
949 {
950         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
951         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
952                                         MWAIT_SUBSTATE_MASK;
953
954         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
955         if (num_substates == 0)
956                 return false;
957
958         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
959                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
960
961         return true;
962 }
963
964 static void __setup_broadcast_timer(bool on)
965 {
966         if (on)
967                 tick_broadcast_enable();
968         else
969                 tick_broadcast_disable();
970 }
971
972 static void auto_demotion_disable(void)
973 {
974         unsigned long long msr_bits;
975
976         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
977         msr_bits &= ~(icpu->auto_demotion_disable_flags);
978         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
979 }
980 static void c1e_promotion_disable(void)
981 {
982         unsigned long long msr_bits;
983
984         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
985         msr_bits &= ~0x2;
986         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
987 }
988
989 static const struct idle_cpu idle_cpu_nehalem = {
990         .state_table = nehalem_cstates,
991         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
992         .disable_promotion_to_c1e = true,
993 };
994
995 static const struct idle_cpu idle_cpu_atom = {
996         .state_table = atom_cstates,
997 };
998
999 static const struct idle_cpu idle_cpu_tangier = {
1000         .state_table = tangier_cstates,
1001 };
1002
1003 static const struct idle_cpu idle_cpu_lincroft = {
1004         .state_table = atom_cstates,
1005         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1006 };
1007
1008 static const struct idle_cpu idle_cpu_snb = {
1009         .state_table = snb_cstates,
1010         .disable_promotion_to_c1e = true,
1011 };
1012
1013 static const struct idle_cpu idle_cpu_byt = {
1014         .state_table = byt_cstates,
1015         .disable_promotion_to_c1e = true,
1016         .byt_auto_demotion_disable_flag = true,
1017 };
1018
1019 static const struct idle_cpu idle_cpu_cht = {
1020         .state_table = cht_cstates,
1021         .disable_promotion_to_c1e = true,
1022         .byt_auto_demotion_disable_flag = true,
1023 };
1024
1025 static const struct idle_cpu idle_cpu_ivb = {
1026         .state_table = ivb_cstates,
1027         .disable_promotion_to_c1e = true,
1028 };
1029
1030 static const struct idle_cpu idle_cpu_ivt = {
1031         .state_table = ivt_cstates,
1032         .disable_promotion_to_c1e = true,
1033 };
1034
1035 static const struct idle_cpu idle_cpu_hsw = {
1036         .state_table = hsw_cstates,
1037         .disable_promotion_to_c1e = true,
1038 };
1039
1040 static const struct idle_cpu idle_cpu_bdw = {
1041         .state_table = bdw_cstates,
1042         .disable_promotion_to_c1e = true,
1043 };
1044
1045 static const struct idle_cpu idle_cpu_skl = {
1046         .state_table = skl_cstates,
1047         .disable_promotion_to_c1e = true,
1048 };
1049
1050 static const struct idle_cpu idle_cpu_skx = {
1051         .state_table = skx_cstates,
1052         .disable_promotion_to_c1e = true,
1053 };
1054
1055 static const struct idle_cpu idle_cpu_avn = {
1056         .state_table = avn_cstates,
1057         .disable_promotion_to_c1e = true,
1058 };
1059
1060 static const struct idle_cpu idle_cpu_knl = {
1061         .state_table = knl_cstates,
1062 };
1063
1064 static const struct idle_cpu idle_cpu_bxt = {
1065         .state_table = bxt_cstates,
1066         .disable_promotion_to_c1e = true,
1067 };
1068
1069 static const struct idle_cpu idle_cpu_dnv = {
1070         .state_table = dnv_cstates,
1071         .disable_promotion_to_c1e = true,
1072 };
1073
1074 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1075         INTEL_CPU_FAM6(NEHALEM_EP,              idle_cpu_nehalem),
1076         INTEL_CPU_FAM6(NEHALEM,                 idle_cpu_nehalem),
1077         INTEL_CPU_FAM6(NEHALEM_G,               idle_cpu_nehalem),
1078         INTEL_CPU_FAM6(WESTMERE,                idle_cpu_nehalem),
1079         INTEL_CPU_FAM6(WESTMERE_EP,             idle_cpu_nehalem),
1080         INTEL_CPU_FAM6(NEHALEM_EX,              idle_cpu_nehalem),
1081         INTEL_CPU_FAM6(ATOM_BONNELL,            idle_cpu_atom),
1082         INTEL_CPU_FAM6(ATOM_BONNELL_MID,        idle_cpu_lincroft),
1083         INTEL_CPU_FAM6(WESTMERE_EX,             idle_cpu_nehalem),
1084         INTEL_CPU_FAM6(SANDYBRIDGE,             idle_cpu_snb),
1085         INTEL_CPU_FAM6(SANDYBRIDGE_X,           idle_cpu_snb),
1086         INTEL_CPU_FAM6(ATOM_SALTWELL,           idle_cpu_atom),
1087         INTEL_CPU_FAM6(ATOM_SILVERMONT,         idle_cpu_byt),
1088         INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,     idle_cpu_tangier),
1089         INTEL_CPU_FAM6(ATOM_AIRMONT,            idle_cpu_cht),
1090         INTEL_CPU_FAM6(IVYBRIDGE,               idle_cpu_ivb),
1091         INTEL_CPU_FAM6(IVYBRIDGE_X,             idle_cpu_ivt),
1092         INTEL_CPU_FAM6(HASWELL,                 idle_cpu_hsw),
1093         INTEL_CPU_FAM6(HASWELL_X,               idle_cpu_hsw),
1094         INTEL_CPU_FAM6(HASWELL_L,               idle_cpu_hsw),
1095         INTEL_CPU_FAM6(HASWELL_G,               idle_cpu_hsw),
1096         INTEL_CPU_FAM6(ATOM_SILVERMONT_D,       idle_cpu_avn),
1097         INTEL_CPU_FAM6(BROADWELL,               idle_cpu_bdw),
1098         INTEL_CPU_FAM6(BROADWELL_G,             idle_cpu_bdw),
1099         INTEL_CPU_FAM6(BROADWELL_X,             idle_cpu_bdw),
1100         INTEL_CPU_FAM6(BROADWELL_D,             idle_cpu_bdw),
1101         INTEL_CPU_FAM6(SKYLAKE_L,               idle_cpu_skl),
1102         INTEL_CPU_FAM6(SKYLAKE,                 idle_cpu_skl),
1103         INTEL_CPU_FAM6(KABYLAKE_L,              idle_cpu_skl),
1104         INTEL_CPU_FAM6(KABYLAKE,                idle_cpu_skl),
1105         INTEL_CPU_FAM6(SKYLAKE_X,               idle_cpu_skx),
1106         INTEL_CPU_FAM6(XEON_PHI_KNL,            idle_cpu_knl),
1107         INTEL_CPU_FAM6(XEON_PHI_KNM,            idle_cpu_knl),
1108         INTEL_CPU_FAM6(ATOM_GOLDMONT,           idle_cpu_bxt),
1109         INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,      idle_cpu_bxt),
1110         INTEL_CPU_FAM6(ATOM_GOLDMONT_D,         idle_cpu_dnv),
1111         INTEL_CPU_FAM6(ATOM_TREMONT_D,          idle_cpu_dnv),
1112         {}
1113 };
1114
1115 #define INTEL_CPU_FAM6_MWAIT \
1116         { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 }
1117
1118 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1119         INTEL_CPU_FAM6_MWAIT,
1120         {}
1121 };
1122
1123 static bool intel_idle_max_cstate_reached(int cstate)
1124 {
1125         if (cstate + 1 > max_cstate) {
1126                 pr_info("max_cstate %d reached\n", max_cstate);
1127                 return true;
1128         }
1129         return false;
1130 }
1131
1132 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1133 #include <acpi/processor.h>
1134
1135 static struct acpi_processor_power acpi_state_table;
1136
1137 /**
1138  * intel_idle_cst_usable - Check if the _CST information can be used.
1139  *
1140  * Check if all of the C-states listed by _CST in the max_cstate range are
1141  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1142  */
1143 static bool intel_idle_cst_usable(void)
1144 {
1145         int cstate, limit;
1146
1147         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1148                       acpi_state_table.count);
1149
1150         for (cstate = 1; cstate < limit; cstate++) {
1151                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1152
1153                 if (cx->entry_method != ACPI_CSTATE_FFH)
1154                         return false;
1155         }
1156
1157         return true;
1158 }
1159
1160 static bool intel_idle_acpi_cst_extract(void)
1161 {
1162         unsigned int cpu;
1163
1164         for_each_possible_cpu(cpu) {
1165                 struct acpi_processor *pr = per_cpu(processors, cpu);
1166
1167                 if (!pr)
1168                         continue;
1169
1170                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1171                         continue;
1172
1173                 acpi_state_table.count++;
1174
1175                 if (!intel_idle_cst_usable())
1176                         continue;
1177
1178                 if (!acpi_processor_claim_cst_control()) {
1179                         acpi_state_table.count = 0;
1180                         return false;
1181                 }
1182
1183                 return true;
1184         }
1185
1186         pr_debug("ACPI _CST not found or not usable\n");
1187         return false;
1188 }
1189
1190 static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1191 {
1192         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1193
1194         /*
1195          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1196          * the interesting states are ACPI_CSTATE_FFH.
1197          */
1198         for (cstate = 1; cstate < limit; cstate++) {
1199                 struct acpi_processor_cx *cx;
1200                 struct cpuidle_state *state;
1201
1202                 if (intel_idle_max_cstate_reached(cstate))
1203                         break;
1204
1205                 cx = &acpi_state_table.states[cstate];
1206
1207                 state = &drv->states[drv->state_count++];
1208
1209                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1210                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1211                 state->exit_latency = cx->latency;
1212                 /*
1213                  * For C1-type C-states use the same number for both the exit
1214                  * latency and target residency, because that is the case for
1215                  * C1 in the majority of the static C-states tables above.
1216                  * For the other types of C-states, however, set the target
1217                  * residency to 3 times the exit latency which should lead to
1218                  * a reasonable balance between energy-efficiency and
1219                  * performance in the majority of interesting cases.
1220                  */
1221                 state->target_residency = cx->latency;
1222                 if (cx->type > ACPI_STATE_C1)
1223                         state->target_residency *= 3;
1224
1225                 state->flags = MWAIT2flg(cx->address);
1226                 if (cx->type > ACPI_STATE_C2)
1227                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1228
1229                 state->enter = intel_idle;
1230                 state->enter_s2idle = intel_idle_s2idle;
1231         }
1232 }
1233 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1234 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1235 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1236 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1237
1238 /*
1239  * intel_idle_probe()
1240  */
1241 static int __init intel_idle_probe(void)
1242 {
1243         unsigned int eax, ebx, ecx;
1244         const struct x86_cpu_id *id;
1245
1246         if (max_cstate == 0) {
1247                 pr_debug("disabled\n");
1248                 return -EPERM;
1249         }
1250
1251         id = x86_match_cpu(intel_idle_ids);
1252         if (id) {
1253                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1254                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1255                         return -ENODEV;
1256                 }
1257         } else {
1258                 id = x86_match_cpu(intel_mwait_ids);
1259                 if (!id)
1260                         return -ENODEV;
1261         }
1262
1263         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1264                 return -ENODEV;
1265
1266         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1267
1268         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1269             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1270             !mwait_substates)
1271                         return -ENODEV;
1272
1273         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1274
1275         icpu = (const struct idle_cpu *)id->driver_data;
1276         if (icpu)
1277                 cpuidle_state_table = icpu->state_table;
1278         else if (!intel_idle_acpi_cst_extract())
1279                 return -ENODEV;
1280
1281         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1282                  boot_cpu_data.x86_model);
1283
1284         return 0;
1285 }
1286
1287 /*
1288  * intel_idle_cpuidle_devices_uninit()
1289  * Unregisters the cpuidle devices.
1290  */
1291 static void intel_idle_cpuidle_devices_uninit(void)
1292 {
1293         int i;
1294         struct cpuidle_device *dev;
1295
1296         for_each_online_cpu(i) {
1297                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1298                 cpuidle_unregister_device(dev);
1299         }
1300 }
1301
1302 /*
1303  * ivt_idle_state_table_update(void)
1304  *
1305  * Tune IVT multi-socket targets
1306  * Assumption: num_sockets == (max_package_num + 1)
1307  */
1308 static void ivt_idle_state_table_update(void)
1309 {
1310         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1311         int cpu, package_num, num_sockets = 1;
1312
1313         for_each_online_cpu(cpu) {
1314                 package_num = topology_physical_package_id(cpu);
1315                 if (package_num + 1 > num_sockets) {
1316                         num_sockets = package_num + 1;
1317
1318                         if (num_sockets > 4) {
1319                                 cpuidle_state_table = ivt_cstates_8s;
1320                                 return;
1321                         }
1322                 }
1323         }
1324
1325         if (num_sockets > 2)
1326                 cpuidle_state_table = ivt_cstates_4s;
1327
1328         /* else, 1 and 2 socket systems use default ivt_cstates */
1329 }
1330
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

/*
 * Nanoseconds per count for each value of the 3-bit unit field (bits 12:10
 * of the IRTL value).  The table is only ever read, hence const.
 */
static const unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec - convert a raw IRTL MSR value to microseconds
 * @irtl: raw MSR value
 *
 * Multiplies the 10-bit count (bits 9:0) by the unit selected through bits
 * 12:10 and scales the result from ns to usec.  Returns 0 for a zero MSR
 * value and for the unit encodings whose table entry is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1349 /*
1350  * bxt_idle_state_table_update(void)
1351  *
1352  * On BXT, we trust the IRTL to show the definitive maximum latency
1353  * We use the same value for target_residency.
1354  */
1355 static void bxt_idle_state_table_update(void)
1356 {
1357         unsigned long long msr;
1358         unsigned int usec;
1359
1360         rdmsrl(MSR_PKGC6_IRTL, msr);
1361         usec = irtl_2_usec(msr);
1362         if (usec) {
1363                 bxt_cstates[2].exit_latency = usec;
1364                 bxt_cstates[2].target_residency = usec;
1365         }
1366
1367         rdmsrl(MSR_PKGC7_IRTL, msr);
1368         usec = irtl_2_usec(msr);
1369         if (usec) {
1370                 bxt_cstates[3].exit_latency = usec;
1371                 bxt_cstates[3].target_residency = usec;
1372         }
1373
1374         rdmsrl(MSR_PKGC8_IRTL, msr);
1375         usec = irtl_2_usec(msr);
1376         if (usec) {
1377                 bxt_cstates[4].exit_latency = usec;
1378                 bxt_cstates[4].target_residency = usec;
1379         }
1380
1381         rdmsrl(MSR_PKGC9_IRTL, msr);
1382         usec = irtl_2_usec(msr);
1383         if (usec) {
1384                 bxt_cstates[5].exit_latency = usec;
1385                 bxt_cstates[5].target_residency = usec;
1386         }
1387
1388         rdmsrl(MSR_PKGC10_IRTL, msr);
1389         usec = irtl_2_usec(msr);
1390         if (usec) {
1391                 bxt_cstates[6].exit_latency = usec;
1392                 bxt_cstates[6].target_residency = usec;
1393         }
1394
1395 }
1396 /*
1397  * sklh_idle_state_table_update(void)
1398  *
1399  * On SKL-H (model 0x5e) disable C8 and C9 if:
1400  * C10 is enabled and SGX disabled
1401  */
1402 static void sklh_idle_state_table_update(void)
1403 {
1404         unsigned long long msr;
1405         unsigned int eax, ebx, ecx, edx;
1406
1407
1408         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1409         if (max_cstate <= 7)
1410                 return;
1411
1412         /* if PC10 not present in CPUID.MWAIT.EDX */
1413         if ((mwait_substates & (0xF << 28)) == 0)
1414                 return;
1415
1416         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1417
1418         /* PC10 is not enabled in PKG C-state limit */
1419         if ((msr & 0xF) != 8)
1420                 return;
1421
1422         ecx = 0;
1423         cpuid(7, &eax, &ebx, &ecx, &edx);
1424
1425         /* if SGX is present */
1426         if (ebx & (1 << 2)) {
1427
1428                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1429
1430                 /* if SGX is enabled */
1431                 if (msr & (1 << 18))
1432                         return;
1433         }
1434
1435         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1436         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1437 }
1438 /*
1439  * intel_idle_state_table_update()
1440  *
1441  * Update the default state_table for this CPU-id
1442  */
1443
1444 static void intel_idle_state_table_update(void)
1445 {
1446         switch (boot_cpu_data.x86_model) {
1447
1448         case INTEL_FAM6_IVYBRIDGE_X:
1449                 ivt_idle_state_table_update();
1450                 break;
1451         case INTEL_FAM6_ATOM_GOLDMONT:
1452         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1453                 bxt_idle_state_table_update();
1454                 break;
1455         case INTEL_FAM6_SKYLAKE:
1456                 sklh_idle_state_table_update();
1457                 break;
1458         }
1459 }
1460
1461 static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1462 {
1463         int cstate;
1464
1465         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1466                 unsigned int mwait_hint;
1467
1468                 if (intel_idle_max_cstate_reached(cstate))
1469                         break;
1470
1471                 if (!cpuidle_state_table[cstate].enter &&
1472                     !cpuidle_state_table[cstate].enter_s2idle)
1473                         break;
1474
1475                 /* If marked as unusable, skip this state. */
1476                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1477                         pr_debug("state %s is disabled\n",
1478                                  cpuidle_state_table[cstate].name);
1479                         continue;
1480                 }
1481
1482                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1483                 if (!intel_idle_verify_cstate(mwait_hint))
1484                         continue;
1485
1486                 /* Structure copy. */
1487                 drv->states[drv->state_count++] = cpuidle_state_table[cstate];
1488         }
1489
1490         if (icpu->byt_auto_demotion_disable_flag) {
1491                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1492                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1493         }
1494 }
1495
1496 /*
1497  * intel_idle_cpuidle_driver_init()
1498  * allocate, initialize cpuidle_states
1499  */
1500 static void __init intel_idle_cpuidle_driver_init(void)
1501 {
1502         struct cpuidle_driver *drv = &intel_idle_driver;
1503
1504         intel_idle_state_table_update();
1505
1506         cpuidle_poll_state_init(drv);
1507         drv->state_count = 1;
1508
1509         if (icpu)
1510                 intel_idle_init_cstates_icpu(drv);
1511         else
1512                 intel_idle_init_cstates_acpi(drv);
1513 }
1514
1515 /*
1516  * intel_idle_cpu_init()
1517  * allocate, initialize, register cpuidle_devices
1518  * @cpu: cpu/core to initialize
1519  */
1520 static int intel_idle_cpu_init(unsigned int cpu)
1521 {
1522         struct cpuidle_device *dev;
1523
1524         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1525         dev->cpu = cpu;
1526
1527         if (cpuidle_register_device(dev)) {
1528                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1529                 return -EIO;
1530         }
1531
1532         if (!icpu)
1533                 return 0;
1534
1535         if (icpu->auto_demotion_disable_flags)
1536                 auto_demotion_disable();
1537
1538         if (icpu->disable_promotion_to_c1e)
1539                 c1e_promotion_disable();
1540
1541         return 0;
1542 }
1543
1544 static int intel_idle_cpu_online(unsigned int cpu)
1545 {
1546         struct cpuidle_device *dev;
1547
1548         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1549                 __setup_broadcast_timer(true);
1550
1551         /*
1552          * Some systems can hotplug a cpu at runtime after
1553          * the kernel has booted, we have to initialize the
1554          * driver in this case
1555          */
1556         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1557         if (!dev->registered)
1558                 return intel_idle_cpu_init(cpu);
1559
1560         return 0;
1561 }
1562
1563 static int __init intel_idle_init(void)
1564 {
1565         int retval;
1566
1567         /* Do not load intel_idle at all for now if idle= is passed */
1568         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1569                 return -ENODEV;
1570
1571         retval = intel_idle_probe();
1572         if (retval)
1573                 return retval;
1574
1575         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1576         if (intel_idle_cpuidle_devices == NULL)
1577                 return -ENOMEM;
1578
1579         intel_idle_cpuidle_driver_init();
1580         retval = cpuidle_register_driver(&intel_idle_driver);
1581         if (retval) {
1582                 struct cpuidle_driver *drv = cpuidle_get_driver();
1583                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1584                        drv ? drv->name : "none");
1585                 goto init_driver_fail;
1586         }
1587
1588         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1589                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1590
1591         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1592                                    intel_idle_cpu_online, NULL);
1593         if (retval < 0)
1594                 goto hp_setup_fail;
1595
1596         pr_debug("lapic_timer_reliable_states 0x%x\n",
1597                  lapic_timer_reliable_states);
1598
1599         return 0;
1600
1601 hp_setup_fail:
1602         intel_idle_cpuidle_devices_uninit();
1603         cpuidle_unregister_driver(&intel_idle_driver);
1604 init_driver_fail:
1605         free_percpu(intel_idle_cpuidle_devices);
1606         return retval;
1607
1608 }
1609 device_initcall(intel_idle_init);
1610
1611 /*
1612  * We are not really modular, but we used to support that.  Meaning we also
1613  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1614  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1615  * is the easiest way (currently) to continue doing that.
1616  */
1617 module_param(max_cstate, int, 0444);