]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/idle/intel_idle.c
Merge tag 'vfio-v5.6-rc1' of git://github.com/awilliam/linux-vfio
[linux.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  */
8
9 /*
10  * intel_idle is a cpuidle driver that loads on specific Intel processors
11  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
12  * make Linux more efficient on these processors, as intel_idle knows
13  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
14  */
15
/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *      for preventing entry into deep C-states
 */
24
/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */
38
/* DEBUG is defined below to enable pr_debug() statements; comment it out to disable them */
40 #define DEBUG
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57
58 #define INTEL_IDLE_VERSION "0.4.1"
59
60 static struct cpuidle_driver intel_idle_driver = {
61         .name = "intel_idle",
62         .owner = THIS_MODULE,
63 };
64 /* intel_idle.max_cstate=0 disables driver */
65 static int max_cstate = CPUIDLE_STATE_MAX - 1;
66
67 static unsigned int mwait_substates;
68
/* Mask value with every bit set: the LAPIC timer is reliable in all states. */
#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF

/*
 * Bitmask of the C-states in which the LAPIC timer is reliable:
 * bit 1 stands for C1, and so on.  By default only C1 is marked.
 */
static unsigned int lapic_timer_reliable_states = 1 << 1;
72
/*
 * Description of one supported CPU model: the C-state table to use plus a
 * handful of model-specific switches.  How the switches are consumed is not
 * visible in this part of the file.
 */
struct idle_cpu {
        /* C-state table for this model. */
        struct cpuidle_state *state_table;

        /*
         * Hardware C-state auto-demotion may not always be optimal.
         * These are the enable bits that should be cleared.
         */
        unsigned long auto_demotion_disable_flags;

        /* Model-specific boolean switches. */
        bool byt_auto_demotion_disable_flag;
        bool disable_promotion_to_c1e;
        bool use_acpi;
};
85
86 static const struct idle_cpu *icpu;
87 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
88 static int intel_idle(struct cpuidle_device *dev,
89                         struct cpuidle_driver *drv, int index);
90 static void intel_idle_s2idle(struct cpuidle_device *dev,
91                               struct cpuidle_driver *drv, int index);
92 static struct cpuidle_state *cpuidle_state_table;
93
/*
 * State flag (bit 15): keep this state enabled by default even when the
 * ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE      BIT(15)
98
/*
 * State flag (bit 16) for states where the HW flushes the TLB for us,
 * so no cross-calls are needed to keep it consistent.
 * When the flag is set, SW flushes the TLB, which makes the flag safe
 * to use even if the HW does not actually do the flushing.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED        (1 << 16)
106
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint in the top byte (bits 31:24) of our "flags" for
 * each state.
 *
 * flg2MWAIT(flags) - extract the 8-bit hint from a flags word.
 * MWAIT2flg(eax)   - move an 8-bit hint into its flags position.
 *
 * The MWAIT2flg() argument is parenthesized so that an expression such
 * as (a | b) is masked as a whole, and the mask is unsigned so that a
 * hint with bit 7 set is not shifted into the sign bit of an int.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
116
117 /*
118  * States are indexed by the cstate number,
119  * which is also the index into the MWAIT hint array.
120  * Thus C0 is a dummy.
121  */
122 static struct cpuidle_state nehalem_cstates[] = {
123         {
124                 .name = "C1",
125                 .desc = "MWAIT 0x00",
126                 .flags = MWAIT2flg(0x00),
127                 .exit_latency = 3,
128                 .target_residency = 6,
129                 .enter = &intel_idle,
130                 .enter_s2idle = intel_idle_s2idle, },
131         {
132                 .name = "C1E",
133                 .desc = "MWAIT 0x01",
134                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
135                 .exit_latency = 10,
136                 .target_residency = 20,
137                 .enter = &intel_idle,
138                 .enter_s2idle = intel_idle_s2idle, },
139         {
140                 .name = "C3",
141                 .desc = "MWAIT 0x10",
142                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
143                 .exit_latency = 20,
144                 .target_residency = 80,
145                 .enter = &intel_idle,
146                 .enter_s2idle = intel_idle_s2idle, },
147         {
148                 .name = "C6",
149                 .desc = "MWAIT 0x20",
150                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
151                 .exit_latency = 200,
152                 .target_residency = 800,
153                 .enter = &intel_idle,
154                 .enter_s2idle = intel_idle_s2idle, },
155         {
156                 .enter = NULL }
157 };
158
159 static struct cpuidle_state snb_cstates[] = {
160         {
161                 .name = "C1",
162                 .desc = "MWAIT 0x00",
163                 .flags = MWAIT2flg(0x00),
164                 .exit_latency = 2,
165                 .target_residency = 2,
166                 .enter = &intel_idle,
167                 .enter_s2idle = intel_idle_s2idle, },
168         {
169                 .name = "C1E",
170                 .desc = "MWAIT 0x01",
171                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
172                 .exit_latency = 10,
173                 .target_residency = 20,
174                 .enter = &intel_idle,
175                 .enter_s2idle = intel_idle_s2idle, },
176         {
177                 .name = "C3",
178                 .desc = "MWAIT 0x10",
179                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
180                 .exit_latency = 80,
181                 .target_residency = 211,
182                 .enter = &intel_idle,
183                 .enter_s2idle = intel_idle_s2idle, },
184         {
185                 .name = "C6",
186                 .desc = "MWAIT 0x20",
187                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
188                 .exit_latency = 104,
189                 .target_residency = 345,
190                 .enter = &intel_idle,
191                 .enter_s2idle = intel_idle_s2idle, },
192         {
193                 .name = "C7",
194                 .desc = "MWAIT 0x30",
195                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
196                 .exit_latency = 109,
197                 .target_residency = 345,
198                 .enter = &intel_idle,
199                 .enter_s2idle = intel_idle_s2idle, },
200         {
201                 .enter = NULL }
202 };
203
204 static struct cpuidle_state byt_cstates[] = {
205         {
206                 .name = "C1",
207                 .desc = "MWAIT 0x00",
208                 .flags = MWAIT2flg(0x00),
209                 .exit_latency = 1,
210                 .target_residency = 1,
211                 .enter = &intel_idle,
212                 .enter_s2idle = intel_idle_s2idle, },
213         {
214                 .name = "C6N",
215                 .desc = "MWAIT 0x58",
216                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
217                 .exit_latency = 300,
218                 .target_residency = 275,
219                 .enter = &intel_idle,
220                 .enter_s2idle = intel_idle_s2idle, },
221         {
222                 .name = "C6S",
223                 .desc = "MWAIT 0x52",
224                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
225                 .exit_latency = 500,
226                 .target_residency = 560,
227                 .enter = &intel_idle,
228                 .enter_s2idle = intel_idle_s2idle, },
229         {
230                 .name = "C7",
231                 .desc = "MWAIT 0x60",
232                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
233                 .exit_latency = 1200,
234                 .target_residency = 4000,
235                 .enter = &intel_idle,
236                 .enter_s2idle = intel_idle_s2idle, },
237         {
238                 .name = "C7S",
239                 .desc = "MWAIT 0x64",
240                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
241                 .exit_latency = 10000,
242                 .target_residency = 20000,
243                 .enter = &intel_idle,
244                 .enter_s2idle = intel_idle_s2idle, },
245         {
246                 .enter = NULL }
247 };
248
249 static struct cpuidle_state cht_cstates[] = {
250         {
251                 .name = "C1",
252                 .desc = "MWAIT 0x00",
253                 .flags = MWAIT2flg(0x00),
254                 .exit_latency = 1,
255                 .target_residency = 1,
256                 .enter = &intel_idle,
257                 .enter_s2idle = intel_idle_s2idle, },
258         {
259                 .name = "C6N",
260                 .desc = "MWAIT 0x58",
261                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
262                 .exit_latency = 80,
263                 .target_residency = 275,
264                 .enter = &intel_idle,
265                 .enter_s2idle = intel_idle_s2idle, },
266         {
267                 .name = "C6S",
268                 .desc = "MWAIT 0x52",
269                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
270                 .exit_latency = 200,
271                 .target_residency = 560,
272                 .enter = &intel_idle,
273                 .enter_s2idle = intel_idle_s2idle, },
274         {
275                 .name = "C7",
276                 .desc = "MWAIT 0x60",
277                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
278                 .exit_latency = 1200,
279                 .target_residency = 4000,
280                 .enter = &intel_idle,
281                 .enter_s2idle = intel_idle_s2idle, },
282         {
283                 .name = "C7S",
284                 .desc = "MWAIT 0x64",
285                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
286                 .exit_latency = 10000,
287                 .target_residency = 20000,
288                 .enter = &intel_idle,
289                 .enter_s2idle = intel_idle_s2idle, },
290         {
291                 .enter = NULL }
292 };
293
294 static struct cpuidle_state ivb_cstates[] = {
295         {
296                 .name = "C1",
297                 .desc = "MWAIT 0x00",
298                 .flags = MWAIT2flg(0x00),
299                 .exit_latency = 1,
300                 .target_residency = 1,
301                 .enter = &intel_idle,
302                 .enter_s2idle = intel_idle_s2idle, },
303         {
304                 .name = "C1E",
305                 .desc = "MWAIT 0x01",
306                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
307                 .exit_latency = 10,
308                 .target_residency = 20,
309                 .enter = &intel_idle,
310                 .enter_s2idle = intel_idle_s2idle, },
311         {
312                 .name = "C3",
313                 .desc = "MWAIT 0x10",
314                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
315                 .exit_latency = 59,
316                 .target_residency = 156,
317                 .enter = &intel_idle,
318                 .enter_s2idle = intel_idle_s2idle, },
319         {
320                 .name = "C6",
321                 .desc = "MWAIT 0x20",
322                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
323                 .exit_latency = 80,
324                 .target_residency = 300,
325                 .enter = &intel_idle,
326                 .enter_s2idle = intel_idle_s2idle, },
327         {
328                 .name = "C7",
329                 .desc = "MWAIT 0x30",
330                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
331                 .exit_latency = 87,
332                 .target_residency = 300,
333                 .enter = &intel_idle,
334                 .enter_s2idle = intel_idle_s2idle, },
335         {
336                 .enter = NULL }
337 };
338
339 static struct cpuidle_state ivt_cstates[] = {
340         {
341                 .name = "C1",
342                 .desc = "MWAIT 0x00",
343                 .flags = MWAIT2flg(0x00),
344                 .exit_latency = 1,
345                 .target_residency = 1,
346                 .enter = &intel_idle,
347                 .enter_s2idle = intel_idle_s2idle, },
348         {
349                 .name = "C1E",
350                 .desc = "MWAIT 0x01",
351                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
352                 .exit_latency = 10,
353                 .target_residency = 80,
354                 .enter = &intel_idle,
355                 .enter_s2idle = intel_idle_s2idle, },
356         {
357                 .name = "C3",
358                 .desc = "MWAIT 0x10",
359                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
360                 .exit_latency = 59,
361                 .target_residency = 156,
362                 .enter = &intel_idle,
363                 .enter_s2idle = intel_idle_s2idle, },
364         {
365                 .name = "C6",
366                 .desc = "MWAIT 0x20",
367                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
368                 .exit_latency = 82,
369                 .target_residency = 300,
370                 .enter = &intel_idle,
371                 .enter_s2idle = intel_idle_s2idle, },
372         {
373                 .enter = NULL }
374 };
375
376 static struct cpuidle_state ivt_cstates_4s[] = {
377         {
378                 .name = "C1",
379                 .desc = "MWAIT 0x00",
380                 .flags = MWAIT2flg(0x00),
381                 .exit_latency = 1,
382                 .target_residency = 1,
383                 .enter = &intel_idle,
384                 .enter_s2idle = intel_idle_s2idle, },
385         {
386                 .name = "C1E",
387                 .desc = "MWAIT 0x01",
388                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
389                 .exit_latency = 10,
390                 .target_residency = 250,
391                 .enter = &intel_idle,
392                 .enter_s2idle = intel_idle_s2idle, },
393         {
394                 .name = "C3",
395                 .desc = "MWAIT 0x10",
396                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
397                 .exit_latency = 59,
398                 .target_residency = 300,
399                 .enter = &intel_idle,
400                 .enter_s2idle = intel_idle_s2idle, },
401         {
402                 .name = "C6",
403                 .desc = "MWAIT 0x20",
404                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
405                 .exit_latency = 84,
406                 .target_residency = 400,
407                 .enter = &intel_idle,
408                 .enter_s2idle = intel_idle_s2idle, },
409         {
410                 .enter = NULL }
411 };
412
413 static struct cpuidle_state ivt_cstates_8s[] = {
414         {
415                 .name = "C1",
416                 .desc = "MWAIT 0x00",
417                 .flags = MWAIT2flg(0x00),
418                 .exit_latency = 1,
419                 .target_residency = 1,
420                 .enter = &intel_idle,
421                 .enter_s2idle = intel_idle_s2idle, },
422         {
423                 .name = "C1E",
424                 .desc = "MWAIT 0x01",
425                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
426                 .exit_latency = 10,
427                 .target_residency = 500,
428                 .enter = &intel_idle,
429                 .enter_s2idle = intel_idle_s2idle, },
430         {
431                 .name = "C3",
432                 .desc = "MWAIT 0x10",
433                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
434                 .exit_latency = 59,
435                 .target_residency = 600,
436                 .enter = &intel_idle,
437                 .enter_s2idle = intel_idle_s2idle, },
438         {
439                 .name = "C6",
440                 .desc = "MWAIT 0x20",
441                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
442                 .exit_latency = 88,
443                 .target_residency = 700,
444                 .enter = &intel_idle,
445                 .enter_s2idle = intel_idle_s2idle, },
446         {
447                 .enter = NULL }
448 };
449
450 static struct cpuidle_state hsw_cstates[] = {
451         {
452                 .name = "C1",
453                 .desc = "MWAIT 0x00",
454                 .flags = MWAIT2flg(0x00),
455                 .exit_latency = 2,
456                 .target_residency = 2,
457                 .enter = &intel_idle,
458                 .enter_s2idle = intel_idle_s2idle, },
459         {
460                 .name = "C1E",
461                 .desc = "MWAIT 0x01",
462                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
463                 .exit_latency = 10,
464                 .target_residency = 20,
465                 .enter = &intel_idle,
466                 .enter_s2idle = intel_idle_s2idle, },
467         {
468                 .name = "C3",
469                 .desc = "MWAIT 0x10",
470                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
471                 .exit_latency = 33,
472                 .target_residency = 100,
473                 .enter = &intel_idle,
474                 .enter_s2idle = intel_idle_s2idle, },
475         {
476                 .name = "C6",
477                 .desc = "MWAIT 0x20",
478                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
479                 .exit_latency = 133,
480                 .target_residency = 400,
481                 .enter = &intel_idle,
482                 .enter_s2idle = intel_idle_s2idle, },
483         {
484                 .name = "C7s",
485                 .desc = "MWAIT 0x32",
486                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
487                 .exit_latency = 166,
488                 .target_residency = 500,
489                 .enter = &intel_idle,
490                 .enter_s2idle = intel_idle_s2idle, },
491         {
492                 .name = "C8",
493                 .desc = "MWAIT 0x40",
494                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
495                 .exit_latency = 300,
496                 .target_residency = 900,
497                 .enter = &intel_idle,
498                 .enter_s2idle = intel_idle_s2idle, },
499         {
500                 .name = "C9",
501                 .desc = "MWAIT 0x50",
502                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
503                 .exit_latency = 600,
504                 .target_residency = 1800,
505                 .enter = &intel_idle,
506                 .enter_s2idle = intel_idle_s2idle, },
507         {
508                 .name = "C10",
509                 .desc = "MWAIT 0x60",
510                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
511                 .exit_latency = 2600,
512                 .target_residency = 7700,
513                 .enter = &intel_idle,
514                 .enter_s2idle = intel_idle_s2idle, },
515         {
516                 .enter = NULL }
517 };
518 static struct cpuidle_state bdw_cstates[] = {
519         {
520                 .name = "C1",
521                 .desc = "MWAIT 0x00",
522                 .flags = MWAIT2flg(0x00),
523                 .exit_latency = 2,
524                 .target_residency = 2,
525                 .enter = &intel_idle,
526                 .enter_s2idle = intel_idle_s2idle, },
527         {
528                 .name = "C1E",
529                 .desc = "MWAIT 0x01",
530                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
531                 .exit_latency = 10,
532                 .target_residency = 20,
533                 .enter = &intel_idle,
534                 .enter_s2idle = intel_idle_s2idle, },
535         {
536                 .name = "C3",
537                 .desc = "MWAIT 0x10",
538                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
539                 .exit_latency = 40,
540                 .target_residency = 100,
541                 .enter = &intel_idle,
542                 .enter_s2idle = intel_idle_s2idle, },
543         {
544                 .name = "C6",
545                 .desc = "MWAIT 0x20",
546                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
547                 .exit_latency = 133,
548                 .target_residency = 400,
549                 .enter = &intel_idle,
550                 .enter_s2idle = intel_idle_s2idle, },
551         {
552                 .name = "C7s",
553                 .desc = "MWAIT 0x32",
554                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
555                 .exit_latency = 166,
556                 .target_residency = 500,
557                 .enter = &intel_idle,
558                 .enter_s2idle = intel_idle_s2idle, },
559         {
560                 .name = "C8",
561                 .desc = "MWAIT 0x40",
562                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
563                 .exit_latency = 300,
564                 .target_residency = 900,
565                 .enter = &intel_idle,
566                 .enter_s2idle = intel_idle_s2idle, },
567         {
568                 .name = "C9",
569                 .desc = "MWAIT 0x50",
570                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
571                 .exit_latency = 600,
572                 .target_residency = 1800,
573                 .enter = &intel_idle,
574                 .enter_s2idle = intel_idle_s2idle, },
575         {
576                 .name = "C10",
577                 .desc = "MWAIT 0x60",
578                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
579                 .exit_latency = 2600,
580                 .target_residency = 7700,
581                 .enter = &intel_idle,
582                 .enter_s2idle = intel_idle_s2idle, },
583         {
584                 .enter = NULL }
585 };
586
587 static struct cpuidle_state skl_cstates[] = {
588         {
589                 .name = "C1",
590                 .desc = "MWAIT 0x00",
591                 .flags = MWAIT2flg(0x00),
592                 .exit_latency = 2,
593                 .target_residency = 2,
594                 .enter = &intel_idle,
595                 .enter_s2idle = intel_idle_s2idle, },
596         {
597                 .name = "C1E",
598                 .desc = "MWAIT 0x01",
599                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
600                 .exit_latency = 10,
601                 .target_residency = 20,
602                 .enter = &intel_idle,
603                 .enter_s2idle = intel_idle_s2idle, },
604         {
605                 .name = "C3",
606                 .desc = "MWAIT 0x10",
607                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
608                 .exit_latency = 70,
609                 .target_residency = 100,
610                 .enter = &intel_idle,
611                 .enter_s2idle = intel_idle_s2idle, },
612         {
613                 .name = "C6",
614                 .desc = "MWAIT 0x20",
615                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
616                 .exit_latency = 85,
617                 .target_residency = 200,
618                 .enter = &intel_idle,
619                 .enter_s2idle = intel_idle_s2idle, },
620         {
621                 .name = "C7s",
622                 .desc = "MWAIT 0x33",
623                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
624                 .exit_latency = 124,
625                 .target_residency = 800,
626                 .enter = &intel_idle,
627                 .enter_s2idle = intel_idle_s2idle, },
628         {
629                 .name = "C8",
630                 .desc = "MWAIT 0x40",
631                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
632                 .exit_latency = 200,
633                 .target_residency = 800,
634                 .enter = &intel_idle,
635                 .enter_s2idle = intel_idle_s2idle, },
636         {
637                 .name = "C9",
638                 .desc = "MWAIT 0x50",
639                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
640                 .exit_latency = 480,
641                 .target_residency = 5000,
642                 .enter = &intel_idle,
643                 .enter_s2idle = intel_idle_s2idle, },
644         {
645                 .name = "C10",
646                 .desc = "MWAIT 0x60",
647                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
648                 .exit_latency = 890,
649                 .target_residency = 5000,
650                 .enter = &intel_idle,
651                 .enter_s2idle = intel_idle_s2idle, },
652         {
653                 .enter = NULL }
654 };
655
656 static struct cpuidle_state skx_cstates[] = {
657         {
658                 .name = "C1",
659                 .desc = "MWAIT 0x00",
660                 .flags = MWAIT2flg(0x00),
661                 .exit_latency = 2,
662                 .target_residency = 2,
663                 .enter = &intel_idle,
664                 .enter_s2idle = intel_idle_s2idle, },
665         {
666                 .name = "C1E",
667                 .desc = "MWAIT 0x01",
668                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
669                 .exit_latency = 10,
670                 .target_residency = 20,
671                 .enter = &intel_idle,
672                 .enter_s2idle = intel_idle_s2idle, },
673         {
674                 .name = "C6",
675                 .desc = "MWAIT 0x20",
676                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
677                 .exit_latency = 133,
678                 .target_residency = 600,
679                 .enter = &intel_idle,
680                 .enter_s2idle = intel_idle_s2idle, },
681         {
682                 .enter = NULL }
683 };
684
685 static struct cpuidle_state atom_cstates[] = {
686         {
687                 .name = "C1E",
688                 .desc = "MWAIT 0x00",
689                 .flags = MWAIT2flg(0x00),
690                 .exit_latency = 10,
691                 .target_residency = 20,
692                 .enter = &intel_idle,
693                 .enter_s2idle = intel_idle_s2idle, },
694         {
695                 .name = "C2",
696                 .desc = "MWAIT 0x10",
697                 .flags = MWAIT2flg(0x10),
698                 .exit_latency = 20,
699                 .target_residency = 80,
700                 .enter = &intel_idle,
701                 .enter_s2idle = intel_idle_s2idle, },
702         {
703                 .name = "C4",
704                 .desc = "MWAIT 0x30",
705                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
706                 .exit_latency = 100,
707                 .target_residency = 400,
708                 .enter = &intel_idle,
709                 .enter_s2idle = intel_idle_s2idle, },
710         {
711                 .name = "C6",
712                 .desc = "MWAIT 0x52",
713                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
714                 .exit_latency = 140,
715                 .target_residency = 560,
716                 .enter = &intel_idle,
717                 .enter_s2idle = intel_idle_s2idle, },
718         {
719                 .enter = NULL }
720 };
721 static struct cpuidle_state tangier_cstates[] = {
722         {
723                 .name = "C1",
724                 .desc = "MWAIT 0x00",
725                 .flags = MWAIT2flg(0x00),
726                 .exit_latency = 1,
727                 .target_residency = 4,
728                 .enter = &intel_idle,
729                 .enter_s2idle = intel_idle_s2idle, },
730         {
731                 .name = "C4",
732                 .desc = "MWAIT 0x30",
733                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
734                 .exit_latency = 100,
735                 .target_residency = 400,
736                 .enter = &intel_idle,
737                 .enter_s2idle = intel_idle_s2idle, },
738         {
739                 .name = "C6",
740                 .desc = "MWAIT 0x52",
741                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
742                 .exit_latency = 140,
743                 .target_residency = 560,
744                 .enter = &intel_idle,
745                 .enter_s2idle = intel_idle_s2idle, },
746         {
747                 .name = "C7",
748                 .desc = "MWAIT 0x60",
749                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
750                 .exit_latency = 1200,
751                 .target_residency = 4000,
752                 .enter = &intel_idle,
753                 .enter_s2idle = intel_idle_s2idle, },
754         {
755                 .name = "C9",
756                 .desc = "MWAIT 0x64",
757                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
758                 .exit_latency = 10000,
759                 .target_residency = 20000,
760                 .enter = &intel_idle,
761                 .enter_s2idle = intel_idle_s2idle, },
762         {
763                 .enter = NULL }
764 };
765 static struct cpuidle_state avn_cstates[] = {
766         {
767                 .name = "C1",
768                 .desc = "MWAIT 0x00",
769                 .flags = MWAIT2flg(0x00),
770                 .exit_latency = 2,
771                 .target_residency = 2,
772                 .enter = &intel_idle,
773                 .enter_s2idle = intel_idle_s2idle, },
774         {
775                 .name = "C6",
776                 .desc = "MWAIT 0x51",
777                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
778                 .exit_latency = 15,
779                 .target_residency = 45,
780                 .enter = &intel_idle,
781                 .enter_s2idle = intel_idle_s2idle, },
782         {
783                 .enter = NULL }
784 };
785 static struct cpuidle_state knl_cstates[] = {
786         {
787                 .name = "C1",
788                 .desc = "MWAIT 0x00",
789                 .flags = MWAIT2flg(0x00),
790                 .exit_latency = 1,
791                 .target_residency = 2,
792                 .enter = &intel_idle,
793                 .enter_s2idle = intel_idle_s2idle },
794         {
795                 .name = "C6",
796                 .desc = "MWAIT 0x10",
797                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
798                 .exit_latency = 120,
799                 .target_residency = 500,
800                 .enter = &intel_idle,
801                 .enter_s2idle = intel_idle_s2idle },
802         {
803                 .enter = NULL }
804 };
805
806 static struct cpuidle_state bxt_cstates[] = {
807         {
808                 .name = "C1",
809                 .desc = "MWAIT 0x00",
810                 .flags = MWAIT2flg(0x00),
811                 .exit_latency = 2,
812                 .target_residency = 2,
813                 .enter = &intel_idle,
814                 .enter_s2idle = intel_idle_s2idle, },
815         {
816                 .name = "C1E",
817                 .desc = "MWAIT 0x01",
818                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
819                 .exit_latency = 10,
820                 .target_residency = 20,
821                 .enter = &intel_idle,
822                 .enter_s2idle = intel_idle_s2idle, },
823         {
824                 .name = "C6",
825                 .desc = "MWAIT 0x20",
826                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
827                 .exit_latency = 133,
828                 .target_residency = 133,
829                 .enter = &intel_idle,
830                 .enter_s2idle = intel_idle_s2idle, },
831         {
832                 .name = "C7s",
833                 .desc = "MWAIT 0x31",
834                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
835                 .exit_latency = 155,
836                 .target_residency = 155,
837                 .enter = &intel_idle,
838                 .enter_s2idle = intel_idle_s2idle, },
839         {
840                 .name = "C8",
841                 .desc = "MWAIT 0x40",
842                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
843                 .exit_latency = 1000,
844                 .target_residency = 1000,
845                 .enter = &intel_idle,
846                 .enter_s2idle = intel_idle_s2idle, },
847         {
848                 .name = "C9",
849                 .desc = "MWAIT 0x50",
850                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
851                 .exit_latency = 2000,
852                 .target_residency = 2000,
853                 .enter = &intel_idle,
854                 .enter_s2idle = intel_idle_s2idle, },
855         {
856                 .name = "C10",
857                 .desc = "MWAIT 0x60",
858                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
859                 .exit_latency = 10000,
860                 .target_residency = 10000,
861                 .enter = &intel_idle,
862                 .enter_s2idle = intel_idle_s2idle, },
863         {
864                 .enter = NULL }
865 };
866
867 static struct cpuidle_state dnv_cstates[] = {
868         {
869                 .name = "C1",
870                 .desc = "MWAIT 0x00",
871                 .flags = MWAIT2flg(0x00),
872                 .exit_latency = 2,
873                 .target_residency = 2,
874                 .enter = &intel_idle,
875                 .enter_s2idle = intel_idle_s2idle, },
876         {
877                 .name = "C1E",
878                 .desc = "MWAIT 0x01",
879                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
880                 .exit_latency = 10,
881                 .target_residency = 20,
882                 .enter = &intel_idle,
883                 .enter_s2idle = intel_idle_s2idle, },
884         {
885                 .name = "C6",
886                 .desc = "MWAIT 0x20",
887                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
888                 .exit_latency = 50,
889                 .target_residency = 500,
890                 .enter = &intel_idle,
891                 .enter_s2idle = intel_idle_s2idle, },
892         {
893                 .enter = NULL }
894 };
895
896 /**
897  * intel_idle
898  * @dev: cpuidle_device
899  * @drv: cpuidle driver
900  * @index: index of cpuidle state
901  *
902  * Must be called under local_irq_disable().
903  */
904 static __cpuidle int intel_idle(struct cpuidle_device *dev,
905                                 struct cpuidle_driver *drv, int index)
906 {
907         unsigned long ecx = 1; /* break on interrupt flag */
908         struct cpuidle_state *state = &drv->states[index];
909         unsigned long eax = flg2MWAIT(state->flags);
910         unsigned int cstate;
911         bool uninitialized_var(tick);
912         int cpu = smp_processor_id();
913
914         /*
915          * leave_mm() to avoid costly and often unnecessary wakeups
916          * for flushing the user TLB's associated with the active mm.
917          */
918         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
919                 leave_mm(cpu);
920
921         if (!static_cpu_has(X86_FEATURE_ARAT)) {
922                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
923                                 MWAIT_CSTATE_MASK) + 1;
924                 tick = false;
925                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
926                         tick = true;
927                         tick_broadcast_enter();
928                 }
929         }
930
931         mwait_idle_with_hints(eax, ecx);
932
933         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
934                 tick_broadcast_exit();
935
936         return index;
937 }
938
939 /**
940  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
941  * @dev: cpuidle_device
942  * @drv: cpuidle driver
943  * @index: state index
944  */
945 static void intel_idle_s2idle(struct cpuidle_device *dev,
946                              struct cpuidle_driver *drv, int index)
947 {
948         unsigned long ecx = 1; /* break on interrupt flag */
949         unsigned long eax = flg2MWAIT(drv->states[index].flags);
950
951         mwait_idle_with_hints(eax, ecx);
952 }
953
954 static const struct idle_cpu idle_cpu_nehalem = {
955         .state_table = nehalem_cstates,
956         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
957         .disable_promotion_to_c1e = true,
958 };
959
960 static const struct idle_cpu idle_cpu_nhx = {
961         .state_table = nehalem_cstates,
962         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
963         .disable_promotion_to_c1e = true,
964         .use_acpi = true,
965 };
966
967 static const struct idle_cpu idle_cpu_atom = {
968         .state_table = atom_cstates,
969 };
970
971 static const struct idle_cpu idle_cpu_tangier = {
972         .state_table = tangier_cstates,
973 };
974
975 static const struct idle_cpu idle_cpu_lincroft = {
976         .state_table = atom_cstates,
977         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
978 };
979
980 static const struct idle_cpu idle_cpu_snb = {
981         .state_table = snb_cstates,
982         .disable_promotion_to_c1e = true,
983 };
984
985 static const struct idle_cpu idle_cpu_snx = {
986         .state_table = snb_cstates,
987         .disable_promotion_to_c1e = true,
988         .use_acpi = true,
989 };
990
991 static const struct idle_cpu idle_cpu_byt = {
992         .state_table = byt_cstates,
993         .disable_promotion_to_c1e = true,
994         .byt_auto_demotion_disable_flag = true,
995 };
996
997 static const struct idle_cpu idle_cpu_cht = {
998         .state_table = cht_cstates,
999         .disable_promotion_to_c1e = true,
1000         .byt_auto_demotion_disable_flag = true,
1001 };
1002
1003 static const struct idle_cpu idle_cpu_ivb = {
1004         .state_table = ivb_cstates,
1005         .disable_promotion_to_c1e = true,
1006 };
1007
1008 static const struct idle_cpu idle_cpu_ivt = {
1009         .state_table = ivt_cstates,
1010         .disable_promotion_to_c1e = true,
1011         .use_acpi = true,
1012 };
1013
1014 static const struct idle_cpu idle_cpu_hsw = {
1015         .state_table = hsw_cstates,
1016         .disable_promotion_to_c1e = true,
1017 };
1018
1019 static const struct idle_cpu idle_cpu_hsx = {
1020         .state_table = hsw_cstates,
1021         .disable_promotion_to_c1e = true,
1022         .use_acpi = true,
1023 };
1024
1025 static const struct idle_cpu idle_cpu_bdw = {
1026         .state_table = bdw_cstates,
1027         .disable_promotion_to_c1e = true,
1028 };
1029
1030 static const struct idle_cpu idle_cpu_bdx = {
1031         .state_table = bdw_cstates,
1032         .disable_promotion_to_c1e = true,
1033         .use_acpi = true,
1034 };
1035
1036 static const struct idle_cpu idle_cpu_skl = {
1037         .state_table = skl_cstates,
1038         .disable_promotion_to_c1e = true,
1039 };
1040
1041 static const struct idle_cpu idle_cpu_skx = {
1042         .state_table = skx_cstates,
1043         .disable_promotion_to_c1e = true,
1044         .use_acpi = true,
1045 };
1046
1047 static const struct idle_cpu idle_cpu_avn = {
1048         .state_table = avn_cstates,
1049         .disable_promotion_to_c1e = true,
1050         .use_acpi = true,
1051 };
1052
1053 static const struct idle_cpu idle_cpu_knl = {
1054         .state_table = knl_cstates,
1055         .use_acpi = true,
1056 };
1057
1058 static const struct idle_cpu idle_cpu_bxt = {
1059         .state_table = bxt_cstates,
1060         .disable_promotion_to_c1e = true,
1061 };
1062
1063 static const struct idle_cpu idle_cpu_dnv = {
1064         .state_table = dnv_cstates,
1065         .disable_promotion_to_c1e = true,
1066         .use_acpi = true,
1067 };
1068
1069 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1070         INTEL_CPU_FAM6(NEHALEM_EP,              idle_cpu_nhx),
1071         INTEL_CPU_FAM6(NEHALEM,                 idle_cpu_nehalem),
1072         INTEL_CPU_FAM6(NEHALEM_G,               idle_cpu_nehalem),
1073         INTEL_CPU_FAM6(WESTMERE,                idle_cpu_nehalem),
1074         INTEL_CPU_FAM6(WESTMERE_EP,             idle_cpu_nhx),
1075         INTEL_CPU_FAM6(NEHALEM_EX,              idle_cpu_nhx),
1076         INTEL_CPU_FAM6(ATOM_BONNELL,            idle_cpu_atom),
1077         INTEL_CPU_FAM6(ATOM_BONNELL_MID,        idle_cpu_lincroft),
1078         INTEL_CPU_FAM6(WESTMERE_EX,             idle_cpu_nhx),
1079         INTEL_CPU_FAM6(SANDYBRIDGE,             idle_cpu_snb),
1080         INTEL_CPU_FAM6(SANDYBRIDGE_X,           idle_cpu_snx),
1081         INTEL_CPU_FAM6(ATOM_SALTWELL,           idle_cpu_atom),
1082         INTEL_CPU_FAM6(ATOM_SILVERMONT,         idle_cpu_byt),
1083         INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,     idle_cpu_tangier),
1084         INTEL_CPU_FAM6(ATOM_AIRMONT,            idle_cpu_cht),
1085         INTEL_CPU_FAM6(IVYBRIDGE,               idle_cpu_ivb),
1086         INTEL_CPU_FAM6(IVYBRIDGE_X,             idle_cpu_ivt),
1087         INTEL_CPU_FAM6(HASWELL,                 idle_cpu_hsw),
1088         INTEL_CPU_FAM6(HASWELL_X,               idle_cpu_hsx),
1089         INTEL_CPU_FAM6(HASWELL_L,               idle_cpu_hsw),
1090         INTEL_CPU_FAM6(HASWELL_G,               idle_cpu_hsw),
1091         INTEL_CPU_FAM6(ATOM_SILVERMONT_D,       idle_cpu_avn),
1092         INTEL_CPU_FAM6(BROADWELL,               idle_cpu_bdw),
1093         INTEL_CPU_FAM6(BROADWELL_G,             idle_cpu_bdw),
1094         INTEL_CPU_FAM6(BROADWELL_X,             idle_cpu_bdx),
1095         INTEL_CPU_FAM6(BROADWELL_D,             idle_cpu_bdx),
1096         INTEL_CPU_FAM6(SKYLAKE_L,               idle_cpu_skl),
1097         INTEL_CPU_FAM6(SKYLAKE,                 idle_cpu_skl),
1098         INTEL_CPU_FAM6(KABYLAKE_L,              idle_cpu_skl),
1099         INTEL_CPU_FAM6(KABYLAKE,                idle_cpu_skl),
1100         INTEL_CPU_FAM6(SKYLAKE_X,               idle_cpu_skx),
1101         INTEL_CPU_FAM6(XEON_PHI_KNL,            idle_cpu_knl),
1102         INTEL_CPU_FAM6(XEON_PHI_KNM,            idle_cpu_knl),
1103         INTEL_CPU_FAM6(ATOM_GOLDMONT,           idle_cpu_bxt),
1104         INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,      idle_cpu_bxt),
1105         INTEL_CPU_FAM6(ATOM_GOLDMONT_D,         idle_cpu_dnv),
1106         INTEL_CPU_FAM6(ATOM_TREMONT_D,          idle_cpu_dnv),
1107         {}
1108 };
1109
1110 #define INTEL_CPU_FAM6_MWAIT \
1111         { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 }
1112
1113 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1114         INTEL_CPU_FAM6_MWAIT,
1115         {}
1116 };
1117
1118 static bool __init intel_idle_max_cstate_reached(int cstate)
1119 {
1120         if (cstate + 1 > max_cstate) {
1121                 pr_info("max_cstate %d reached\n", max_cstate);
1122                 return true;
1123         }
1124         return false;
1125 }
1126
1127 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1128 #include <acpi/processor.h>
1129
1130 static bool no_acpi __read_mostly;
1131 module_param(no_acpi, bool, 0444);
1132 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1133
1134 static struct acpi_processor_power acpi_state_table __initdata;
1135
1136 /**
1137  * intel_idle_cst_usable - Check if the _CST information can be used.
1138  *
1139  * Check if all of the C-states listed by _CST in the max_cstate range are
1140  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1141  */
1142 static bool __init intel_idle_cst_usable(void)
1143 {
1144         int cstate, limit;
1145
1146         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1147                       acpi_state_table.count);
1148
1149         for (cstate = 1; cstate < limit; cstate++) {
1150                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1151
1152                 if (cx->entry_method != ACPI_CSTATE_FFH)
1153                         return false;
1154         }
1155
1156         return true;
1157 }
1158
1159 static bool __init intel_idle_acpi_cst_extract(void)
1160 {
1161         unsigned int cpu;
1162
1163         if (no_acpi) {
1164                 pr_debug("Not allowed to use ACPI _CST\n");
1165                 return false;
1166         }
1167
1168         for_each_possible_cpu(cpu) {
1169                 struct acpi_processor *pr = per_cpu(processors, cpu);
1170
1171                 if (!pr)
1172                         continue;
1173
1174                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1175                         continue;
1176
1177                 acpi_state_table.count++;
1178
1179                 if (!intel_idle_cst_usable())
1180                         continue;
1181
1182                 if (!acpi_processor_claim_cst_control()) {
1183                         acpi_state_table.count = 0;
1184                         return false;
1185                 }
1186
1187                 return true;
1188         }
1189
1190         pr_debug("ACPI _CST not found or not usable\n");
1191         return false;
1192 }
1193
1194 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1195 {
1196         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1197
1198         /*
1199          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1200          * the interesting states are ACPI_CSTATE_FFH.
1201          */
1202         for (cstate = 1; cstate < limit; cstate++) {
1203                 struct acpi_processor_cx *cx;
1204                 struct cpuidle_state *state;
1205
1206                 if (intel_idle_max_cstate_reached(cstate))
1207                         break;
1208
1209                 cx = &acpi_state_table.states[cstate];
1210
1211                 state = &drv->states[drv->state_count++];
1212
1213                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1214                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1215                 state->exit_latency = cx->latency;
1216                 /*
1217                  * For C1-type C-states use the same number for both the exit
1218                  * latency and target residency, because that is the case for
1219                  * C1 in the majority of the static C-states tables above.
1220                  * For the other types of C-states, however, set the target
1221                  * residency to 3 times the exit latency which should lead to
1222                  * a reasonable balance between energy-efficiency and
1223                  * performance in the majority of interesting cases.
1224                  */
1225                 state->target_residency = cx->latency;
1226                 if (cx->type > ACPI_STATE_C1)
1227                         state->target_residency *= 3;
1228
1229                 state->flags = MWAIT2flg(cx->address);
1230                 if (cx->type > ACPI_STATE_C2)
1231                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1232
1233                 state->enter = intel_idle;
1234                 state->enter_s2idle = intel_idle_s2idle;
1235         }
1236 }
1237
1238 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1239 {
1240         int cstate, limit;
1241
1242         /*
1243          * If there are no _CST C-states, do not disable any C-states by
1244          * default.
1245          */
1246         if (!acpi_state_table.count)
1247                 return false;
1248
1249         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1250         /*
1251          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1252          * the interesting states are ACPI_CSTATE_FFH.
1253          */
1254         for (cstate = 1; cstate < limit; cstate++) {
1255                 if (acpi_state_table.states[cstate].address == mwait_hint)
1256                         return false;
1257         }
1258         return true;
1259 }
1260 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1261 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1262 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1263 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1264 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1265
1266 /*
1267  * ivt_idle_state_table_update(void)
1268  *
1269  * Tune IVT multi-socket targets
1270  * Assumption: num_sockets == (max_package_num + 1)
1271  */
1272 static void __init ivt_idle_state_table_update(void)
1273 {
1274         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1275         int cpu, package_num, num_sockets = 1;
1276
1277         for_each_online_cpu(cpu) {
1278                 package_num = topology_physical_package_id(cpu);
1279                 if (package_num + 1 > num_sockets) {
1280                         num_sockets = package_num + 1;
1281
1282                         if (num_sockets > 4) {
1283                                 cpuidle_state_table = ivt_cstates_8s;
1284                                 return;
1285                         }
1286                 }
1287         }
1288
1289         if (num_sockets > 2)
1290                 cpuidle_state_table = ivt_cstates_4s;
1291
1292         /* else, 1 and 2 socket systems use default ivt_cstates */
1293 }
1294
1295 /**
1296  * irtl_2_usec - IRTL to microseconds conversion.
1297  * @irtl: IRTL MSR value.
1298  *
1299  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1300  */
1301 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1302 {
1303         static const unsigned int irtl_ns_units[] __initconst = {
1304                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1305         };
1306         unsigned long long ns;
1307
1308         if (!irtl)
1309                 return 0;
1310
1311         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1312
1313         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1314 }
1315
1316 /*
1317  * bxt_idle_state_table_update(void)
1318  *
1319  * On BXT, we trust the IRTL to show the definitive maximum latency
1320  * We use the same value for target_residency.
1321  */
1322 static void __init bxt_idle_state_table_update(void)
1323 {
1324         unsigned long long msr;
1325         unsigned int usec;
1326
1327         rdmsrl(MSR_PKGC6_IRTL, msr);
1328         usec = irtl_2_usec(msr);
1329         if (usec) {
1330                 bxt_cstates[2].exit_latency = usec;
1331                 bxt_cstates[2].target_residency = usec;
1332         }
1333
1334         rdmsrl(MSR_PKGC7_IRTL, msr);
1335         usec = irtl_2_usec(msr);
1336         if (usec) {
1337                 bxt_cstates[3].exit_latency = usec;
1338                 bxt_cstates[3].target_residency = usec;
1339         }
1340
1341         rdmsrl(MSR_PKGC8_IRTL, msr);
1342         usec = irtl_2_usec(msr);
1343         if (usec) {
1344                 bxt_cstates[4].exit_latency = usec;
1345                 bxt_cstates[4].target_residency = usec;
1346         }
1347
1348         rdmsrl(MSR_PKGC9_IRTL, msr);
1349         usec = irtl_2_usec(msr);
1350         if (usec) {
1351                 bxt_cstates[5].exit_latency = usec;
1352                 bxt_cstates[5].target_residency = usec;
1353         }
1354
1355         rdmsrl(MSR_PKGC10_IRTL, msr);
1356         usec = irtl_2_usec(msr);
1357         if (usec) {
1358                 bxt_cstates[6].exit_latency = usec;
1359                 bxt_cstates[6].target_residency = usec;
1360         }
1361
1362 }
1363 /*
1364  * sklh_idle_state_table_update(void)
1365  *
1366  * On SKL-H (model 0x5e) disable C8 and C9 if:
1367  * C10 is enabled and SGX disabled
1368  */
1369 static void __init sklh_idle_state_table_update(void)
1370 {
1371         unsigned long long msr;
1372         unsigned int eax, ebx, ecx, edx;
1373
1374
1375         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1376         if (max_cstate <= 7)
1377                 return;
1378
1379         /* if PC10 not present in CPUID.MWAIT.EDX */
1380         if ((mwait_substates & (0xF << 28)) == 0)
1381                 return;
1382
1383         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1384
1385         /* PC10 is not enabled in PKG C-state limit */
1386         if ((msr & 0xF) != 8)
1387                 return;
1388
1389         ecx = 0;
1390         cpuid(7, &eax, &ebx, &ecx, &edx);
1391
1392         /* if SGX is present */
1393         if (ebx & (1 << 2)) {
1394
1395                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1396
1397                 /* if SGX is enabled */
1398                 if (msr & (1 << 18))
1399                         return;
1400         }
1401
1402         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1403         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1404 }
1405
1406 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1407 {
1408         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1409         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1410                                         MWAIT_SUBSTATE_MASK;
1411
1412         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1413         if (num_substates == 0)
1414                 return false;
1415
1416         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1417                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1418
1419         return true;
1420 }
1421
1422 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1423 {
1424         int cstate;
1425
1426         switch (boot_cpu_data.x86_model) {
1427         case INTEL_FAM6_IVYBRIDGE_X:
1428                 ivt_idle_state_table_update();
1429                 break;
1430         case INTEL_FAM6_ATOM_GOLDMONT:
1431         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1432                 bxt_idle_state_table_update();
1433                 break;
1434         case INTEL_FAM6_SKYLAKE:
1435                 sklh_idle_state_table_update();
1436                 break;
1437         }
1438
1439         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1440                 unsigned int mwait_hint;
1441
1442                 if (intel_idle_max_cstate_reached(cstate))
1443                         break;
1444
1445                 if (!cpuidle_state_table[cstate].enter &&
1446                     !cpuidle_state_table[cstate].enter_s2idle)
1447                         break;
1448
1449                 /* If marked as unusable, skip this state. */
1450                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1451                         pr_debug("state %s is disabled\n",
1452                                  cpuidle_state_table[cstate].name);
1453                         continue;
1454                 }
1455
1456                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1457                 if (!intel_idle_verify_cstate(mwait_hint))
1458                         continue;
1459
1460                 /* Structure copy. */
1461                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1462
1463                 if (icpu->use_acpi && intel_idle_off_by_default(mwait_hint) &&
1464                     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))
1465                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1466
1467                 drv->state_count++;
1468         }
1469
1470         if (icpu->byt_auto_demotion_disable_flag) {
1471                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1472                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1473         }
1474 }
1475
1476 /*
1477  * intel_idle_cpuidle_driver_init()
1478  * allocate, initialize cpuidle_states
1479  */
1480 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1481 {
1482         cpuidle_poll_state_init(drv);
1483         drv->state_count = 1;
1484
1485         if (icpu)
1486                 intel_idle_init_cstates_icpu(drv);
1487         else
1488                 intel_idle_init_cstates_acpi(drv);
1489 }
1490
1491 static void auto_demotion_disable(void)
1492 {
1493         unsigned long long msr_bits;
1494
1495         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1496         msr_bits &= ~(icpu->auto_demotion_disable_flags);
1497         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1498 }
1499
1500 static void c1e_promotion_disable(void)
1501 {
1502         unsigned long long msr_bits;
1503
1504         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1505         msr_bits &= ~0x2;
1506         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1507 }
1508
1509 /*
1510  * intel_idle_cpu_init()
1511  * allocate, initialize, register cpuidle_devices
1512  * @cpu: cpu/core to initialize
1513  */
1514 static int intel_idle_cpu_init(unsigned int cpu)
1515 {
1516         struct cpuidle_device *dev;
1517
1518         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1519         dev->cpu = cpu;
1520
1521         if (cpuidle_register_device(dev)) {
1522                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1523                 return -EIO;
1524         }
1525
1526         if (!icpu)
1527                 return 0;
1528
1529         if (icpu->auto_demotion_disable_flags)
1530                 auto_demotion_disable();
1531
1532         if (icpu->disable_promotion_to_c1e)
1533                 c1e_promotion_disable();
1534
1535         return 0;
1536 }
1537
1538 static int intel_idle_cpu_online(unsigned int cpu)
1539 {
1540         struct cpuidle_device *dev;
1541
1542         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1543                 tick_broadcast_enable();
1544
1545         /*
1546          * Some systems can hotplug a cpu at runtime after
1547          * the kernel has booted, we have to initialize the
1548          * driver in this case
1549          */
1550         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1551         if (!dev->registered)
1552                 return intel_idle_cpu_init(cpu);
1553
1554         return 0;
1555 }
1556
1557 /**
1558  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1559  */
1560 static void __init intel_idle_cpuidle_devices_uninit(void)
1561 {
1562         int i;
1563
1564         for_each_online_cpu(i)
1565                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1566 }
1567
1568 static int __init intel_idle_init(void)
1569 {
1570         const struct x86_cpu_id *id;
1571         unsigned int eax, ebx, ecx;
1572         int retval;
1573
1574         /* Do not load intel_idle at all for now if idle= is passed */
1575         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1576                 return -ENODEV;
1577
1578         if (max_cstate == 0) {
1579                 pr_debug("disabled\n");
1580                 return -EPERM;
1581         }
1582
1583         id = x86_match_cpu(intel_idle_ids);
1584         if (id) {
1585                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1586                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1587                         return -ENODEV;
1588                 }
1589         } else {
1590                 id = x86_match_cpu(intel_mwait_ids);
1591                 if (!id)
1592                         return -ENODEV;
1593         }
1594
1595         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1596                 return -ENODEV;
1597
1598         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1599
1600         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1601             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1602             !mwait_substates)
1603                         return -ENODEV;
1604
1605         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1606
1607         icpu = (const struct idle_cpu *)id->driver_data;
1608         if (icpu) {
1609                 cpuidle_state_table = icpu->state_table;
1610                 if (icpu->use_acpi)
1611                         intel_idle_acpi_cst_extract();
1612         } else if (!intel_idle_acpi_cst_extract()) {
1613                 return -ENODEV;
1614         }
1615
1616         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1617                  boot_cpu_data.x86_model);
1618
1619         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1620         if (!intel_idle_cpuidle_devices)
1621                 return -ENOMEM;
1622
1623         intel_idle_cpuidle_driver_init(&intel_idle_driver);
1624
1625         retval = cpuidle_register_driver(&intel_idle_driver);
1626         if (retval) {
1627                 struct cpuidle_driver *drv = cpuidle_get_driver();
1628                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1629                        drv ? drv->name : "none");
1630                 goto init_driver_fail;
1631         }
1632
1633         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1634                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1635
1636         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1637                                    intel_idle_cpu_online, NULL);
1638         if (retval < 0)
1639                 goto hp_setup_fail;
1640
1641         pr_debug("lapic_timer_reliable_states 0x%x\n",
1642                  lapic_timer_reliable_states);
1643
1644         return 0;
1645
1646 hp_setup_fail:
1647         intel_idle_cpuidle_devices_uninit();
1648         cpuidle_unregister_driver(&intel_idle_driver);
1649 init_driver_fail:
1650         free_percpu(intel_idle_cpuidle_devices);
1651         return retval;
1652
1653 }
1654 device_initcall(intel_idle_init);
1655
1656 /*
1657  * We are not really modular, but we used to support that.  Meaning we also
1658  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1659  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1660  * is the easiest way (currently) to continue doing that.
1661  */
1662 module_param(max_cstate, int, 0444);