/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_plane_helper.h>

#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_fbc.h"
#include "intel_sprite.h"
#include "../../../platform/x86/intel_ips.h"
/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6, and in the
 * voltage consumed by the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6,
 * and RC6pp is the deepest RC6. Their support by hardware varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */
static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
	if (HAS_LLC(dev_priv)) {
		/*
		 * WaCompressedResourceDisplayNewHashMode:skl,kbl
		 * Display WA #0390: skl,kbl
		 *
		 * Must match Sampler, Pixel Back End, and Media. See
		 * WaCompressedResourceSamplerPbeMediaNewHashMode.
		 */
		I915_WRITE(CHICKEN_PAR1_1,
			   I915_READ(CHICKEN_PAR1_1) |
			   SKL_DE_COMPRESSED_HASH_MODE);
	}

	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

	/* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN8_CHICKEN_DCPR_1,
		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
	/* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
		   DISP_FBC_WM_DIS |
		   DISP_FBC_MEMORY_WAKE);

	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_DISABLE_DUMMY0);

	if (IS_SKYLAKE(dev_priv)) {
		/* WaDisableDopClockGating */
		I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
			   & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	}
}
static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);
}
static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/*
	 * WaDisablePWMClockGating:glk
	 * Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);

	/* WaDDIIOTimeout:glk */
	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
		u32 val = I915_READ(CHICKEN_MISC_2);
		val &= ~(GLK_CL0_PWR_DOWN |
			 GLK_CL1_PWR_DOWN |
			 GLK_CL2_PWR_DOWN);
		I915_WRITE(CHICKEN_MISC_2, val);
	}
}
static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}
static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u16 ddrpll, csipll;

	ddrpll = I915_READ16(DDRMPLL1);
	csipll = I915_READ16(CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}
static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
							 bool is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}
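/*
 * Example lookup (illustrative, values taken from the table above): a
 * desktop (is_desktop == 1) DDR3 system with fsb_freq == 800 and
 * mem_freq == 667 matches {1, 1, 800, 667, 6420, 36420, 6873, 36873},
 * i.e. display_sr = 6420 ns, display_hpll_disable = 36420 ns,
 * cursor_sr = 6873 ns and cursor_hpll_disable = 36873 ns.
 */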
static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->pcu_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
	if (enable)
		val &= ~FORCE_DDR_HIGH_FREQ;
	else
		val |= FORCE_DDR_HIGH_FREQ;
	val &= ~FORCE_DDR_LOW_FREQ;
	val |= FORCE_DDR_FREQ_REQ_ACK;
	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

	mutex_unlock(&dev_priv->pcu_lock);
}
static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->pcu_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
	if (enable)
		val |= DSP_MAXFIFO_PM5_ENABLE;
	else
		val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);

	mutex_unlock(&dev_priv->pcu_lock);
}
#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
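/*
 * For example (illustrative), FW_WM(wm, SR) expands to
 * ((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK, i.e. it shifts the watermark
 * value into the SR field of a DSPFW register and masks off any bits
 * that would overflow the field.
 */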
static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool was_enabled;
	u32 val;

	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
		POSTING_READ(FW_BLC_SELF_VLV);
	} else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_PINEVIEW(dev_priv)) {
		val = I915_READ(DSPFW3);
		was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
		if (enable)
			val |= PINEVIEW_SELF_REFRESH_EN;
		else
			val &= ~PINEVIEW_SELF_REFRESH_EN;
		I915_WRITE(DSPFW3, val);
		POSTING_READ(DSPFW3);
	} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_I915GM(dev_priv)) {
		/*
		 * FIXME can't find a bit like this for 915G, and
		 * yet it does have the related watermark in
		 * FW_BLC_SELF. What's going on?
		 */
		was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
		POSTING_READ(INSTPM);
	} else {
		return false;
	}

	trace_intel_memory_cxsr(dev_priv, was_enabled, enable);

	DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
		      enableddisabled(enable),
		      enableddisabled(was_enabled));

	return was_enabled;
}
/**
 * intel_set_memory_cxsr - Configure CxSR state
 * @dev_priv: i915 device
 * @enable: Allow vs. disallow CxSR
 *
 * Allow or disallow the system to enter a special CxSR
 * (C-state self refresh) state. What typically happens in CxSR mode
 * is that several display FIFOs may get combined into a single larger
 * FIFO for a particular plane (so called max FIFO mode) to allow the
 * system to defer memory fetches longer, and the memory will enter
 * self-refresh.
 *
 * Note that enabling CxSR does not guarantee that the system enters
 * this special mode, nor does it guarantee that the system stays
 * in that mode once entered. So this just allows/disallows the system
 * to autonomously utilize the CxSR mode. Other factors such as core
 * C-states will affect when/if the system actually enters/exits the
 * CxSR mode.
 *
 * Note that on VLV/CHV this actually only controls the max FIFO mode,
 * and the system is free to enter/exit memory self refresh at any time
 * even when the use of CxSR has been disallowed.
 *
 * While the system is actually in the CxSR/max FIFO mode, some plane
 * control registers will not get latched on vblank. Thus in order to
 * guarantee the system will respond to changes in the plane registers
 * we must always disallow CxSR prior to making changes to those registers.
 * Unfortunately the system will re-evaluate the CxSR conditions at
 * frame start which happens after vblank start (which is when the plane
 * registers would get latched), so we can't proceed with the plane update
 * during the same frame where we disallowed CxSR.
 *
 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
 * the hardware w.r.t. HPLL SR when writing to plane registers.
 * Disallowing just CxSR is sufficient.
 */
bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool ret;

	mutex_lock(&dev_priv->wm.wm_mutex);
	ret = _intel_set_memory_cxsr(dev_priv, enable);
	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
		dev_priv->wm.vlv.cxsr = enable;
	else if (IS_G4X(dev_priv))
		dev_priv->wm.g4x.cxsr = enable;
	mutex_unlock(&dev_priv->wm.wm_mutex);

	return ret;
}
/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value. It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
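/*
 * Each FIFO split point is a 9 bit value: the low 8 bits live in DSPARB
 * (or DSPARB3) and the 9th bit lives in DSPARB2. For example
 * (illustrative), VLV_FIFO_START(dsparb, dsparb2, 8, 4) recovers a start
 * offset of 0x1a0 from dsparb = 0xa000 and dsparb2 = 0x10:
 * (0xa000 >> 8) & 0xff = 0xa0, plus ((0x10 >> 4) & 0x1) << 8 = 0x100.
 */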
static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
	enum pipe pipe = crtc->pipe;
	int sprite0_start, sprite1_start;
	u32 dsparb, dsparb2, dsparb3;

	switch (pipe) {
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
		break;
	case PIPE_C:
		dsparb2 = I915_READ(DSPARB2);
		dsparb3 = I915_READ(DSPARB3);
		sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
		sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
		break;
	default:
		MISSING_CASE(pipe);
		return;
	}

	fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
	fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
	fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
	fifo_state->plane[PLANE_CURSOR] = 63;
}
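/*
 * Example split (illustrative): with sprite0_start == 256 and
 * sprite1_start == 384, the 511 entries available to the primary/sprite
 * planes are carved up as primary = 256, sprite0 = 128 and
 * sprite1 = 511 - 384 = 127; the cursor has its own fixed 63 entry FIFO.
 */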
static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
			      enum i9xx_plane_id i9xx_plane)
{
	u32 dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	if (i9xx_plane == PLANE_B)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
		      dsparb, plane_name(i9xx_plane), size);

	return size;
}
static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
			      enum i9xx_plane_id i9xx_plane)
{
	u32 dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x1ff;
	if (i9xx_plane == PLANE_B)
		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
	size >>= 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
		      dsparb, plane_name(i9xx_plane), size);

	return size;
}
static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
			      enum i9xx_plane_id i9xx_plane)
{
	u32 dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	size >>= 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
		      dsparb, plane_name(i9xx_plane), size);

	return size;
}
/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params pineview_display_hplloff_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params pineview_cursor_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i965_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i945_wm_info = {
	.fifo_size = I945_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i915_wm_info = {
	.fifo_size = I915_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i830_a_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i830_bc_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM/2,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

static const struct intel_watermark_params i845_wm_info = {
	.fifo_size = I830_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
/**
 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 1 or "small buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the short term drain rate
 * of the FIFO, ie. it does not account for blanking periods
 * which would effectively reduce the average drain rate across
 * a longer period. The name "small" refers to the fact the
 * FIFO is relatively small compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *   |\   |\
 *   | \  | \
 * __---__---__ (- plane active, _ blanking)
 * -> time
 *
 * or perhaps like this:
 *
 *  |\|\  |\|\
 * __----__----__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method1(unsigned int pixel_rate,
				     unsigned int cpp,
				     unsigned int latency)
{
	u64 ret;

	ret = (u64)pixel_rate * cpp * latency;
	ret = DIV_ROUND_UP_ULL(ret, 10000);

	return ret;
}
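/*
 * Worked example (illustrative): pixel_rate = 148500 (kHz), cpp = 4 and
 * latency = 100 (i.e. 10 usec in 0.1 usec units) give
 * 148500 * 4 * 100 / 10000 = 5940 bytes.
 */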
/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the long term drain rate
 * of the FIFO, ie. it does account for blanking periods
 * which effectively reduce the average drain rate across
 * a longer period. The name "large" refers to the fact the
 * FIFO is relatively large compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *    |\___       |\___
 *    |    \___   |    \___
 *    |        \  |        \
 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
				     unsigned int htotal,
				     unsigned int width,
				     unsigned int cpp,
				     unsigned int latency)
{
	unsigned int ret;

	/*
	 * FIXME remove once all users are computing
	 * watermarks in the correct place.
	 */
	if (WARN_ON_ONCE(htotal == 0))
		htotal = 1;

	ret = (latency * pixel_rate) / (htotal * 10000);
	ret = (ret + 1) * width * cpp;

	return ret;
}
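/*
 * Worked example (illustrative): with latency = 100 (10 usec),
 * pixel_rate = 148500 (kHz) and htotal = 2200, the latency covers
 * (100 * 148500) / (2200 * 10000) = 0 complete lines, so one partial
 * line is fetched: (0 + 1) * width * cpp bytes, e.g. 7680 bytes for a
 * 1920 pixel wide 32bpp plane.
 */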
/**
 * intel_calculate_wm - calculate watermark level
 * @pixel_rate: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO line sized chunks from memory until the FIFO fills
 * past the watermark point. If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned int intel_calculate_wm(int pixel_rate,
				       const struct intel_watermark_params *wm,
				       int fifo_size, int cpp,
				       unsigned int latency_ns)
{
	int entries, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand.
	 */
	entries = intel_wm_method1(pixel_rate, cpp,
				   latency_ns / 100);
	entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
		wm->guard_size;
	DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);

	wm_size = fifo_size - entries;
	DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8 which is the burst size since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
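/*
 * Worked example (illustrative): assuming 64 byte cachelines and a guard
 * size of 2, a 5940 byte method 1 result becomes
 * DIV_ROUND_UP(5940, 64) + 2 = 95 entries, so a hypothetical 96 entry
 * FIFO would leave a watermark level of 96 - 95 = 1.
 */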
static bool is_disabling(int old, int new, int threshold)
{
	return old >= threshold && new < threshold;
}

static bool is_enabling(int old, int new, int threshold)
{
	return old < threshold && new >= threshold;
}

static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
{
	return dev_priv->wm.max_level + 1;
}
static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
				   const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);

	/* FIXME check the 'enable' instead */
	if (!crtc_state->base.active)
		return false;

	/*
	 * Treat cursor with fb as always visible since cursor updates
	 * can happen faster than the vrefresh rate, and the current
	 * watermark code doesn't handle that correctly. Cursor updates
	 * which set/clear the fb or change the cursor size are going
	 * to get throttled by intel_legacy_cursor_update() to work
	 * around this problem with the watermark code.
	 */
	if (plane->id == PLANE_CURSOR)
		return plane_state->base.fb != NULL;
	else
		return plane_state->base.visible;
}
static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
{
	struct intel_crtc *crtc, *enabled = NULL;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}
static void pineview_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned int wm;

	latency = intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
					 dev_priv->is_ddr3,
					 dev_priv->fsb_freq,
					 dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev_priv);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int cpp = fb->format->cpp[0];
		int clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= FW_WM(wm, SR);
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					4, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= FW_WM(wm, CURSOR_SR);
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= FW_WM(wm, HPLL_SR);
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					4, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= FW_WM(wm, HPLL_CURSOR);
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}
/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
	int tlb_miss = fifo_size * 64 - width * cpp * 8;

	return max(0, tlb_miss);
}
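/*
 * Worked example (illustrative): a 511 cacheline FIFO holds
 * 511 * 64 = 32704 bytes; a 640 pixel wide 32bpp plane needs
 * 640 * 4 * 8 = 20480 bytes for eight whole lines, so the watermark is
 * bumped by 32704 - 20480 = 12224 bytes. For wider planes the result
 * goes negative and no adjustment is applied.
 */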
static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
				const struct g4x_wm_values *wm)
{
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
		   FW_WM(wm->sr.fbc, FBC_SR) |
		   FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
		   FW_WM(wm->sr.cursor, CURSOR_SR) |
		   FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
		   FW_WM(wm->hpll.plane, HPLL_SR));

	POSTING_READ(DSPFW1);
}
#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
				const struct vlv_wm_values *wm)
{
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe) {
		trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

		I915_WRITE(VLV_DDL(pipe),
			   (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
			   (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
	}

	/*
	 * Zero the (unused) WM1 watermarks, and also clear all the
	 * high order bits so that there are no out of bounds values
	 * present in the registers during the reprogramming.
	 */
	I915_WRITE(DSPHOWM, 0);
	I915_WRITE(DSPHOWM1, 0);
	I915_WRITE(DSPFW4, 0);
	I915_WRITE(DSPFW5, 0);
	I915_WRITE(DSPFW6, 0);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   FW_WM(wm->sr.cursor, CURSOR_SR));

	if (IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(DSPFW7_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPFW8_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
		I915_WRITE(DSPFW9_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	} else {
		I915_WRITE(DSPFW7,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	}

	POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV
static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	/* all latencies in usec */
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;

	dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
}
static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
{
	/*
	 * DSPCNTR[13] supposedly controls whether the
	 * primary plane can use the FIFO space otherwise
	 * reserved for the sprite plane. It's not 100% clear
	 * what the actual FIFO size is, but it looks like we
	 * can happily set both primary and sprite watermarks
	 * up to 127 cachelines. So that would seem to mean
	 * that either DSPCNTR[13] doesn't do anything, or that
	 * the total FIFO is >= 256 cachelines in size. Either
	 * way, we don't seem to have to worry about this
	 * repartitioning as the maximum watermark value the
	 * register can hold for each plane is lower than the
	 * minimum FIFO size.
	 */
	switch (plane_id) {
	case PLANE_CURSOR:
		return 63;
	case PLANE_PRIMARY:
		return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
	case PLANE_SPRITE0:
		return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
	default:
		MISSING_CASE(plane_id);
		return 0;
	}
}
static int g4x_fbc_fifo_size(int level)
{
	switch (level) {
	case G4X_WM_LEVEL_SR:
		return 7;
	case G4X_WM_LEVEL_HPLL:
		return 15;
	default:
		MISSING_CASE(level);
		return 0;
	}
}
static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
			  const struct intel_plane_state *plane_state,
			  int level)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->base.adjusted_mode;
	unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
	unsigned int clock, htotal, cpp, width, wm;

	if (latency == 0)
		return USHRT_MAX;

	if (!intel_wm_plane_visible(crtc_state, plane_state))
		return 0;

	/*
	 * Not 100% sure which way ELK should go here as the
	 * spec only says CL/CTG should assume 32bpp and BW
	 * doesn't need to. But as these things followed the
	 * mobile vs. desktop lines on gen3 as well, let's
	 * assume ELK doesn't need this.
	 *
	 * The spec also fails to list such a restriction for
	 * the HPLL watermark, which seems a little strange.
	 * Let's use 32bpp for the HPLL watermark as well.
	 */
	if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
	    level != G4X_WM_LEVEL_NORMAL)
		cpp = 4;
	else
		cpp = plane_state->base.fb->format->cpp[0];

	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;

	if (plane->id == PLANE_CURSOR)
		width = plane_state->base.crtc_w;
	else
		width = drm_rect_width(&plane_state->base.dst);

	if (plane->id == PLANE_CURSOR) {
		wm = intel_wm_method2(clock, htotal, width, cpp, latency);
	} else if (plane->id == PLANE_PRIMARY &&
		   level == G4X_WM_LEVEL_NORMAL) {
		wm = intel_wm_method1(clock, cpp, latency);
	} else {
		unsigned int small, large;

		small = intel_wm_method1(clock, cpp, latency);
		large = intel_wm_method2(clock, htotal, width, cpp, latency);

		wm = min(small, large);
	}

	wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
			      width, cpp);

	wm = DIV_ROUND_UP(wm, 64) + 2;

	return min_t(unsigned int, wm, USHRT_MAX);
}
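/*
 * Worked example (illustrative): a 7680 byte small/large minimum
 * converts to DIV_ROUND_UP(7680, 64) + 2 = 122 cachelines.
 */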
static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
				 int level, enum plane_id plane_id, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	bool dirty = false;

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

		dirty |= raw->plane[plane_id] != value;
		raw->plane[plane_id] = value;
	}

	return dirty;
}
static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
			       int level, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	bool dirty = false;

	/* NORMAL level doesn't have an FBC watermark */
	level = max(level, G4X_WM_LEVEL_SR);

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

		dirty |= raw->fbc != value;
		raw->fbc = value;
	}

	return dirty;
}
static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
			      const struct intel_plane_state *pstate,
			      u32 pri_val);
static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
				     const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
	enum plane_id plane_id = plane->id;
	bool dirty = false;
	int level;

	if (!intel_wm_plane_visible(crtc_state, plane_state)) {
		dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
		if (plane_id == PLANE_PRIMARY)
			dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
		goto out;
	}

	for (level = 0; level < num_levels; level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
		int wm, max_wm;

		wm = g4x_compute_wm(crtc_state, plane_state, level);
		max_wm = g4x_plane_fifo_size(plane_id, level);

		if (wm > max_wm)
			break;

		dirty |= raw->plane[plane_id] != wm;
		raw->plane[plane_id] = wm;

		if (plane_id != PLANE_PRIMARY ||
		    level == G4X_WM_LEVEL_NORMAL)
			continue;

		wm = ilk_compute_fbc_wm(crtc_state, plane_state,
					raw->plane[plane_id]);
		max_wm = g4x_fbc_fifo_size(level);

		/*
		 * FBC wm is not mandatory as we
		 * can always just disable its use.
		 */
		if (wm > max_wm)
			wm = USHRT_MAX;

		dirty |= raw->fbc != wm;
		raw->fbc = wm;
	}

	/* mark watermarks as invalid */
	dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);

	if (plane_id == PLANE_PRIMARY)
		dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);

 out:
	if (dirty) {
		DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
			      plane->base.name,
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);

		if (plane_id == PLANE_PRIMARY)
			DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
	}

	return dirty;
}
static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
				      enum plane_id plane_id, int level)
{
	const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

	return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
}
static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
				     int level)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);

	if (level > dev_priv->wm.max_level)
		return false;

	return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
		g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
		g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
}
/* mark all levels starting from 'level' as invalid */
static void g4x_invalidate_wms(struct intel_crtc *crtc,
			       struct g4x_wm_state *wm_state, int level)
{
	if (level <= G4X_WM_LEVEL_NORMAL) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id)
			wm_state->wm.plane[plane_id] = USHRT_MAX;
	}

	if (level <= G4X_WM_LEVEL_SR) {
		wm_state->cxsr = false;
		wm_state->sr.cursor = USHRT_MAX;
		wm_state->sr.plane = USHRT_MAX;
		wm_state->sr.fbc = USHRT_MAX;
	}

	if (level <= G4X_WM_LEVEL_HPLL) {
		wm_state->hpll_en = false;
		wm_state->hpll.cursor = USHRT_MAX;
		wm_state->hpll.plane = USHRT_MAX;
		wm_state->hpll.fbc = USHRT_MAX;
	}
}
static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct intel_atomic_state *state =
		to_intel_atomic_state(crtc_state->base.state);
	struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
	int num_active_planes = hweight32(crtc_state->active_planes &
					  ~BIT(PLANE_CURSOR));
	const struct g4x_pipe_wm *raw;
	const struct intel_plane_state *old_plane_state;
	const struct intel_plane_state *new_plane_state;
	struct intel_plane *plane;
	enum plane_id plane_id;
	int i, level;
	unsigned int dirty = 0;

	for_each_oldnew_intel_plane_in_state(state, plane,
					     old_plane_state,
					     new_plane_state, i) {
		if (new_plane_state->base.crtc != &crtc->base &&
		    old_plane_state->base.crtc != &crtc->base)
			continue;

		if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
			dirty |= BIT(plane->id);
	}

	if (!dirty)
		return 0;

	level = G4X_WM_LEVEL_NORMAL;
	if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
		goto out;

	raw = &crtc_state->wm.g4x.raw[level];
	for_each_plane_id_on_crtc(crtc, plane_id)
		wm_state->wm.plane[plane_id] = raw->plane[plane_id];

	level = G4X_WM_LEVEL_SR;
	if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
		goto out;

	raw = &crtc_state->wm.g4x.raw[level];
	wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
	wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
	wm_state->sr.fbc = raw->fbc;

	wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);

	level = G4X_WM_LEVEL_HPLL;
	if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
		goto out;

	raw = &crtc_state->wm.g4x.raw[level];
	wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
	wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
	wm_state->hpll.fbc = raw->fbc;

	wm_state->hpll_en = wm_state->cxsr;

	level++;

 out:
	if (level == G4X_WM_LEVEL_NORMAL)
		return -EINVAL;

	/* invalidate the higher levels */
	g4x_invalidate_wms(crtc, wm_state, level);

	/*
	 * Determine if the FBC watermark(s) can be used. If
	 * this isn't the case we prefer to disable the FBC
	 * watermark(s) rather than disable the SR/HPLL
	 * level(s) entirely.
	 */
	wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;

	if (level >= G4X_WM_LEVEL_SR &&
	    wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
		wm_state->fbc_en = false;
	else if (level >= G4X_WM_LEVEL_HPLL &&
		 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
		wm_state->fbc_en = false;

	return 0;
}
static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
	struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
	const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
	struct intel_atomic_state *intel_state =
		to_intel_atomic_state(new_crtc_state->base.state);
	const struct intel_crtc_state *old_crtc_state =
		intel_atomic_get_old_crtc_state(intel_state, crtc);
	const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
	enum plane_id plane_id;

	if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
		*intermediate = *optimal;

		intermediate->cxsr = false;
		intermediate->hpll_en = false;
		goto out;
	}

	intermediate->cxsr = optimal->cxsr && active->cxsr &&
		!new_crtc_state->disable_cxsr;
	intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
		!new_crtc_state->disable_cxsr;
	intermediate->fbc_en = optimal->fbc_en && active->fbc_en;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		intermediate->wm.plane[plane_id] =
			max(optimal->wm.plane[plane_id],
			    active->wm.plane[plane_id]);

		WARN_ON(intermediate->wm.plane[plane_id] >
			g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
	}

	intermediate->sr.plane = max(optimal->sr.plane,
				     active->sr.plane);
	intermediate->sr.cursor = max(optimal->sr.cursor,
				      active->sr.cursor);
	intermediate->sr.fbc = max(optimal->sr.fbc,
				   active->sr.fbc);

	intermediate->hpll.plane = max(optimal->hpll.plane,
				       active->hpll.plane);
	intermediate->hpll.cursor = max(optimal->hpll.cursor,
					active->hpll.cursor);
	intermediate->hpll.fbc = max(optimal->hpll.fbc,
				     active->hpll.fbc);

	WARN_ON((intermediate->sr.plane >
		 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
		 intermediate->sr.cursor >
		 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
		intermediate->cxsr);
	WARN_ON((intermediate->sr.plane >
		 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
		 intermediate->sr.cursor >
		 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
		intermediate->hpll_en);

	WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
		intermediate->fbc_en && intermediate->cxsr);
	WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
		intermediate->fbc_en && intermediate->hpll_en);

out:
	/*
	 * If our intermediate WM are identical to the final WM, then we can
	 * omit the post-vblank programming; only update if it's different.
	 */
	if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
		new_crtc_state->wm.need_postvbl_update = true;

	return 0;
}
static void g4x_merge_wm(struct drm_i915_private *dev_priv,
			 struct g4x_wm_values *wm)
{
	struct intel_crtc *crtc;
	int num_active_crtcs = 0;

	wm->cxsr = true;
	wm->hpll_en = true;
	wm->fbc_en = true;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;

		if (!crtc->active)
			continue;

		if (!wm_state->cxsr)
			wm->cxsr = false;
		if (!wm_state->hpll_en)
			wm->hpll_en = false;
		if (!wm_state->fbc_en)
			wm->fbc_en = false;

		num_active_crtcs++;
	}

	if (num_active_crtcs != 1) {
		wm->cxsr = false;
		wm->hpll_en = false;
		wm->fbc_en = false;
	}

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
		enum pipe pipe = crtc->pipe;

		wm->pipe[pipe] = wm_state->wm;
		if (crtc->active && wm->cxsr)
			wm->sr = wm_state->sr;
		if (crtc->active && wm->hpll_en)
			wm->hpll = wm_state->hpll;
	}
}
static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
{
	struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
	struct g4x_wm_values new_wm = {};

	g4x_merge_wm(dev_priv, &new_wm);

	if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
		return;

	if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, false);

	g4x_write_wm_values(dev_priv, &new_wm);

	if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, true);

	*old_wm = new_wm;
}
static void g4x_initial_watermarks(struct intel_atomic_state *state,
				   struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);

	mutex_lock(&dev_priv->wm.wm_mutex);
	crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
	g4x_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}
static void g4x_optimize_watermarks(struct intel_atomic_state *state,
				    struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);

	if (!crtc_state->wm.need_postvbl_update)
		return;

	mutex_lock(&dev_priv->wm.wm_mutex);
	intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
	g4x_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}
/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method2(pixel_rate, htotal,
			       width, cpp, latency);
	ret = DIV_ROUND_UP(ret, 64);

	return ret;
}
static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	/* all latencies in usec */
	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

	dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

	if (IS_CHERRYVIEW(dev_priv)) {
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

		dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
	}
}
static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
				const struct intel_plane_state *plane_state,
				int level)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->base.adjusted_mode;
	unsigned int clock, htotal, cpp, width, wm;

	if (dev_priv->wm.pri_latency[level] == 0)
		return USHRT_MAX;

	if (!intel_wm_plane_visible(crtc_state, plane_state))
		return 0;

	cpp = plane_state->base.fb->format->cpp[0];
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	width = crtc_state->pipe_src_w;

	if (plane->id == PLANE_CURSOR) {
		/*
		 * FIXME the formula gives values that are
		 * too big for the cursor FIFO, and hence we
		 * would never be able to use cursors. For
		 * now just hardcode the watermark.
		 */
		wm = 63;
	} else {
		wm = vlv_wm_method2(clock, htotal, width, cpp,
				    dev_priv->wm.pri_latency[level] * 10);
	}

	return min_t(unsigned int, wm, USHRT_MAX);
}
static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
{
	return (active_planes & (BIT(PLANE_SPRITE0) |
				 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
}
static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	const struct g4x_pipe_wm *raw =
		&crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
	unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
	int num_active_planes = hweight32(active_planes);
	const int fifo_size = 511;
	int fifo_extra, fifo_left = fifo_size;
	int sprite0_fifo_extra = 0;
	unsigned int total_rate;
	enum plane_id plane_id;

	/*
	 * When enabling sprite0 after sprite1 has already been enabled
	 * we tend to get an underrun unless sprite0 already has some
	 * FIFO space allocated. Hence we always allocate at least one
	 * cacheline for sprite0 whenever sprite1 is enabled.
	 *
	 * All other plane enable sequences appear immune to this problem.
	 */
	if (vlv_need_sprite0_fifo_workaround(active_planes))
		sprite0_fifo_extra = 1;

	total_rate = raw->plane[PLANE_PRIMARY] +
		raw->plane[PLANE_SPRITE0] +
		raw->plane[PLANE_SPRITE1] +
		sprite0_fifo_extra;

	if (total_rate > fifo_size)
		return -EINVAL;

	if (total_rate == 0)
		total_rate = 1;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		unsigned int rate;

		if ((active_planes & BIT(plane_id)) == 0) {
			fifo_state->plane[plane_id] = 0;
			continue;
		}

		rate = raw->plane[plane_id];
		fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
		fifo_left -= fifo_state->plane[plane_id];
	}

	fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
	fifo_left -= sprite0_fifo_extra;

	fifo_state->plane[PLANE_CURSOR] = 63;

	fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);

	/* spread the remainder evenly */
	for_each_plane_id_on_crtc(crtc, plane_id) {
		int plane_extra;

		if (fifo_left == 0)
			break;

		if ((active_planes & BIT(plane_id)) == 0)
			continue;

		plane_extra = min(fifo_extra, fifo_left);
		fifo_state->plane[plane_id] += plane_extra;
		fifo_left -= plane_extra;
	}

	WARN_ON(active_planes != 0 && fifo_left != 0);

	/* give it all to the first plane if none are active */
	if (active_planes == 0) {
		WARN_ON(fifo_left != fifo_size);
		fifo_state->plane[PLANE_PRIMARY] = fifo_left;
	}

	return 0;
}
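/*
 * Worked example (illustrative): with only the primary and sprite0
 * active and PM2 rates primary = 96, sprite0 = 32 (total_rate = 128),
 * the proportional split is primary = 511 * 96 / 128 = 383 and
 * sprite0 = 511 * 32 / 128 = 127, leaving fifo_left = 1 which the
 * remainder loop hands to the primary for a final 384/127 split.
 */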
/* mark all levels starting from 'level' as invalid */
static void vlv_invalidate_wms(struct intel_crtc *crtc,
			       struct vlv_wm_state *wm_state, int level)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id)
			wm_state->wm[level].plane[plane_id] = USHRT_MAX;

		wm_state->sr[level].cursor = USHRT_MAX;
		wm_state->sr[level].plane = USHRT_MAX;
	}
}
static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
{
	if (wm > fifo_size)
		return USHRT_MAX;
	else
		return fifo_size - wm;
}
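/*
 * Illustrative: wm = 100 with fifo_size = 511 programs as
 * 511 - 100 = 411; a wm that doesn't fit in the FIFO becomes
 * USHRT_MAX, which is treated as an invalid level elsewhere.
 */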
/*
 * Starting from 'level' set all higher
 * levels to 'value' in the "raw" watermarks.
 */
static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
				 int level, enum plane_id plane_id, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	int num_levels = intel_wm_num_levels(dev_priv);
	bool dirty = false;

	for (; level < num_levels; level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];

		dirty |= raw->plane[plane_id] != value;
		raw->plane[plane_id] = value;
	}

	return dirty;
}
static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
				     const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	enum plane_id plane_id = plane->id;
	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
	int level;
	bool dirty = false;

	if (!intel_wm_plane_visible(crtc_state, plane_state)) {
		dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
		goto out;
	}

	for (level = 0; level < num_levels; level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
		int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
		int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;

		if (wm > max_wm)
			break;

		dirty |= raw->plane[plane_id] != wm;
		raw->plane[plane_id] = wm;
	}

	/* mark all higher levels as invalid */
	dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);

out:
	if (dirty)
		DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
			      plane->base.name,
			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);

	return dirty;
}
static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
				      enum plane_id plane_id, int level)
{
	const struct g4x_pipe_wm *raw =
		&crtc_state->wm.vlv.raw[level];
	const struct vlv_fifo_state *fifo_state =
		&crtc_state->wm.vlv.fifo_state;

	return raw->plane[plane_id] <= fifo_state->plane[plane_id];
}
static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
{
	return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
		vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
		vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
		vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
}
static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	struct intel_atomic_state *state =
		to_intel_atomic_state(crtc_state->base.state);
	struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
	const struct vlv_fifo_state *fifo_state =
		&crtc_state->wm.vlv.fifo_state;
	int num_active_planes = hweight32(crtc_state->active_planes &
					  ~BIT(PLANE_CURSOR));
	bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
	const struct intel_plane_state *old_plane_state;
	const struct intel_plane_state *new_plane_state;
	struct intel_plane *plane;
	enum plane_id plane_id;
	int level, ret, i;
	unsigned int dirty = 0;

	for_each_oldnew_intel_plane_in_state(state, plane,
					     old_plane_state,
					     new_plane_state, i) {
		if (new_plane_state->base.crtc != &crtc->base &&
		    old_plane_state->base.crtc != &crtc->base)
			continue;

		if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
			dirty |= BIT(plane->id);
	}

	/*
	 * DSPARB registers may have been reset due to the
	 * power well being turned off. Make sure we restore
	 * them to a consistent state even if no primary/sprite
	 * planes are initially active.
	 */
	if (needs_modeset)
		crtc_state->fifo_changed = true;

	if (!dirty)
		return 0;

	/* cursor changes don't warrant a FIFO recompute */
	if (dirty & ~BIT(PLANE_CURSOR)) {
		const struct intel_crtc_state *old_crtc_state =
			intel_atomic_get_old_crtc_state(state, crtc);
		const struct vlv_fifo_state *old_fifo_state =
			&old_crtc_state->wm.vlv.fifo_state;

		ret = vlv_compute_fifo(crtc_state);
		if (ret)
			return ret;

		if (needs_modeset ||
		    memcmp(old_fifo_state, fifo_state,
			   sizeof(*fifo_state)) != 0)
			crtc_state->fifo_changed = true;
	}

	/* initially allow all levels */
	wm_state->num_levels = intel_wm_num_levels(dev_priv);
	/*
	 * Note that enabling cxsr with no primary/sprite planes
	 * enabled can wedge the pipe. Hence we only allow cxsr
	 * with exactly one enabled primary/sprite plane.
	 */
	wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;

	for (level = 0; level < wm_state->num_levels; level++) {
		const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
		const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;

		if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
			break;

		for_each_plane_id_on_crtc(crtc, plane_id) {
			wm_state->wm[level].plane[plane_id] =
				vlv_invert_wm_value(raw->plane[plane_id],
						    fifo_state->plane[plane_id]);
		}

		wm_state->sr[level].plane =
			vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
						 raw->plane[PLANE_SPRITE0],
						 raw->plane[PLANE_SPRITE1]),
					    sr_fifo_size);

		wm_state->sr[level].cursor =
			vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
					    63);
	}

	if (level == 0)
		return -EINVAL;

	/* limit to only levels we can actually handle */
	wm_state->num_levels = level;

	/* invalidate the higher levels */
	vlv_invalidate_wms(crtc, wm_state, level);

	return 0;
}
#define VLV_FIFO(plane, value) \
	(((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
				   struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	const struct vlv_fifo_state *fifo_state =
		&crtc_state->wm.vlv.fifo_state;
	int sprite0_start, sprite1_start, fifo_size;

	if (!crtc_state->fifo_changed)
		return;

	sprite0_start = fifo_state->plane[PLANE_PRIMARY];
	sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
	fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;

	WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
	WARN_ON(fifo_size != 511);

	trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);

	/*
	 * uncore.lock serves a double purpose here. It allows us to
	 * use the less expensive I915_{READ,WRITE}_FW() functions, and
	 * it protects the DSPARB registers from getting clobbered by
	 * parallel updates from multiple pipes.
	 *
	 * intel_pipe_update_start() has already disabled interrupts
	 * for us, so a plain spin_lock() is sufficient here.
	 */
	spin_lock(&dev_priv->uncore.lock);

	switch (crtc->pipe) {
		u32 dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ_FW(DSPARB);
		dsparb2 = I915_READ_FW(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
			    VLV_FIFO(SPRITEB, 0xff));
		dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
			   VLV_FIFO(SPRITEB, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
			     VLV_FIFO(SPRITEB_HI, 0x1));
		dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));

		I915_WRITE_FW(DSPARB, dsparb);
		I915_WRITE_FW(DSPARB2, dsparb2);
		break;
	case PIPE_B:
		dsparb = I915_READ_FW(DSPARB);
		dsparb2 = I915_READ_FW(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
			    VLV_FIFO(SPRITED, 0xff));
		dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
			   VLV_FIFO(SPRITED, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
			     VLV_FIFO(SPRITED_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITED_HI, sprite1_start >> 8));

		I915_WRITE_FW(DSPARB, dsparb);
		I915_WRITE_FW(DSPARB2, dsparb2);
		break;
	case PIPE_C:
		dsparb3 = I915_READ_FW(DSPARB3);
		dsparb2 = I915_READ_FW(DSPARB2);

		dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
			     VLV_FIFO(SPRITEF, 0xff));
		dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
			    VLV_FIFO(SPRITEF, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
			     VLV_FIFO(SPRITEF_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));

		I915_WRITE_FW(DSPARB3, dsparb3);
		I915_WRITE_FW(DSPARB2, dsparb2);
		break;
	default:
		break;
	}

	POSTING_READ_FW(DSPARB);

	spin_unlock(&dev_priv->uncore.lock);
}

#undef VLV_FIFO

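/*
 * Worked example (illustrative, not part of the original code): with a
 * FIFO split of primary = 256, sprite0 = 128, sprite1 = 127 and cursor =
 * 63 entries, the code above computes sprite0_start = 256, sprite1_start
 * = 384 and fifo_size = 511, satisfying both WARN_ONs before the values
 * are written into the DSPARB registers.
 */
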
static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
	struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
	const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
	struct intel_atomic_state *intel_state =
		to_intel_atomic_state(new_crtc_state->base.state);
	const struct intel_crtc_state *old_crtc_state =
		intel_atomic_get_old_crtc_state(intel_state, crtc);
	const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
	int level;

	if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
		*intermediate = *optimal;

		intermediate->cxsr = false;
		goto out;
	}

	intermediate->num_levels = min(optimal->num_levels, active->num_levels);
	intermediate->cxsr = optimal->cxsr && active->cxsr &&
		!new_crtc_state->disable_cxsr;

	for (level = 0; level < intermediate->num_levels; level++) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id) {
			intermediate->wm[level].plane[plane_id] =
				min(optimal->wm[level].plane[plane_id],
				    active->wm[level].plane[plane_id]);
		}

		intermediate->sr[level].plane = min(optimal->sr[level].plane,
						    active->sr[level].plane);
		intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
						     active->sr[level].cursor);
	}

	vlv_invalidate_wms(crtc, intermediate, level);

out:
	/*
	 * If our intermediate WM are identical to the final WM, then we can
	 * omit the post-vblank programming; only update if it's different.
	 */
	if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
		new_crtc_state->wm.need_postvbl_update = true;

	return 0;
}

static void vlv_merge_wm(struct drm_i915_private *dev_priv,
			 struct vlv_wm_values *wm)
{
	struct intel_crtc *crtc;
	int num_active_crtcs = 0;

	wm->level = dev_priv->wm.max_level;
	wm->cxsr = true;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;

		if (!crtc->active)
			continue;

		if (!wm_state->cxsr)
			wm->cxsr = false;

		num_active_crtcs++;
		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
	}

	if (num_active_crtcs != 1)
		wm->cxsr = false;

	if (num_active_crtcs > 1)
		wm->level = VLV_WM_LEVEL_PM2;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
		enum pipe pipe = crtc->pipe;

		wm->pipe[pipe] = wm_state->wm[wm->level];
		if (crtc->active && wm->cxsr)
			wm->sr = wm_state->sr[wm->level];

		wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
	}
}

static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
{
	struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
	struct vlv_wm_values new_wm = {};

	vlv_merge_wm(dev_priv, &new_wm);

	if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
		return;

	if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
		chv_set_memory_dvfs(dev_priv, false);

	if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
		chv_set_memory_pm5(dev_priv, false);

	if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, false);

	vlv_write_wm_values(dev_priv, &new_wm);

	if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, true);

	if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
		chv_set_memory_pm5(dev_priv, true);

	if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
		chv_set_memory_dvfs(dev_priv, true);

	*old_wm = new_wm;
}

static void vlv_initial_watermarks(struct intel_atomic_state *state,
				   struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);

	mutex_lock(&dev_priv->wm.wm_mutex);
	crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
	vlv_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}

static void vlv_optimize_watermarks(struct intel_atomic_state *state,
				    struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);

	if (!crtc_state->wm.need_postvbl_update)
		return;

	mutex_lock(&dev_priv->wm.wm_mutex);
	intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
	vlv_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}

static void i965_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	int srwm = 1;
	int cursor_sr = 16;
	bool cxsr_enabled;

	/* Calc sr entries for one plane configs */
	crtc = single_enabled_crtc(dev_priv);
	if (crtc) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 12000;
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = crtc->config->pipe_src_w;
		int cpp = fb->format->cpp[0];
		int entries;

		entries = intel_wm_method2(clock, htotal,
					   hdisplay, cpp, sr_latency_ns / 100);
		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
		srwm = I965_FIFO_SIZE - entries;
		if (srwm < 0)
			srwm = 1;
		srwm &= 0x1ff;
		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
			      entries, srwm);

		entries = intel_wm_method2(clock, htotal,
					   crtc->base.cursor->state->crtc_w, 4,
					   sr_latency_ns / 100);
		entries = DIV_ROUND_UP(entries,
				       i965_cursor_wm_info.cacheline_size) +
			i965_cursor_wm_info.guard_size;

		cursor_sr = i965_cursor_wm_info.fifo_size - entries;
		if (cursor_sr > i965_cursor_wm_info.max_wm)
			cursor_sr = i965_cursor_wm_info.max_wm;

		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
			      "cursor %d\n", srwm, cursor_sr);

		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		/* Turn off self refresh if both pipes are enabled */
		intel_set_memory_cxsr(dev_priv, false);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
		      srwm);

	/* 965 has limitations... */
	I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
		   FW_WM(8, CURSORB) |
		   FW_WM(8, PLANEB) |
		   FW_WM(8, PLANEA));
	I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
		   FW_WM(8, PLANEC_OLD));
	/* update cursor SR watermark */
	I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i9xx_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	const struct intel_watermark_params *wm_info;
	u32 fwater_lo;
	u32 fwater_hi;
	int cwm, srwm = 1;
	int fifo_size;
	int planea_wm, planeb_wm;
	struct intel_crtc *crtc, *enabled = NULL;

	if (IS_I945GM(dev_priv))
		wm_info = &i945_wm_info;
	else if (!IS_GEN(dev_priv, 2))
		wm_info = &i915_wm_info;
	else
		wm_info = &i830_a_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
	crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int cpp;

		if (IS_GEN(dev_priv, 2))
			cpp = 4;
		else
			cpp = fb->format->cpp[0];

		planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		enabled = crtc;
	} else {
		planea_wm = fifo_size - wm_info->guard_size;
		if (planea_wm > (long)wm_info->max_wm)
			planea_wm = wm_info->max_wm;
	}

	if (IS_GEN(dev_priv, 2))
		wm_info = &i830_bc_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
	crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int cpp;

		if (IS_GEN(dev_priv, 2))
			cpp = 4;
		else
			cpp = fb->format->cpp[0];

		planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		if (enabled == NULL)
			enabled = crtc;
		else
			enabled = NULL;
	} else {
		planeb_wm = fifo_size - wm_info->guard_size;
		if (planeb_wm > (long)wm_info->max_wm)
			planeb_wm = wm_info->max_wm;
	}

	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

	if (IS_I915GM(dev_priv) && enabled) {
		struct drm_i915_gem_object *obj;

		obj = intel_fb_obj(enabled->base.primary->state->fb);

		/* self-refresh seems busted with untiled */
		if (!i915_gem_object_is_tiled(obj))
			enabled = NULL;
	}

	/*
	 * Overlay gets an aggressive default since video jitter is bad.
	 */
	cwm = 2;

	/* Play safe and disable self-refresh before adjusting watermarks. */
	intel_set_memory_cxsr(dev_priv, false);

	/* Calc sr entries for one plane configs */
	if (HAS_FW_BLC(dev_priv) && enabled) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 6000;
		const struct drm_display_mode *adjusted_mode =
			&enabled->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			enabled->base.primary->state->fb;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = enabled->config->pipe_src_w;
		int cpp;
		int entries;

		if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
			cpp = 4;
		else
			cpp = fb->format->cpp[0];

		entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
					   sr_latency_ns / 100);
		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
		srwm = wm_info->fifo_size - entries;
		if (srwm < 0)
			srwm = 1;

		if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
			I915_WRITE(FW_BLC_SELF,
				   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
		else
			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
		      planea_wm, planeb_wm, cwm, srwm);

	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
	fwater_hi = (cwm & 0x1f);

	/* Set request length to 8 cachelines per fetch */
	fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
	fwater_hi = fwater_hi | (1 << 8);

	I915_WRITE(FW_BLC, fwater_lo);
	I915_WRITE(FW_BLC2, fwater_hi);

	if (enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i845_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	u32 fwater_lo;
	int planea_wm;

	crtc = single_enabled_crtc(dev_priv);
	if (crtc == NULL)
		return;

	adjusted_mode = &crtc->config->base.adjusted_mode;
	planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
				       &i845_wm_info,
				       dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
				       4, pessimal_latency_ns);
	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
	fwater_lo |= (3<<8) | planea_wm;

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);

	I915_WRITE(FW_BLC, fwater_lo);
}

/* latency must be in 0.1us units. */
static unsigned int ilk_wm_method1(unsigned int pixel_rate,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method1(pixel_rate, cpp, latency);
	ret = DIV_ROUND_UP(ret, 64) + 2;

	return ret;
}

/* latency must be in 0.1us units. */
static unsigned int ilk_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method2(pixel_rate, htotal,
			       width, cpp, latency);
	ret = DIV_ROUND_UP(ret, 64) + 2;

	return ret;
}

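/*
 * Illustrative arithmetic (assumed inputs, not part of the original code):
 * if intel_wm_method1() were to return 640 bytes for a given pixel rate,
 * cpp and latency, the wrappers above would yield
 * DIV_ROUND_UP(640, 64) + 2 = 12, i.e. the raw byte count is converted
 * into 64-byte FIFO lines plus a two-line guard.
 */
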
static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp)
{
	/*
	 * Neither of these should be possible since this function shouldn't be
	 * called if the CRTC is off or the plane is invisible. But let's be
	 * extra paranoid to avoid a potential divide-by-zero if we screw up
	 * elsewhere in the driver.
	 */
	if (WARN_ON(!cpp))
		return 0;
	if (WARN_ON(!horiz_pixels))
		return 0;

	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
}

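/*
 * Illustrative arithmetic (assumed inputs, not part of the original code):
 * pri_val = 123 on a 1920-pixel wide plane at cpp = 4 gives
 * DIV_ROUND_UP(123 * 64, 1920 * 4) + 2 = DIV_ROUND_UP(7872, 7680) + 2 = 4
 * FBC watermark lines.
 */
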
struct ilk_wm_maximums {
	u16 pri;
	u16 spr;
	u16 cur;
	u16 fbc;
};

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
			      const struct intel_plane_state *pstate,
			      u32 mem_value, bool is_lp)
{
	u32 method1, method2;
	int cpp;

	if (mem_value == 0)
		return U32_MAX;

	if (!intel_wm_plane_visible(cstate, pstate))
		return 0;

	cpp = pstate->base.fb->format->cpp[0];

	method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);

	if (!is_lp)
		return method1;

	method2 = ilk_wm_method2(cstate->pixel_rate,
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->base.dst),
				 cpp, mem_value);

	return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
			      const struct intel_plane_state *pstate,
			      u32 mem_value)
{
	u32 method1, method2;
	int cpp;

	if (mem_value == 0)
		return U32_MAX;

	if (!intel_wm_plane_visible(cstate, pstate))
		return 0;

	cpp = pstate->base.fb->format->cpp[0];

	method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
	method2 = ilk_wm_method2(cstate->pixel_rate,
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->base.dst),
				 cpp, mem_value);
	return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
			      const struct intel_plane_state *pstate,
			      u32 mem_value)
{
	int cpp;

	if (mem_value == 0)
		return U32_MAX;

	if (!intel_wm_plane_visible(cstate, pstate))
		return 0;

	cpp = pstate->base.fb->format->cpp[0];

	return ilk_wm_method2(cstate->pixel_rate,
			      cstate->base.adjusted_mode.crtc_htotal,
			      pstate->base.crtc_w, cpp, mem_value);
}

/* Only for WM_LP. */
static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
			      const struct intel_plane_state *pstate,
			      u32 pri_val)
{
	int cpp;

	if (!intel_wm_plane_visible(cstate, pstate))
		return 0;

	cpp = pstate->base.fb->format->cpp[0];

	return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
}

static unsigned int
ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 8)
		return 3072;
	else if (INTEL_GEN(dev_priv) >= 7)
		return 768;
	else
		return 512;
}

static unsigned int
ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
		     int level, bool is_sprite)
{
	if (INTEL_GEN(dev_priv) >= 8)
		/* BDW primary/sprite plane watermarks */
		return level == 0 ? 255 : 2047;
	else if (INTEL_GEN(dev_priv) >= 7)
		/* IVB/HSW primary/sprite plane watermarks */
		return level == 0 ? 127 : 1023;
	else if (!is_sprite)
		/* ILK/SNB primary plane watermarks */
		return level == 0 ? 127 : 511;
	else
		/* ILK/SNB sprite plane watermarks */
		return level == 0 ? 63 : 255;
}

static unsigned int
ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
{
	if (INTEL_GEN(dev_priv) >= 7)
		return level == 0 ? 63 : 255;
	else
		return level == 0 ? 31 : 63;
}

static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 8)
		return 31;
	else
		return 15;
}

/* Calculate the maximum primary/sprite plane watermark */
static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
				     int level,
				     const struct intel_wm_config *config,
				     enum intel_ddb_partitioning ddb_partitioning,
				     bool is_sprite)
{
	unsigned int fifo_size = ilk_display_fifo_size(dev_priv);

	/* if sprites aren't enabled, sprites get nothing */
	if (is_sprite && !config->sprites_enabled)
		return 0;

	/* HSW allows LP1+ watermarks even with multiple pipes */
	if (level == 0 || config->num_pipes_active > 1) {
		fifo_size /= INTEL_INFO(dev_priv)->num_pipes;

		/*
		 * For some reason the non self refresh
		 * FIFO size is only half of the self
		 * refresh FIFO size on ILK/SNB.
		 */
		if (INTEL_GEN(dev_priv) <= 6)
			fifo_size /= 2;
	}

	if (config->sprites_enabled) {
		/* level 0 is always calculated with 1:1 split */
		if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
			if (is_sprite)
				fifo_size *= 5;
			fifo_size /= 6;
		} else {
			fifo_size /= 2;
		}
	}

	/* clamp to max that the registers can hold */
	return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
}

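/*
 * Worked example (illustrative, not part of the original code): on a
 * two-pipe gen5 part with a 512-entry FIFO and both pipes active, level 0
 * gets 512 / 2 pipes / 2 = 128 entries; with sprites enabled and the
 * default 1:1 split that halves again to 64 entries, which is below the
 * 127-entry LP0 register maximum so the clamp above is a no-op.
 */
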
/* Calculate the maximum cursor plane watermark */
static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
				      int level,
				      const struct intel_wm_config *config)
{
	/* HSW LP1+ watermarks w/ multiple pipes */
	if (level > 0 && config->num_pipes_active > 1)
		return 64;

	/* otherwise just report max that registers can hold */
	return ilk_cursor_wm_reg_max(dev_priv, level);
}

static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
				    int level,
				    const struct intel_wm_config *config,
				    enum intel_ddb_partitioning ddb_partitioning,
				    struct ilk_wm_maximums *max)
{
	max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
	max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
	max->cur = ilk_cursor_wm_max(dev_priv, level, config);
	max->fbc = ilk_fbc_wm_reg_max(dev_priv);
}

static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
					int level,
					struct ilk_wm_maximums *max)
{
	max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
	max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
	max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
	max->fbc = ilk_fbc_wm_reg_max(dev_priv);
}

static bool ilk_validate_wm_level(int level,
				  const struct ilk_wm_maximums *max,
				  struct intel_wm_level *result)
{
	bool ret;

	/* already determined to be invalid? */
	if (!result->enable)
		return false;

	result->enable = result->pri_val <= max->pri &&
			 result->spr_val <= max->spr &&
			 result->cur_val <= max->cur;

	ret = result->enable;

	/*
	 * HACK until we can pre-compute everything,
	 * and thus fail gracefully if LP0 watermarks
	 * are exceeded...
	 */
	if (level == 0 && !result->enable) {
		if (result->pri_val > max->pri)
			DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
				      level, result->pri_val, max->pri);
		if (result->spr_val > max->spr)
			DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
				      level, result->spr_val, max->spr);
		if (result->cur_val > max->cur)
			DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
				      level, result->cur_val, max->cur);

		result->pri_val = min_t(u32, result->pri_val, max->pri);
		result->spr_val = min_t(u32, result->spr_val, max->spr);
		result->cur_val = min_t(u32, result->cur_val, max->cur);
		result->enable = true;
	}

	return ret;
}

static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
				 const struct intel_crtc *intel_crtc,
				 int level,
				 struct intel_crtc_state *cstate,
				 const struct intel_plane_state *pristate,
				 const struct intel_plane_state *sprstate,
				 const struct intel_plane_state *curstate,
				 struct intel_wm_level *result)
{
	u16 pri_latency = dev_priv->wm.pri_latency[level];
	u16 spr_latency = dev_priv->wm.spr_latency[level];
	u16 cur_latency = dev_priv->wm.cur_latency[level];

	/* WM1+ latency values stored in 0.5us units */
	if (level > 0) {
		pri_latency *= 5;
		spr_latency *= 5;
		cur_latency *= 5;
	}

	if (pristate) {
		result->pri_val = ilk_compute_pri_wm(cstate, pristate,
						     pri_latency, level);
		result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
	}

	if (sprstate)
		result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);

	if (curstate)
		result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);

	result->enable = true;
}

static u32
hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
{
	const struct intel_atomic_state *intel_state =
		to_intel_atomic_state(cstate->base.state);
	const struct drm_display_mode *adjusted_mode =
		&cstate->base.adjusted_mode;
	u32 linetime, ips_linetime;

	if (!cstate->base.active)
		return 0;
	if (WARN_ON(adjusted_mode->crtc_clock == 0))
		return 0;
	if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
		return 0;

	/*
	 * The watermarks are computed based on how long it takes to fill a
	 * single row at the given clock rate, multiplied by 8.
	 */
	linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
				     adjusted_mode->crtc_clock);
	ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
					 intel_state->cdclk.logical.cdclk);

	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
	       PIPE_WM_LINETIME_TIME(linetime);
}

static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
				  u16 wm[8])
{
	if (INTEL_GEN(dev_priv) >= 9) {
		u32 val;
		int ret, i;
		int level, max_level = ilk_wm_max_level(dev_priv);

		/* read the first set of memory latencies[0:3] */
		val = 0; /* data0 to be programmed to 0 for first set */
		mutex_lock(&dev_priv->pcu_lock);
		ret = sandybridge_pcode_read(dev_priv,
					     GEN9_PCODE_READ_MEM_LATENCY,
					     &val);
		mutex_unlock(&dev_priv->pcu_lock);

		if (ret) {
			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
			return;
		}

		wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
		wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
				GEN9_MEM_LATENCY_LEVEL_MASK;
		wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
				GEN9_MEM_LATENCY_LEVEL_MASK;
		wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
				GEN9_MEM_LATENCY_LEVEL_MASK;

		/* read the second set of memory latencies[4:7] */
		val = 1; /* data0 to be programmed to 1 for second set */
		mutex_lock(&dev_priv->pcu_lock);
		ret = sandybridge_pcode_read(dev_priv,
					     GEN9_PCODE_READ_MEM_LATENCY,
					     &val);
		mutex_unlock(&dev_priv->pcu_lock);
		if (ret) {
			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
			return;
		}

		wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
		wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
				GEN9_MEM_LATENCY_LEVEL_MASK;
		wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
				GEN9_MEM_LATENCY_LEVEL_MASK;
		wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
				GEN9_MEM_LATENCY_LEVEL_MASK;

		/*
		 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
		 * need to be disabled. We make sure to sanitize the values out
		 * of the punit to satisfy this requirement.
		 */
		for (level = 1; level <= max_level; level++) {
			if (wm[level] == 0) {
				for (i = level + 1; i <= max_level; i++)
					wm[i] = 0;
				break;
			}
		}

		/*
		 * WaWmMemoryReadLatency:skl+,glk
		 *
		 * punit doesn't take into account the read latency so we need
		 * to add 2us to the various latency levels we retrieve from
		 * the punit when level 0 response data is 0us.
		 */
		if (wm[0] == 0) {
			wm[0] += 2;
			for (level = 1; level <= max_level; level++) {
				if (wm[level] == 0)
					break;
				wm[level] += 2;
			}
		}

		/*
		 * WA Level-0 adjustment for 16GB DIMMs: SKL+
		 * If we could not read the DIMM info, assume 16GB DIMMs and
		 * apply this WA to prevent any underruns.
		 */
		if (dev_priv->dram_info.is_16gb_dimm)
			wm[0] += 1;

	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
		u64 sskpd = I915_READ64(MCH_SSKPD);

		wm[0] = (sskpd >> 56) & 0xFF;
		if (wm[0] == 0)
			wm[0] = sskpd & 0xF;
		wm[1] = (sskpd >> 4) & 0xFF;
		wm[2] = (sskpd >> 12) & 0xFF;
		wm[3] = (sskpd >> 20) & 0x1FF;
		wm[4] = (sskpd >> 32) & 0x1FF;
	} else if (INTEL_GEN(dev_priv) >= 6) {
		u32 sskpd = I915_READ(MCH_SSKPD);

		wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
		wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
		wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
		wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
	} else if (INTEL_GEN(dev_priv) >= 5) {
		u32 mltr = I915_READ(MLTR_ILK);

		/* ILK primary LP0 latency is 700 ns */
		wm[0] = 7;
		wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
		wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
	} else {
		MISSING_CASE(INTEL_DEVID(dev_priv));
	}
}

static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
				       u16 wm[5])
{
	/* ILK sprite LP0 latency is 1300 ns */
	if (IS_GEN(dev_priv, 5))
		wm[0] = 13;
}

static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
				       u16 wm[5])
{
	/* ILK cursor LP0 latency is 1300 ns */
	if (IS_GEN(dev_priv, 5))
		wm[0] = 13;
}

int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
{
	/* how many WM levels are we expecting */
	if (INTEL_GEN(dev_priv) >= 9)
		return 7;
	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 4;
	else if (INTEL_GEN(dev_priv) >= 6)
		return 3;
	else
		return 2;
}

static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
				   const char *name,
				   const u16 wm[8])
{
	int level, max_level = ilk_wm_max_level(dev_priv);

	for (level = 0; level <= max_level; level++) {
		unsigned int latency = wm[level];

		if (latency == 0) {
			DRM_DEBUG_KMS("%s WM%d latency not provided\n",
				      name, level);
			continue;
		}

		/*
		 * - latencies are in us on gen9.
		 * - before then, WM1+ latency values are in 0.5us units
		 */
		if (INTEL_GEN(dev_priv) >= 9)
			latency *= 10;
		else if (level > 0)
			latency *= 5;

		DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
			      name, level, wm[level],
			      latency / 10, latency % 10);
	}
}

static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
				    u16 wm[5], u16 min)
{
	int level, max_level = ilk_wm_max_level(dev_priv);

	if (wm[0] >= min)
		return false;

	wm[0] = max(wm[0], min);
	for (level = 1; level <= max_level; level++)
		wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5));

	return true;
}

static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
{
	bool changed;

	/*
	 * The BIOS provided WM memory latency values are often
	 * inadequate for high resolution displays. Adjust them.
	 */
	changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
		ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
		ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);

	if (!changed)
		return;

	DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
}

static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
{
	/*
	 * On some SNB machines (Thinkpad X220 Tablet at least)
	 * LP3 usage can cause vblank interrupts to be lost.
	 * The DEIIR bit will go high but it looks like the CPU
	 * never gets interrupted.
	 *
	 * It's not clear whether other interrupt sources could
	 * be affected or if this is somehow limited to vblank
	 * interrupts only. To play it safe we disable LP3
	 * watermarks entirely.
	 */
	if (dev_priv->wm.pri_latency[3] == 0 &&
	    dev_priv->wm.spr_latency[3] == 0 &&
	    dev_priv->wm.cur_latency[3] == 0)
		return;

	dev_priv->wm.pri_latency[3] = 0;
	dev_priv->wm.spr_latency[3] = 0;
	dev_priv->wm.cur_latency[3] = 0;

	DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
}

static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);

	memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
	       sizeof(dev_priv->wm.pri_latency));
	memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
	       sizeof(dev_priv->wm.pri_latency));

	intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
	intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);

	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);

	if (IS_GEN(dev_priv, 6)) {
		snb_wm_latency_quirk(dev_priv);
		snb_wm_lp3_irq_quirk(dev_priv);
	}
}

static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
	intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
}

static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
				 struct intel_pipe_wm *pipe_wm)
{
	/* LP0 watermark maximums depend on this pipe alone */
	const struct intel_wm_config config = {
		.num_pipes_active = 1,
		.sprites_enabled = pipe_wm->sprites_enabled,
		.sprites_scaled = pipe_wm->sprites_scaled,
	};
	struct ilk_wm_maximums max;

	/* LP0 watermarks always use 1/2 DDB partitioning */
	ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);

	/* At least LP0 must be valid */
	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
		DRM_DEBUG_KMS("LP0 watermark invalid\n");
		return false;
	}

	return true;
}

/* Compute new watermarks for the pipe */
static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
{
	struct drm_atomic_state *state = cstate->base.state;
	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
	struct intel_pipe_wm *pipe_wm;
	struct drm_device *dev = state->dev;
	const struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_plane *plane;
	const struct drm_plane_state *plane_state;
	const struct intel_plane_state *pristate = NULL;
	const struct intel_plane_state *sprstate = NULL;
	const struct intel_plane_state *curstate = NULL;
	int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
	struct ilk_wm_maximums max;

	pipe_wm = &cstate->wm.ilk.optimal;

	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
		const struct intel_plane_state *ps = to_intel_plane_state(plane_state);

		if (plane->type == DRM_PLANE_TYPE_PRIMARY)
			pristate = ps;
		else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
			sprstate = ps;
		else if (plane->type == DRM_PLANE_TYPE_CURSOR)
			curstate = ps;
	}

	pipe_wm->pipe_enabled = cstate->base.active;
	if (sprstate) {
		pipe_wm->sprites_enabled = sprstate->base.visible;
		pipe_wm->sprites_scaled = sprstate->base.visible &&
			(drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
			 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
	}

	usable_level = max_level;

	/* ILK/SNB: LP2+ watermarks only w/o sprites */
	if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
		usable_level = 1;

	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
	if (pipe_wm->sprites_scaled)
		usable_level = 0;

	memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
			     pristate, sprstate, curstate, &pipe_wm->wm[0]);

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		pipe_wm->linetime = hsw_compute_linetime_wm(cstate);

	if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
		return -EINVAL;

	ilk_compute_wm_reg_maximums(dev_priv, 1, &max);

	for (level = 1; level <= usable_level; level++) {
		struct intel_wm_level *wm = &pipe_wm->wm[level];

		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
				     pristate, sprstate, curstate, wm);

		/*
		 * Disable any watermark level that exceeds the
		 * register maximums since such watermarks are
		 * always invalid.
		 */
		if (!ilk_validate_wm_level(level, &max, wm)) {
			memset(wm, 0, sizeof(*wm));
			break;
		}
	}

	return 0;
}

/*
 * Build a set of 'intermediate' watermark values that satisfy both the old
 * state and the new state. These can be programmed to the hardware
 * immediately.
 */
static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
{
	struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
	struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
	struct intel_atomic_state *intel_state =
		to_intel_atomic_state(newstate->base.state);
	const struct intel_crtc_state *oldstate =
		intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
	const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
	int level, max_level = ilk_wm_max_level(dev_priv);

	/*
	 * Start with the final, target watermarks, then combine with the
	 * currently active watermarks to get values that are safe both before
	 * and after the vblank.
	 */
	*a = newstate->wm.ilk.optimal;
	if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
	    intel_state->skip_intermediate_wm)
		return 0;

	a->pipe_enabled |= b->pipe_enabled;
	a->sprites_enabled |= b->sprites_enabled;
	a->sprites_scaled |= b->sprites_scaled;

	for (level = 0; level <= max_level; level++) {
		struct intel_wm_level *a_wm = &a->wm[level];
		const struct intel_wm_level *b_wm = &b->wm[level];

		a_wm->enable &= b_wm->enable;
		a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
		a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
		a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
		a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
	}

	/*
	 * We need to make sure that these merged watermark values are
	 * actually a valid configuration themselves. If they're not,
	 * there's no safe way to transition from the old state to
	 * the new state, so we need to fail the atomic transaction.
	 */
	if (!ilk_validate_pipe_wm(dev_priv, a))
		return -EINVAL;

	/*
	 * If our intermediate WM are identical to the final WM, then we can
	 * omit the post-vblank programming; only update if it's different.
	 */
	if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
		newstate->wm.need_postvbl_update = true;

	return 0;
}

/*
 * Merge the watermarks from all active pipes for a specific level.
 */
static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
			       int level,
			       struct intel_wm_level *ret_wm)
{
	const struct intel_crtc *intel_crtc;

	ret_wm->enable = true;

	for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
		const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
		const struct intel_wm_level *wm = &active->wm[level];

		if (!active->pipe_enabled)
			continue;

		/*
		 * The watermark values may have been used in the past,
		 * so we must maintain them in the registers for some
		 * time even if the level is now disabled.
		 */
		if (!wm->enable)
			ret_wm->enable = false;

		ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
		ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
		ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
		ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
	}
}

/*
 * Merge all low power watermarks for all active pipes.
 */
static void ilk_wm_merge(struct drm_i915_private *dev_priv,
			 const struct intel_wm_config *config,
			 const struct ilk_wm_maximums *max,
			 struct intel_pipe_wm *merged)
{
	int level, max_level = ilk_wm_max_level(dev_priv);
	int last_enabled_level = max_level;

	/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
	if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
	    config->num_pipes_active > 1)
		last_enabled_level = 0;

	/* ILK: FBC WM must be disabled always */
	merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;

	/* merge each WM1+ level */
	for (level = 1; level <= max_level; level++) {
		struct intel_wm_level *wm = &merged->wm[level];

		ilk_merge_wm_level(dev_priv, level, wm);

		if (level > last_enabled_level)
			wm->enable = false;
		else if (!ilk_validate_wm_level(level, max, wm))
			/* make sure all following levels get disabled */
			last_enabled_level = level - 1;

		/*
		 * The spec says it is preferred to disable
		 * FBC WMs instead of disabling a WM level.
		 */
		if (wm->fbc_val > max->fbc) {
			if (wm->enable)
				merged->fbc_wm_enabled = false;
			wm->fbc_val = 0;
		}
	}

	/* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
	/*
	 * FIXME this is racy. FBC might get enabled later.
	 * What we should check here is whether FBC can be
	 * enabled sometime later.
	 */
	if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
	    intel_fbc_is_active(dev_priv)) {
		for (level = 2; level <= max_level; level++) {
			struct intel_wm_level *wm = &merged->wm[level];

			wm->enable = false;
		}
	}
}

static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
{
	/* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
	return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
}

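/*
 * Illustrative mapping (not part of the original code): on HSW/BDW with
 * five levels (0-4), LP1/LP2/LP3 map to levels 1/3/4 when wm[4] is
 * enabled, and to 1/2/3 otherwise, exactly as the comment above says.
 */
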
/* The value we need to program into the WM_LPx latency field */
static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
				      int level)
{
	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 2 * level;
	else
		return dev_priv->wm.pri_latency[level];
}

static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
				   const struct intel_pipe_wm *merged,
				   enum intel_ddb_partitioning partitioning,
				   struct ilk_wm_values *results)
{
	struct intel_crtc *intel_crtc;
	int level, wm_lp;

	results->enable_fbc_wm = merged->fbc_wm_enabled;
	results->partitioning = partitioning;

	/* LP1+ register values */
	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
		const struct intel_wm_level *r;

		level = ilk_wm_lp_to_level(wm_lp, merged);

		r = &merged->wm[level];

		/*
		 * Maintain the watermark values even if the level is
		 * disabled. Doing otherwise could cause underruns.
		 */
		results->wm_lp[wm_lp - 1] =
			(ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
			(r->pri_val << WM1_LP_SR_SHIFT) |
			r->cur_val;

		if (r->enable)
			results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;

		if (INTEL_GEN(dev_priv) >= 8)
			results->wm_lp[wm_lp - 1] |=
				r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
		else
			results->wm_lp[wm_lp - 1] |=
				r->fbc_val << WM1_LP_FBC_SHIFT;

		/*
		 * Always set WM1S_LP_EN when spr_val != 0, even if the
		 * level is disabled. Doing otherwise could cause underruns.
		 */
		if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
			WARN_ON(wm_lp != 1);
			results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
		} else
			results->wm_lp_spr[wm_lp - 1] = r->spr_val;
	}

	/* LP0 register values */
	for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
		enum pipe pipe = intel_crtc->pipe;
		const struct intel_wm_level *r =
			&intel_crtc->wm.active.ilk.wm[0];

		if (WARN_ON(!r->enable))
			continue;

		results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;

		results->wm_pipe[pipe] =
			(r->pri_val << WM0_PIPE_PLANE_SHIFT) |
			(r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
			r->cur_val;
	}
}

/* Find the result with the highest level enabled. Check for enable_fbc_wm in
 * case both are at the same level. Prefer r1 in case they're the same. */
static struct intel_pipe_wm *
ilk_find_best_result(struct drm_i915_private *dev_priv,
		     struct intel_pipe_wm *r1,
		     struct intel_pipe_wm *r2)
{
	int level, max_level = ilk_wm_max_level(dev_priv);
	int level1 = 0, level2 = 0;

	for (level = 1; level <= max_level; level++) {
		if (r1->wm[level].enable)
			level1 = level;
		if (r2->wm[level].enable)
			level2 = level;
	}

	if (level1 == level2) {
		if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
			return r2;
		else
			return r1;
	} else if (level1 > level2) {
		return r1;
	} else {
		return r2;
	}
}

/* dirty bits used to track which watermarks need changes */
#define WM_DIRTY_PIPE(pipe) (1 << (pipe))
#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
#define WM_DIRTY_FBC (1 << 24)
#define WM_DIRTY_DDB (1 << 25)

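/*
 * Illustrative layout (not part of the original code): pipe bits occupy
 * bits 0-2, linetime bits 8-10 and LP bits 16-18 (WM_DIRTY_LP(2) is
 * 1 << 17), with FBC and DDB in bits 24 and 25.
 */
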
static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
					 const struct ilk_wm_values *old,
					 const struct ilk_wm_values *new)
{
	unsigned int dirty = 0;
	enum pipe pipe;
	int wm_lp;

	for_each_pipe(dev_priv, pipe) {
		if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
			dirty |= WM_DIRTY_LINETIME(pipe);
			/* Must disable LP1+ watermarks too */
			dirty |= WM_DIRTY_LP_ALL;
		}

		if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
			dirty |= WM_DIRTY_PIPE(pipe);
			/* Must disable LP1+ watermarks too */
			dirty |= WM_DIRTY_LP_ALL;
		}
	}

	if (old->enable_fbc_wm != new->enable_fbc_wm) {
		dirty |= WM_DIRTY_FBC;
		/* Must disable LP1+ watermarks too */
		dirty |= WM_DIRTY_LP_ALL;
	}

	if (old->partitioning != new->partitioning) {
		dirty |= WM_DIRTY_DDB;
		/* Must disable LP1+ watermarks too */
		dirty |= WM_DIRTY_LP_ALL;
	}

	/* LP1+ watermarks already deemed dirty, no need to continue */
	if (dirty & WM_DIRTY_LP_ALL)
		return dirty;

	/* Find the lowest numbered LP1+ watermark in need of an update... */
	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
		if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
		    old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
			break;
	}

	/* ...and mark it and all higher numbered LP1+ watermarks as dirty */
	for (; wm_lp <= 3; wm_lp++)
		dirty |= WM_DIRTY_LP(wm_lp);

	return dirty;
}

static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
			       unsigned int dirty)
{
	struct ilk_wm_values *previous = &dev_priv->wm.hw;
	bool changed = false;

	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
		previous->wm_lp[2] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
		changed = true;
	}
	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
		previous->wm_lp[1] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
		changed = true;
	}
	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
		previous->wm_lp[0] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
		changed = true;
	}

	/*
	 * Don't touch WM1S_LP_EN here.
	 * Doing so could cause underruns.
	 */

	return changed;
}

/*
 * The spec says we shouldn't write when we don't need to, because every
 * write causes WMs to be re-evaluated, expending some power.
 */
static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
				struct ilk_wm_values *results)
{
	struct ilk_wm_values *previous = &dev_priv->wm.hw;
	unsigned int dirty;
	u32 val;

	dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
	if (!dirty)
		return;

	_ilk_disable_lp_wm(dev_priv, dirty);

	if (dirty & WM_DIRTY_PIPE(PIPE_A))
		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
	if (dirty & WM_DIRTY_PIPE(PIPE_B))
		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
	if (dirty & WM_DIRTY_PIPE(PIPE_C))
		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);

	if (dirty & WM_DIRTY_LINETIME(PIPE_A))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
	if (dirty & WM_DIRTY_LINETIME(PIPE_B))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
	if (dirty & WM_DIRTY_LINETIME(PIPE_C))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);

	if (dirty & WM_DIRTY_DDB) {
		if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
			val = I915_READ(WM_MISC);
			if (results->partitioning == INTEL_DDB_PART_1_2)
				val &= ~WM_MISC_DATA_PARTITION_5_6;
			else
				val |= WM_MISC_DATA_PARTITION_5_6;
			I915_WRITE(WM_MISC, val);
		} else {
			val = I915_READ(DISP_ARB_CTL2);
			if (results->partitioning == INTEL_DDB_PART_1_2)
				val &= ~DISP_DATA_PARTITION_5_6;
			else
				val |= DISP_DATA_PARTITION_5_6;
			I915_WRITE(DISP_ARB_CTL2, val);
		}
	}

	if (dirty & WM_DIRTY_FBC) {
		val = I915_READ(DISP_ARB_CTL);
		if (results->enable_fbc_wm)
			val &= ~DISP_FBC_WM_DIS;
		else
			val |= DISP_FBC_WM_DIS;
		I915_WRITE(DISP_ARB_CTL, val);
	}

	if (dirty & WM_DIRTY_LP(1) &&
	    previous->wm_lp_spr[0] != results->wm_lp_spr[0])
		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);

	if (INTEL_GEN(dev_priv) >= 7) {
		if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
			I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
		if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
			I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
	}

	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);

	dev_priv->wm.hw = *results;
}

bool ilk_disable_lp_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
}

static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
{
	u8 enabled_slices;

	/* Slice 1 will always be enabled */
	enabled_slices = 1;

	/* Gens prior to GEN11 have only one DBuf slice */
	if (INTEL_GEN(dev_priv) < 11)
		return enabled_slices;

	/*
	 * FIXME: for now we'll only ever use 1 slice; pretend that we have
	 * only that 1 slice enabled until we have a proper way for on-demand
	 * toggling of the second slice.
	 */
	if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
		enabled_slices++;

	return enabled_slices;
}

/*
 * FIXME: We still don't have the proper code to detect if we need to apply
 * the WA, so assume we'll always need it in order to avoid underruns.
 */
static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
{
	return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
}

static bool
intel_has_sagv(struct drm_i915_private *dev_priv)
{
	return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
		dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
}

/*
 * SAGV dynamically adjusts the system agent voltage and clock frequencies
 * depending on power and performance requirements. The display engine access
 * to system memory is blocked during the adjustment time. Because of the
 * blocking time, having this enabled can cause full system hangs and/or pipe
 * underruns if we don't meet all of the following requirements:
 *
 *  - <= 1 pipe enabled
 *  - All planes can enable watermarks for latencies >= SAGV engine block time
 *  - We're not using an interlaced display configuration
 */
int
intel_enable_sagv(struct drm_i915_private *dev_priv)
{
	int ret;

	if (!intel_has_sagv(dev_priv))
		return 0;

	if (dev_priv->sagv_status == I915_SAGV_ENABLED)
		return 0;

	DRM_DEBUG_KMS("Enabling SAGV\n");
	mutex_lock(&dev_priv->pcu_lock);

	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
				      GEN9_SAGV_ENABLE);

	/* We don't need to wait for SAGV when enabling */
	mutex_unlock(&dev_priv->pcu_lock);

	/*
	 * Some skl systems, pre-release machines in particular,
	 * don't actually have SAGV.
	 */
	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
		return 0;
	} else if (ret < 0) {
		DRM_ERROR("Failed to enable SAGV\n");
		return ret;
	}

	dev_priv->sagv_status = I915_SAGV_ENABLED;
	return 0;
}

int
intel_disable_sagv(struct drm_i915_private *dev_priv)
{
	int ret;

	if (!intel_has_sagv(dev_priv))
		return 0;

	if (dev_priv->sagv_status == I915_SAGV_DISABLED)
		return 0;

	DRM_DEBUG_KMS("Disabling SAGV\n");
	mutex_lock(&dev_priv->pcu_lock);

	/* bspec says to keep retrying for at least 1 ms */
	ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
				GEN9_SAGV_DISABLE,
				GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
				1);
	mutex_unlock(&dev_priv->pcu_lock);

	/*
	 * Some skl systems, pre-release machines in particular,
	 * don't actually have SAGV.
	 */
	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
		return 0;
	} else if (ret < 0) {
		DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
		return ret;
	}

	dev_priv->sagv_status = I915_SAGV_DISABLED;
	return 0;
}

bool intel_can_enable_sagv(struct drm_atomic_state *state)
{
	struct drm_device *dev = state->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
	struct intel_crtc *crtc;
	struct intel_plane *plane;
	struct intel_crtc_state *cstate;
	enum pipe pipe;
	int level, latency;
	int sagv_block_time_us;

	if (!intel_has_sagv(dev_priv))
		return false;

	if (IS_GEN(dev_priv, 9))
		sagv_block_time_us = 30;
	else if (IS_GEN(dev_priv, 10))
		sagv_block_time_us = 20;
	else
		sagv_block_time_us = 10;

	/*
	 * SKL+ workaround: bspec recommends we disable SAGV when we have
	 * more than one pipe enabled
	 *
	 * If there are no active CRTCs, no additional checks need be performed
	 */
	if (hweight32(intel_state->active_crtcs) == 0)
		return true;
	else if (hweight32(intel_state->active_crtcs) > 1)
		return false;

	/* Since we're now guaranteed to only have one active CRTC... */
	pipe = ffs(intel_state->active_crtcs) - 1;
	crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
	cstate = to_intel_crtc_state(crtc->base.state);

	if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
		return false;

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct skl_plane_wm *wm =
			&cstate->wm.skl.optimal.planes[plane->id];

		/* Skip this plane if it's not enabled */
		if (!wm->wm[0].plane_en)
			continue;

		/* Find the highest enabled wm level for this plane */
		for (level = ilk_wm_max_level(dev_priv);
		     !wm->wm[level].plane_en; --level)
			{ }

		latency = dev_priv->wm.skl_latency[level];

		if (skl_needs_memory_bw_wa(dev_priv) &&
		    plane->base.state->fb->modifier ==
		    I915_FORMAT_MOD_X_TILED)
			latency += 15;

		/*
		 * If any of the planes on this pipe don't enable wm levels that
		 * incur memory latencies higher than sagv_block_time_us we
		 * can't enable SAGV.
		 */
		if (latency < sagv_block_time_us)
			return false;
	}

	return true;
}

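/*
 * Worked example (illustrative, not part of the original code): on gen9
 * the SAGV block time is 30 us, so a plane whose highest enabled
 * watermark level only covers, say, 20 us of latency fails the check
 * above and SAGV stays off, while a plane whose highest enabled level
 * covers 31 us passes.
 */
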
static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
			      const struct intel_crtc_state *cstate,
			      const u64 total_data_rate,
			      const int num_active,
			      struct skl_ddb_allocation *ddb)
{
	const struct drm_display_mode *adjusted_mode;
	u64 total_data_bw;
	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;

	WARN_ON(ddb_size == 0);

	if (INTEL_GEN(dev_priv) < 11)
		return ddb_size - 4; /* 4 blocks for bypass path allocation */

	adjusted_mode = &cstate->base.adjusted_mode;
	total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);

	/*
	 * 12GB/s is maximum BW supported by single DBuf slice.
	 *
	 * FIXME dbuf slice code is broken:
	 * - must wait for planes to stop using the slice before powering it off
	 * - plane straddling both slices is illegal in multi-pipe scenarios
	 * - should validate we stay within the hw bandwidth limits
	 */
	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
		ddb->enabled_slices = 2;
	} else {
		ddb->enabled_slices = 1;
		ddb_size /= 2;
	}

	return ddb_size;
}

static void
skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
				   const struct intel_crtc_state *cstate,
				   const u64 total_data_rate,
				   struct skl_ddb_allocation *ddb,
				   struct skl_ddb_entry *alloc, /* out */
				   int *num_active /* out */)
{
	struct drm_atomic_state *state = cstate->base.state;
	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
	struct drm_crtc *for_crtc = cstate->base.crtc;
	const struct drm_crtc_state *crtc_state;
	const struct drm_crtc *crtc;
	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
	u16 ddb_size;
	u32 i;

	if (WARN_ON(!state) || !cstate->base.active) {
		alloc->start = 0;
		alloc->end = 0;
		*num_active = hweight32(dev_priv->active_crtcs);
		return;
	}

	if (intel_state->active_pipe_changes)
		*num_active = hweight32(intel_state->active_crtcs);
	else
		*num_active = hweight32(dev_priv->active_crtcs);

	ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
				      *num_active, ddb);

	/*
	 * If the state doesn't change the active CRTCs or there is no
	 * modeset request, then there's no need to recalculate;
	 * the existing pipe allocation limits should remain unchanged.
	 * Note that we're safe from racing commits since any racing commit
	 * that changes the active CRTC list or does a modeset would need to
	 * grab _all_ crtc locks, including the one we currently hold.
	 */
	if (!intel_state->active_pipe_changes && !intel_state->modeset) {
		/*
		 * alloc may be cleared by clear_intel_crtc_state,
		 * copy from old state to be sure
		 */
		*alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
		return;
	}

	/*
	 * Watermark/ddb requirements depend heavily on the width of the
	 * framebuffer, so instead of allocating DDB equally among pipes,
	 * distribute it based on the resolution/width of the display.
	 */
	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
		const struct drm_display_mode *adjusted_mode;
		int hdisplay, vdisplay;
		enum pipe pipe;

		if (!crtc_state->enable)
			continue;

		pipe = to_intel_crtc(crtc)->pipe;
		adjusted_mode = &crtc_state->adjusted_mode;
		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
		total_width += hdisplay;

		if (pipe < for_pipe)
			width_before_pipe += hdisplay;
		else if (pipe == for_pipe)
			pipe_width = hdisplay;
	}

	alloc->start = ddb_size * width_before_pipe / total_width;
	alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
}

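/*
 * Worked example (illustrative, with an assumed 512 block DDB): with
 * pipe A at 1920 pixels wide and pipe B at 1280 (total 3200), pipe B's
 * allocation above becomes start = 512 * 1920 / 3200 = 307 and
 * end = 512 * 3200 / 3200 = 512, i.e. the split is proportional to
 * pipe width rather than an equal division.
 */
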
static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
				 int width, const struct drm_format_info *format,
				 u64 modifier, unsigned int rotation,
				 u32 plane_pixel_rate, struct skl_wm_params *wp,
				 int color_plane);
static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
				 int level,
				 const struct skl_wm_params *wp,
				 const struct skl_wm_level *result_prev,
				 struct skl_wm_level *result /* out */);

static unsigned int
skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
		      int num_active)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	int level, max_level = ilk_wm_max_level(dev_priv);
	struct skl_wm_level wm = {};
	int ret, min_ddb_alloc = 0;
	struct skl_wm_params wp;

	ret = skl_compute_wm_params(crtc_state, 256,
				    drm_format_info(DRM_FORMAT_ARGB8888),
				    DRM_FORMAT_MOD_LINEAR,
				    DRM_MODE_ROTATE_0,
				    crtc_state->pixel_rate, &wp, 0);
	WARN_ON(ret);

	for (level = 0; level <= max_level; level++) {
		skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
		if (wm.min_ddb_alloc == U16_MAX)
			break;

		min_ddb_alloc = wm.min_ddb_alloc;
	}

	return max(num_active == 1 ? 32 : 8, min_ddb_alloc);
}

static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
				       struct skl_ddb_entry *entry, u32 reg)
{
	entry->start = reg & DDB_ENTRY_MASK;
	entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK;

	if (entry->end)
		entry->end += 1;
}

static void
skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
			   const enum pipe pipe,
			   const enum plane_id plane_id,
			   struct skl_ddb_entry *ddb_y,
			   struct skl_ddb_entry *ddb_uv)
{
	u32 val, val2;
	u32 fourcc = 0;

	/* Cursor doesn't support NV12/planar, so no extra calculation needed */
	if (plane_id == PLANE_CURSOR) {
		val = I915_READ(CUR_BUF_CFG(pipe));
		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
		return;
	}

	val = I915_READ(PLANE_CTL(pipe, plane_id));

	/* No DDB allocated for disabled planes */
	if (val & PLANE_CTL_ENABLE)
		fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
					      val & PLANE_CTL_ORDER_RGBX,
					      val & PLANE_CTL_ALPHA_MASK);

	if (INTEL_GEN(dev_priv) >= 11) {
		val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
	} else {
		val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
		val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));

		if (is_planar_yuv_format(fourcc))
			swap(val, val2);

		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
		skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
	}
}

void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
			       struct skl_ddb_entry *ddb_y,
			       struct skl_ddb_entry *ddb_uv)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	enum intel_display_power_domain power_domain;
	enum pipe pipe = crtc->pipe;
	intel_wakeref_t wakeref;
	enum plane_id plane_id;

	power_domain = POWER_DOMAIN_PIPE(pipe);
	wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
	if (!wakeref)
		return;

	for_each_plane_id_on_crtc(crtc, plane_id)
		skl_ddb_get_hw_plane_state(dev_priv, pipe,
					   plane_id,
					   &ddb_y[plane_id],
					   &ddb_uv[plane_id]);

	intel_display_power_put(dev_priv, power_domain, wakeref);
}

void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
			  struct skl_ddb_allocation *ddb /* out */)
{
	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
}

4050 /*
4051 * Determines the downscale amount of a plane for the purposes of watermark calculations.
4052 * The bspec defines downscale amount as:
4055 * Horizontal down scale amount = maximum[1, Horizontal source size /
4056 * Horizontal destination size]
4057 * Vertical down scale amount = maximum[1, Vertical source size /
4058 * Vertical destination size]
4059 * Total down scale amount = Horizontal down scale amount *
4060 * Vertical down scale amount
4063 * Return value is provided in 16.16 fixed point form to retain fractional part.
4064 * Caller should take care of dividing & rounding off the value.
4065 */
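/*
 * Worked example (illustrative, not from the original source): a
 * 3840x2160 plane source scanned out into a 1920x1080 destination
 * gives fp_w_ratio = 3840/1920 = 2.0 and fp_h_ratio = 2160/1080 = 2.0,
 * so the total downscale amount is 2.0 * 2.0 = 4.0, i.e. 4 << 16 =
 * 0x40000 in 16.16 fixed point. With the helpers used below:
 *
 *	fp_w = div_fixed16(3840, 1920);
 *	fp_h = div_fixed16(2160, 1080);
 *	total = mul_fixed16(max_fixed16(fp_w, u32_to_fixed16(1)),
 *			    max_fixed16(fp_h, u32_to_fixed16(1)));
 */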
4066 static uint_fixed_16_16_t
4067 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4068 const struct intel_plane_state *pstate)
4070 struct intel_plane *plane = to_intel_plane(pstate->base.plane);
4071 u32 src_w, src_h, dst_w, dst_h;
4072 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4073 uint_fixed_16_16_t downscale_h, downscale_w;
4075 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4076 return u32_to_fixed16(0);
4078 /* n.b., src is 16.16 fixed point, dst is whole integer */
4079 if (plane->id == PLANE_CURSOR) {
4081 * Cursors only support 0/180 degree rotation,
4082 * hence no need to account for rotation here.
4084 src_w = pstate->base.src_w >> 16;
4085 src_h = pstate->base.src_h >> 16;
4086 dst_w = pstate->base.crtc_w;
4087 dst_h = pstate->base.crtc_h;
4090 * Src coordinates are already rotated by 270 degrees for
4091 * the 90/270 degree plane rotation cases (to match the
4092 * GTT mapping), hence no need to account for rotation here.
4094 src_w = drm_rect_width(&pstate->base.src) >> 16;
4095 src_h = drm_rect_height(&pstate->base.src) >> 16;
4096 dst_w = drm_rect_width(&pstate->base.dst);
4097 dst_h = drm_rect_height(&pstate->base.dst);
4100 fp_w_ratio = div_fixed16(src_w, dst_w);
4101 fp_h_ratio = div_fixed16(src_h, dst_h);
4102 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4103 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4105 return mul_fixed16(downscale_w, downscale_h);
4108 static uint_fixed_16_16_t
4109 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4111 uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4113 if (!crtc_state->base.enable)
4114 return pipe_downscale;
4116 if (crtc_state->pch_pfit.enabled) {
4117 u32 src_w, src_h, dst_w, dst_h;
4118 u32 pfit_size = crtc_state->pch_pfit.size;
4119 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4120 uint_fixed_16_16_t downscale_h, downscale_w;
4122 src_w = crtc_state->pipe_src_w;
4123 src_h = crtc_state->pipe_src_h;
4124 dst_w = pfit_size >> 16;
4125 dst_h = pfit_size & 0xffff;
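/*
 * e.g. a pch_pfit.size of (1920 << 16) | 1080 decodes to a 1920x1080
 * panel fitter window, so a 3840x2160 pipe source would produce a
 * 2.0 * 2.0 = 4.0 pipe downscale below.
 */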
4127 if (!dst_w || !dst_h)
4128 return pipe_downscale;
4130 fp_w_ratio = div_fixed16(src_w, dst_w);
4131 fp_h_ratio = div_fixed16(src_h, dst_h);
4132 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4133 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4135 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4138 return pipe_downscale;
4141 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4142 struct intel_crtc_state *cstate)
4144 struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4145 struct drm_crtc_state *crtc_state = &cstate->base;
4146 struct drm_atomic_state *state = crtc_state->state;
4147 struct drm_plane *plane;
4148 const struct drm_plane_state *pstate;
4149 struct intel_plane_state *intel_pstate;
4150 int crtc_clock, dotclk;
4151 u32 pipe_max_pixel_rate;
4152 uint_fixed_16_16_t pipe_downscale;
4153 uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4155 if (!cstate->base.enable)
4156 return 0;
4158 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4159 uint_fixed_16_16_t plane_downscale;
4160 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4163 if (!intel_wm_plane_visible(cstate,
4164 to_intel_plane_state(pstate)))
4165 continue;
4167 if (WARN_ON(!pstate->fb))
4168 return -EINVAL;
4170 intel_pstate = to_intel_plane_state(pstate);
4171 plane_downscale = skl_plane_downscale_amount(cstate,
4172 intel_pstate);
4173 bpp = pstate->fb->format->cpp[0] * 8;
4174 if (bpp == 64)
4175 plane_downscale = mul_fixed16(plane_downscale,
4176 fp_9_div_8);
4178 max_downscale = max_fixed16(plane_downscale, max_downscale);
4180 pipe_downscale = skl_pipe_downscale_amount(cstate);
4182 pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4184 crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4185 dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4187 if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4188 dotclk *= 2;
4190 pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4192 if (pipe_max_pixel_rate < crtc_clock) {
4193 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4201 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4202 const struct intel_plane_state *intel_pstate,
4205 struct intel_plane *intel_plane =
4206 to_intel_plane(intel_pstate->base.plane);
4208 u32 width = 0, height = 0;
4209 struct drm_framebuffer *fb;
4211 uint_fixed_16_16_t down_scale_amount;
4214 if (!intel_pstate->base.visible)
4217 fb = intel_pstate->base.fb;
4218 format = fb->format->format;
4220 if (intel_plane->id == PLANE_CURSOR)
4222 if (plane == 1 && !is_planar_yuv_format(format))
4226 * Src coordinates are already rotated by 270 degrees for
4227 * the 90/270 degree plane rotation cases (to match the
4228 * GTT mapping), hence no need to account for rotation here.
4230 width = drm_rect_width(&intel_pstate->base.src) >> 16;
4231 height = drm_rect_height(&intel_pstate->base.src) >> 16;
4233 /* UV plane does 1/2 pixel sub-sampling */
4234 if (plane == 1 && is_planar_yuv_format(format)) {
4235 width /= 2;
4236 height /= 2;
4237 }
4239 data_rate = width * height;
4241 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4243 rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4245 rate *= fb->format->cpp[plane];
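/*
 * e.g. an unscaled 1920x1080 plane with a 4 byte/pixel format:
 * rate = 1920 * 1080 * 1.0 (no downscale) * 4 = 8294400 relative
 * units, which then competes against the other planes' rates when
 * the DDB is divided up.
 */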
4250 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4251 u64 *plane_data_rate,
4252 u64 *uv_plane_data_rate)
4254 struct drm_crtc_state *cstate = &intel_cstate->base;
4255 struct drm_atomic_state *state = cstate->state;
4256 struct drm_plane *plane;
4257 const struct drm_plane_state *pstate;
4258 u64 total_data_rate = 0;
4260 if (WARN_ON(!state))
4263 /* Calculate and cache data rate for each plane */
4264 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4265 enum plane_id plane_id = to_intel_plane(plane)->id;
4267 const struct intel_plane_state *intel_pstate =
4268 to_intel_plane_state(pstate);
4271 rate = skl_plane_relative_data_rate(intel_cstate,
4273 plane_data_rate[plane_id] = rate;
4274 total_data_rate += rate;
4277 rate = skl_plane_relative_data_rate(intel_cstate,
4279 uv_plane_data_rate[plane_id] = rate;
4280 total_data_rate += rate;
4283 return total_data_rate;
4287 icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4288 u64 *plane_data_rate)
4290 struct drm_crtc_state *cstate = &intel_cstate->base;
4291 struct drm_atomic_state *state = cstate->state;
4292 struct drm_plane *plane;
4293 const struct drm_plane_state *pstate;
4294 u64 total_data_rate = 0;
4296 if (WARN_ON(!state))
4299 /* Calculate and cache data rate for each plane */
4300 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4301 const struct intel_plane_state *intel_pstate =
4302 to_intel_plane_state(pstate);
4303 enum plane_id plane_id = to_intel_plane(plane)->id;
4306 if (!intel_pstate->linked_plane) {
4307 rate = skl_plane_relative_data_rate(intel_cstate,
4309 plane_data_rate[plane_id] = rate;
4310 total_data_rate += rate;
4312 enum plane_id y_plane_id;
4315 * The slave plane might not iterate in
4316 * drm_atomic_crtc_state_for_each_plane_state(),
4317 * and needs the master plane state which may be
4318 * NULL if we try get_new_plane_state(), so we
4319 * always calculate from the master.
4321 if (intel_pstate->slave)
4322 continue;
4324 /* Y plane rate is calculated on the slave */
4325 rate = skl_plane_relative_data_rate(intel_cstate,
4327 y_plane_id = intel_pstate->linked_plane->id;
4328 plane_data_rate[y_plane_id] = rate;
4329 total_data_rate += rate;
4331 rate = skl_plane_relative_data_rate(intel_cstate,
4333 plane_data_rate[plane_id] = rate;
4334 total_data_rate += rate;
4338 return total_data_rate;
4342 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4343 struct skl_ddb_allocation *ddb /* out */)
4345 struct drm_atomic_state *state = cstate->base.state;
4346 struct drm_crtc *crtc = cstate->base.crtc;
4347 struct drm_i915_private *dev_priv = to_i915(crtc->dev);
4348 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4349 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4350 u16 alloc_size, start = 0;
4351 u16 total[I915_MAX_PLANES] = {};
4352 u16 uv_total[I915_MAX_PLANES] = {};
4353 u64 total_data_rate;
4354 enum plane_id plane_id;
4356 u64 plane_data_rate[I915_MAX_PLANES] = {};
4357 u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
4361 /* Clear the partitioning for disabled planes. */
4362 memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y));
4363 memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv));
4365 if (WARN_ON(!state))
4368 if (!cstate->base.active) {
4369 alloc->start = alloc->end = 0;
4373 if (INTEL_GEN(dev_priv) < 11)
4375 skl_get_total_relative_data_rate(cstate,
4377 uv_plane_data_rate);
4380 icl_get_total_relative_data_rate(cstate,
4383 skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
4384 ddb, alloc, &num_active);
4385 alloc_size = skl_ddb_entry_size(alloc);
4386 if (alloc_size == 0)
4387 return 0;
4389 /* Allocate fixed number of blocks for cursor. */
4390 total[PLANE_CURSOR] = skl_cursor_allocation(cstate, num_active);
4391 alloc_size -= total[PLANE_CURSOR];
4392 cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4393 alloc->end - total[PLANE_CURSOR];
4394 cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4396 if (total_data_rate == 0)
4397 return 0;
4400 * Find the highest watermark level for which we can satisfy the block
4401 * requirement of active planes.
4403 for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
4405 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4406 const struct skl_plane_wm *wm =
4407 &cstate->wm.skl.optimal.planes[plane_id];
4409 if (plane_id == PLANE_CURSOR) {
4410 if (WARN_ON(wm->wm[level].min_ddb_alloc >
4411 total[PLANE_CURSOR])) {
4418 blocks += wm->wm[level].min_ddb_alloc;
4419 blocks += wm->uv_wm[level].min_ddb_alloc;
4422 if (blocks <= alloc_size) {
4423 alloc_size -= blocks;
4429 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
4430 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
4436 * Grant each plane the blocks it requires at the highest achievable
4437 * watermark level, plus an extra share of the leftover blocks
4438 * proportional to its relative data rate.
4440 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4441 const struct skl_plane_wm *wm =
4442 &cstate->wm.skl.optimal.planes[plane_id];
4446 if (plane_id == PLANE_CURSOR)
4447 continue;
4449 /*
4450 * We've accounted for all active planes; remaining planes are
4451 * all disabled.
4452 */
4453 if (total_data_rate == 0)
4454 break;
4456 rate = plane_data_rate[plane_id];
4457 extra = min_t(u16, alloc_size,
4458 DIV64_U64_ROUND_UP(alloc_size * rate,
4460 total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
4461 alloc_size -= extra;
4462 total_data_rate -= rate;
4464 if (total_data_rate == 0)
4465 break;
4467 rate = uv_plane_data_rate[plane_id];
4468 extra = min_t(u16, alloc_size,
4469 DIV64_U64_ROUND_UP(alloc_size * rate,
4471 uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
4472 alloc_size -= extra;
4473 total_data_rate -= rate;
4475 WARN_ON(alloc_size != 0 || total_data_rate != 0);
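/*
 * Worked example for the proportional split above: with
 * alloc_size = 100 leftover blocks and two planes with data rates
 * 300 and 100, the first plane receives
 * DIV64_U64_ROUND_UP(100 * 300, 400) = 75 extra blocks and the
 * second the remaining 25; both alloc_size and total_data_rate are
 * then exactly consumed, which is what the WARN_ON() checks.
 */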
4477 /* Set the actual DDB start/end points for each plane */
4478 start = alloc->start;
4479 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4480 struct skl_ddb_entry *plane_alloc =
4481 &cstate->wm.skl.plane_ddb_y[plane_id];
4482 struct skl_ddb_entry *uv_plane_alloc =
4483 &cstate->wm.skl.plane_ddb_uv[plane_id];
4485 if (plane_id == PLANE_CURSOR)
4488 /* Gen11+ uses a separate plane for UV watermarks */
4489 WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4491 /* Leave disabled planes at (0,0) */
4492 if (total[plane_id]) {
4493 plane_alloc->start = start;
4494 start += total[plane_id];
4495 plane_alloc->end = start;
4498 if (uv_total[plane_id]) {
4499 uv_plane_alloc->start = start;
4500 start += uv_total[plane_id];
4501 uv_plane_alloc->end = start;
4506 * When we calculated watermark values we didn't know how high
4507 * of a level we'd actually be able to hit, so we just marked
4508 * all levels as "enabled." Go back now and disable the ones
4509 * that aren't actually possible.
4511 for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4512 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4513 struct skl_plane_wm *wm =
4514 &cstate->wm.skl.optimal.planes[plane_id];
4517 * We only disable the watermarks for each plane if
4518 * they exceed the ddb allocation of said plane. This
4519 * is done so that we don't end up touching cursor
4520 * watermarks needlessly when some other plane reduces
4521 * our max possible watermark level.
4523 * Bspec has this to say about the PLANE_WM enable bit:
4524 * "All the watermarks at this level for all enabled
4525 * planes must be enabled before the level will be used."
4526 * So this is actually safe to do.
4528 if (wm->wm[level].min_ddb_alloc > total[plane_id] ||
4529 wm->uv_wm[level].min_ddb_alloc > uv_total[plane_id])
4530 memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
4533 * Wa_1408961008:icl, ehl
4534 * Underruns with WM1+ disabled
4536 if (IS_GEN(dev_priv, 11) &&
4537 level == 1 && wm->wm[0].plane_en) {
4538 wm->wm[level].plane_res_b = wm->wm[0].plane_res_b;
4539 wm->wm[level].plane_res_l = wm->wm[0].plane_res_l;
4540 wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
4546 * Go back and disable the transition watermark if it turns out we
4547 * don't have enough DDB blocks for it.
4549 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4550 struct skl_plane_wm *wm =
4551 &cstate->wm.skl.optimal.planes[plane_id];
4553 if (wm->trans_wm.plane_res_b >= total[plane_id])
4554 memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
4560 /*
4561 * The max latency should be 257 (the max the punit can encode is 255 and we add 2us
4562 * for the read latency), and cpp should always be <= 8, so that
4563 * allows a pixel_rate of up to ~2 GHz, which seems sufficient since the max
4564 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4565 */
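/*
 * Checking that bound (illustrative, assuming pixel_rate in kHz as
 * elsewhere in this file): the worst case intermediate product in
 * skl_wm_method1() is 257 us * 2000000 kHz * 8 B = 4112000000,
 * which still fits in a u32 (U32_MAX = 4294967295), so no 64-bit
 * math is needed.
 */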
4566 static uint_fixed_16_16_t
4567 skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
4568 u8 cpp, u32 latency, u32 dbuf_block_size)
4570 u32 wm_intermediate_val;
4571 uint_fixed_16_16_t ret;
4574 return FP_16_16_MAX;
4576 wm_intermediate_val = latency * pixel_rate * cpp;
4577 ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4579 if (INTEL_GEN(dev_priv) >= 10)
4580 ret = add_fixed16_u32(ret, 1);
4585 static uint_fixed_16_16_t
4586 skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
4587 uint_fixed_16_16_t plane_blocks_per_line)
4589 u32 wm_intermediate_val;
4590 uint_fixed_16_16_t ret;
4593 return FP_16_16_MAX;
4595 wm_intermediate_val = latency * pixel_rate;
4596 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4597 pipe_htotal * 1000);
4598 ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4602 static uint_fixed_16_16_t
4603 intel_get_linetime_us(const struct intel_crtc_state *cstate)
4607 uint_fixed_16_16_t linetime_us;
4609 if (!cstate->base.active)
4610 return u32_to_fixed16(0);
4612 pixel_rate = cstate->pixel_rate;
4614 if (WARN_ON(pixel_rate == 0))
4615 return u32_to_fixed16(0);
4617 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4618 linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
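/*
 * e.g. a 1920x1080@60 mode with crtc_htotal = 2200 and a pixel rate
 * of 148500 kHz: 2200 * 1000 / 148500 ~= 14.8 us per line, kept in
 * 16.16 fixed point so the fractional part survives later
 * multiplications.
 */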
4624 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4625 const struct intel_plane_state *pstate)
4627 u64 adjusted_pixel_rate;
4628 uint_fixed_16_16_t downscale_amount;
4630 /* Shouldn't reach here on disabled planes... */
4631 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4635 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4636 * with additional adjustments for plane-specific scaling.
4638 adjusted_pixel_rate = cstate->pixel_rate;
4639 downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4641 return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4646 skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
4647 int width, const struct drm_format_info *format,
4648 u64 modifier, unsigned int rotation,
4649 u32 plane_pixel_rate, struct skl_wm_params *wp,
4650 int color_plane)
4651 {
4652 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
4653 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4656 /* only planar formats have two planes */
4657 if (color_plane == 1 && !is_planar_yuv_format(format->format)) {
4658 DRM_DEBUG_KMS("Non-planar formats have a single plane\n");
4659 return -EINVAL;
4660 }
4662 wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
4663 modifier == I915_FORMAT_MOD_Yf_TILED ||
4664 modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4665 modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4666 wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
4667 wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4668 modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4669 wp->is_planar = is_planar_yuv_format(format->format);
4672 if (color_plane == 1 && wp->is_planar)
4675 wp->cpp = format->cpp[color_plane];
4676 wp->plane_pixel_rate = plane_pixel_rate;
4678 if (INTEL_GEN(dev_priv) >= 11 &&
4679 modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)
4680 wp->dbuf_block_size = 256;
4682 wp->dbuf_block_size = 512;
4684 if (drm_rotation_90_or_270(rotation)) {
4687 wp->y_min_scanlines = 16;
4690 wp->y_min_scanlines = 8;
4693 wp->y_min_scanlines = 4;
4696 MISSING_CASE(wp->cpp);
4700 wp->y_min_scanlines = 4;
4703 if (skl_needs_memory_bw_wa(dev_priv))
4704 wp->y_min_scanlines *= 2;
4706 wp->plane_bytes_per_line = wp->width * wp->cpp;
4708 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4709 wp->y_min_scanlines,
4710 wp->dbuf_block_size);
4712 if (INTEL_GEN(dev_priv) >= 10)
4715 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4716 wp->y_min_scanlines);
4717 } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
4718 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4719 wp->dbuf_block_size);
4720 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4722 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4723 wp->dbuf_block_size) + 1;
4724 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4727 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4728 wp->plane_blocks_per_line);
4730 wp->linetime_us = fixed16_to_u32_round_up(
4731 intel_get_linetime_us(crtc_state));
4737 skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
4738 const struct intel_plane_state *plane_state,
4739 struct skl_wm_params *wp, int color_plane)
4741 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4742 const struct drm_framebuffer *fb = plane_state->base.fb;
4745 if (plane->id == PLANE_CURSOR) {
4746 width = plane_state->base.crtc_w;
4749 * Src coordinates are already rotated by 270 degrees for
4750 * the 90/270 degree plane rotation cases (to match the
4751 * GTT mapping), hence no need to account for rotation here.
4753 width = drm_rect_width(&plane_state->base.src) >> 16;
4756 return skl_compute_wm_params(crtc_state, width,
4757 fb->format, fb->modifier,
4758 plane_state->base.rotation,
4759 skl_adjusted_plane_pixel_rate(crtc_state, plane_state),
4763 static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
4765 if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
4766 return true;
4768 /* The number of lines is ignored for the level 0 watermark. */
4769 return level > 0;
4770 }
4772 static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
4773 int level,
4774 const struct skl_wm_params *wp,
4775 const struct skl_wm_level *result_prev,
4776 struct skl_wm_level *result /* out */)
4778 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
4779 u32 latency = dev_priv->wm.skl_latency[level];
4780 uint_fixed_16_16_t method1, method2;
4781 uint_fixed_16_16_t selected_result;
4782 u32 res_blocks, res_lines, min_ddb_alloc = 0;
4784 if (latency == 0) {
4785 /* reject it */
4786 result->min_ddb_alloc = U16_MAX;
4787 return;
4788 }
4790 /* Display WA #1141: kbl,cfl */
4791 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4792 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4793 dev_priv->ipc_enabled)
4794 latency += 4;
4796 if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
4797 latency += 15;
4799 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4800 wp->cpp, latency, wp->dbuf_block_size);
4801 method2 = skl_wm_method2(wp->plane_pixel_rate,
4802 cstate->base.adjusted_mode.crtc_htotal,
4804 wp->plane_blocks_per_line);
4807 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4809 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4810 wp->dbuf_block_size < 1) &&
4811 (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
4812 selected_result = method2;
4813 } else if (latency >= wp->linetime_us) {
4814 if (IS_GEN(dev_priv, 9) &&
4815 !IS_GEMINILAKE(dev_priv))
4816 selected_result = min_fixed16(method1, method2);
4818 selected_result = method2;
4820 selected_result = method1;
4824 res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4825 res_lines = div_round_up_fixed16(selected_result,
4826 wp->plane_blocks_per_line);
4828 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
4829 /* Display WA #1125: skl,bxt,kbl */
4830 if (level == 0 && wp->rc_surface)
4832 fixed16_to_u32_round_up(wp->y_tile_minimum);
4834 /* Display WA #1126: skl,bxt,kbl */
4835 if (level >= 1 && level <= 7) {
4838 fixed16_to_u32_round_up(wp->y_tile_minimum);
4839 res_lines += wp->y_min_scanlines;
4845 * Make sure result blocks for higher latency levels are
4846 * at least as high as the level below the current level.
4847 * Assumption in DDB algorithm optimization for special
4848 * cases. Also covers Display WA #1125 for RC.
4850 if (result_prev->plane_res_b > res_blocks)
4851 res_blocks = result_prev->plane_res_b;
4855 if (INTEL_GEN(dev_priv) >= 11) {
4859 if (res_lines % wp->y_min_scanlines == 0)
4860 extra_lines = wp->y_min_scanlines;
4862 extra_lines = wp->y_min_scanlines * 2 -
4863 res_lines % wp->y_min_scanlines;
4865 min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
4866 wp->plane_blocks_per_line);
4868 min_ddb_alloc = res_blocks +
4869 DIV_ROUND_UP(res_blocks, 10);
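/*
 * Illustrative numbers for the min_ddb_alloc math above: on gen11+
 * with y_min_scanlines = 4 and res_lines = 6, extra_lines =
 * 2 * 4 - 6 % 4 = 6, so the allocation is sized for 12 lines (the
 * next y_min_scanlines multiple, with headroom). Pre-gen11,
 * res_blocks = 31 gives 31 + DIV_ROUND_UP(31, 10) = 35 blocks,
 * before the final +1 applied when the result is written out.
 */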
4873 if (!skl_wm_has_lines(dev_priv, level))
4874 res_lines = 0;
4876 if (res_lines > 31) {
4877 /* reject it */
4878 result->min_ddb_alloc = U16_MAX;
4879 return;
4880 }
4883 * If res_lines is valid, assume we can use this watermark level
4884 * for now. We'll come back and disable it after we calculate the
4885 * DDB allocation if it turns out we don't actually have enough
4886 * blocks to satisfy it.
4888 result->plane_res_b = res_blocks;
4889 result->plane_res_l = res_lines;
4890 /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
4891 result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
4892 result->plane_en = true;
4896 skl_compute_wm_levels(const struct intel_crtc_state *cstate,
4897 const struct skl_wm_params *wm_params,
4898 struct skl_wm_level *levels)
4900 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
4901 int level, max_level = ilk_wm_max_level(dev_priv);
4902 struct skl_wm_level *result_prev = &levels[0];
4904 for (level = 0; level <= max_level; level++) {
4905 struct skl_wm_level *result = &levels[level];
4907 skl_compute_plane_wm(cstate, level, wm_params,
4908 result_prev, result);
4910 result_prev = result;
4915 skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
4917 struct drm_atomic_state *state = cstate->base.state;
4918 struct drm_i915_private *dev_priv = to_i915(state->dev);
4919 uint_fixed_16_16_t linetime_us;
4922 linetime_us = intel_get_linetime_us(cstate);
4923 linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4925 /* Display WA #1135: BXT:ALL GLK:ALL */
4926 if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)
4932 static void skl_compute_transition_wm(const struct intel_crtc_state *cstate,
4933 const struct skl_wm_params *wp,
4934 struct skl_plane_wm *wm)
4936 struct drm_device *dev = cstate->base.crtc->dev;
4937 const struct drm_i915_private *dev_priv = to_i915(dev);
4938 u16 trans_min, trans_y_tile_min;
4939 const u16 trans_amount = 10; /* This is configurable amount */
4940 u16 wm0_sel_res_b, trans_offset_b, res_blocks;
4942 /* Transition WMs are not recommended by the HW team for GEN9 */
4943 if (INTEL_GEN(dev_priv) <= 9)
4946 /* Transition WMs don't make any sense if IPC is disabled */
4947 if (!dev_priv->ipc_enabled)
4951 if (INTEL_GEN(dev_priv) >= 11)
4954 trans_offset_b = trans_min + trans_amount;
4957 * The spec asks for Selected Result Blocks for wm0 (the real value),
4958 * not Result Blocks (the integer value). Pay attention to the capital
4959 * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4960 * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4961 * and since we later will have to get the ceiling of the sum in the
4962 * transition watermarks calculation, we can just pretend Selected
4963 * Result Blocks is Result Blocks minus 1 and it should work for the
4964 * current platforms.
4966 wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
4970 (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
4971 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
4974 res_blocks = wm0_sel_res_b + trans_offset_b;
4976 /* WA BUG:1938466 add one block for non y-tile planes */
4977 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4983 * Just assume we can enable the transition watermark. After
4984 * computing the DDB we'll come back and disable it if that
4985 * assumption turns out to be false.
4987 wm->trans_wm.plane_res_b = res_blocks + 1;
4988 wm->trans_wm.plane_en = true;
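/*
 * Worked example, assuming trans_min = 4 (gen11+): trans_offset_b =
 * 4 + 10 = 14, so a non y-tiled plane whose wm0 plane_res_b is 20
 * (Selected Result Blocks 19, per the comment above) programs a
 * transition watermark of 19 + 14 + 1 = 34 blocks.
 */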
4991 static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
4992 const struct intel_plane_state *plane_state,
4993 enum plane_id plane_id, int color_plane)
4995 struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
4996 struct skl_wm_params wm_params;
4999 ret = skl_compute_plane_wm_params(crtc_state, plane_state,
5000 &wm_params, color_plane);
5004 skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
5005 skl_compute_transition_wm(crtc_state, &wm_params, wm);
5010 static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
5011 const struct intel_plane_state *plane_state,
5012 enum plane_id plane_id)
5014 struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
5015 struct skl_wm_params wm_params;
5018 wm->is_planar = true;
5020 /* uv plane watermarks must also be validated for NV12/Planar */
5021 ret = skl_compute_plane_wm_params(crtc_state, plane_state,
5026 skl_compute_wm_levels(crtc_state, &wm_params, wm->uv_wm);
5031 static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
5032 const struct intel_plane_state *plane_state)
5034 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
5035 const struct drm_framebuffer *fb = plane_state->base.fb;
5036 enum plane_id plane_id = plane->id;
5039 if (!intel_wm_plane_visible(crtc_state, plane_state))
5042 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5047 if (fb->format->is_yuv && fb->format->num_planes > 1) {
5048 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
5057 static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
5058 const struct intel_plane_state *plane_state)
5060 enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
5063 /* Watermarks calculated in master */
5064 if (plane_state->slave)
5067 if (plane_state->linked_plane) {
5068 const struct drm_framebuffer *fb = plane_state->base.fb;
5069 enum plane_id y_plane_id = plane_state->linked_plane->id;
5071 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
5072 WARN_ON(!fb->format->is_yuv ||
5073 fb->format->num_planes == 1);
5075 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5080 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5084 } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
5085 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5094 static int skl_build_pipe_wm(struct intel_crtc_state *cstate)
5096 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5097 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5098 struct drm_crtc_state *crtc_state = &cstate->base;
5099 struct drm_plane *plane;
5100 const struct drm_plane_state *pstate;
5104 * We'll only calculate watermarks for planes that are actually
5105 * enabled, so make sure all other planes are set as disabled.
5107 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
5109 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
5110 const struct intel_plane_state *intel_pstate =
5111 to_intel_plane_state(pstate);
5113 if (INTEL_GEN(dev_priv) >= 11)
5114 ret = icl_build_plane_wm(cstate, intel_pstate);
5116 ret = skl_build_plane_wm(cstate, intel_pstate);
5121 pipe_wm->linetime = skl_compute_linetime_wm(cstate);
5126 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5127 i915_reg_t reg,
5128 const struct skl_ddb_entry *entry)
5130 if (entry->end)
5131 I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
5132 else
5133 I915_WRITE_FW(reg, 0);
5136 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5138 const struct skl_wm_level *level)
5142 if (level->plane_en)
5143 val |= PLANE_WM_EN;
5144 if (level->ignore_lines)
5145 val |= PLANE_WM_IGNORE_LINES;
5146 val |= level->plane_res_b;
5147 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5149 I915_WRITE_FW(reg, val);
5152 void skl_write_plane_wm(struct intel_plane *plane,
5153 const struct intel_crtc_state *crtc_state)
5155 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5156 int level, max_level = ilk_wm_max_level(dev_priv);
5157 enum plane_id plane_id = plane->id;
5158 enum pipe pipe = plane->pipe;
5159 const struct skl_plane_wm *wm =
5160 &crtc_state->wm.skl.optimal.planes[plane_id];
5161 const struct skl_ddb_entry *ddb_y =
5162 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5163 const struct skl_ddb_entry *ddb_uv =
5164 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
5166 for (level = 0; level <= max_level; level++) {
5167 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5170 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5173 if (INTEL_GEN(dev_priv) >= 11) {
5174 skl_ddb_entry_write(dev_priv,
5175 PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5176 return;
5177 }
5179 if (wm->is_planar)
5180 swap(ddb_y, ddb_uv);
5182 skl_ddb_entry_write(dev_priv,
5183 PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5184 skl_ddb_entry_write(dev_priv,
5185 PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
5188 void skl_write_cursor_wm(struct intel_plane *plane,
5189 const struct intel_crtc_state *crtc_state)
5191 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5192 int level, max_level = ilk_wm_max_level(dev_priv);
5193 enum plane_id plane_id = plane->id;
5194 enum pipe pipe = plane->pipe;
5195 const struct skl_plane_wm *wm =
5196 &crtc_state->wm.skl.optimal.planes[plane_id];
5197 const struct skl_ddb_entry *ddb =
5198 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5200 for (level = 0; level <= max_level; level++) {
5201 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5204 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5206 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
5209 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5210 const struct skl_wm_level *l2)
5212 return l1->plane_en == l2->plane_en &&
5213 l1->ignore_lines == l2->ignore_lines &&
5214 l1->plane_res_l == l2->plane_res_l &&
5215 l1->plane_res_b == l2->plane_res_b;
5218 static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5219 const struct skl_plane_wm *wm1,
5220 const struct skl_plane_wm *wm2)
5222 int level, max_level = ilk_wm_max_level(dev_priv);
5224 for (level = 0; level <= max_level; level++) {
5225 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5226 !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5230 return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
5233 static bool skl_pipe_wm_equals(struct intel_crtc *crtc,
5234 const struct skl_pipe_wm *wm1,
5235 const struct skl_pipe_wm *wm2)
5237 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5238 enum plane_id plane_id;
5240 for_each_plane_id_on_crtc(crtc, plane_id) {
5241 if (!skl_plane_wm_equals(dev_priv,
5242 &wm1->planes[plane_id],
5243 &wm2->planes[plane_id]))
5247 return wm1->linetime == wm2->linetime;
5250 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5251 const struct skl_ddb_entry *b)
5253 return a->start < b->end && b->start < a->end;
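/*
 * DDB entries are half-open [start, end) ranges, hence the strict
 * comparisons above: e.g. (0, 100) and (100, 200) do not overlap,
 * while (0, 100) and (99, 200) do.
 */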
5256 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
5257 const struct skl_ddb_entry *entries,
5258 int num_entries, int ignore_idx)
5262 for (i = 0; i < num_entries; i++) {
5263 if (i != ignore_idx &&
5264 skl_ddb_entries_overlap(ddb, &entries[i]))
5272 pipes_modified(struct intel_atomic_state *state)
5274 struct intel_crtc *crtc;
5275 struct intel_crtc_state *cstate;
5278 for_each_new_intel_crtc_in_state(state, crtc, cstate, i)
5279 ret |= drm_crtc_mask(&crtc->base);
5285 skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5286 struct intel_crtc_state *new_crtc_state)
5288 struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
5289 struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5290 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5291 struct intel_plane *plane;
5293 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5294 struct intel_plane_state *plane_state;
5295 enum plane_id plane_id = plane->id;
5297 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5298 &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5299 skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5300 &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
5303 plane_state = intel_atomic_get_plane_state(state, plane);
5304 if (IS_ERR(plane_state))
5305 return PTR_ERR(plane_state);
5307 new_crtc_state->update_planes |= BIT(plane_id);
5314 skl_compute_ddb(struct intel_atomic_state *state)
5316 const struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5317 struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5318 struct intel_crtc_state *old_crtc_state;
5319 struct intel_crtc_state *new_crtc_state;
5320 struct intel_crtc *crtc;
5323 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5325 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5326 new_crtc_state, i) {
5327 ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
5331 ret = skl_ddb_add_affected_planes(old_crtc_state,
5340 static char enast(bool enable)
5342 return enable ? '*' : ' ';
5346 skl_print_wm_changes(struct intel_atomic_state *state)
5348 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5349 const struct intel_crtc_state *old_crtc_state;
5350 const struct intel_crtc_state *new_crtc_state;
5351 struct intel_plane *plane;
5352 struct intel_crtc *crtc;
5355 if ((drm_debug & DRM_UT_KMS) == 0)
5358 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5359 new_crtc_state, i) {
5360 const struct skl_pipe_wm *old_pipe_wm, *new_pipe_wm;
5362 old_pipe_wm = &old_crtc_state->wm.skl.optimal;
5363 new_pipe_wm = &new_crtc_state->wm.skl.optimal;
5365 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5366 enum plane_id plane_id = plane->id;
5367 const struct skl_ddb_entry *old, *new;
5369 old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5370 new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
5372 if (skl_ddb_entry_equal(old, new))
5375 DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
5376 plane->base.base.id, plane->base.name,
5377 old->start, old->end, new->start, new->end,
5378 skl_ddb_entry_size(old), skl_ddb_entry_size(new));
5381 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5382 enum plane_id plane_id = plane->id;
5383 const struct skl_plane_wm *old_wm, *new_wm;
5385 old_wm = &old_pipe_wm->planes[plane_id];
5386 new_wm = &new_pipe_wm->planes[plane_id];
5388 if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
5391 DRM_DEBUG_KMS("[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
5392 " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
5393 plane->base.base.id, plane->base.name,
5394 enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
5395 enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
5396 enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
5397 enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
5398 enast(old_wm->trans_wm.plane_en),
5399 enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
5400 enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
5401 enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
5402 enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
5403 enast(new_wm->trans_wm.plane_en));
5405 DRM_DEBUG_KMS("[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
5406 " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
5407 plane->base.base.id, plane->base.name,
5408 enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
5409 enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
5410 enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
5411 enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
5412 enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
5413 enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
5414 enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
5415 enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
5416 enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
5418 enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
5419 enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
5420 enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
5421 enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
5422 enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
5423 enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
5424 enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
5425 enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
5426 enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
5428 DRM_DEBUG_KMS("[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5429 " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5430 plane->base.base.id, plane->base.name,
5431 old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
5432 old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
5433 old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
5434 old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
5435 old_wm->trans_wm.plane_res_b,
5436 new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
5437 new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
5438 new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
5439 new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
5440 new_wm->trans_wm.plane_res_b);
5442 DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5443 " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5444 plane->base.base.id, plane->base.name,
5445 old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
5446 old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
5447 old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
5448 old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
5449 old_wm->trans_wm.min_ddb_alloc,
5450 new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
5451 new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
5452 new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
5453 new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
5454 new_wm->trans_wm.min_ddb_alloc);
5460 skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
5462 struct drm_device *dev = state->base.dev;
5463 const struct drm_i915_private *dev_priv = to_i915(dev);
5464 struct intel_crtc *crtc;
5465 struct intel_crtc_state *crtc_state;
5466 u32 realloc_pipes = pipes_modified(state);
5470 * When we distrust bios wm we always need to recompute to set the
5471 * expected DDB allocations for each CRTC.
5473 if (dev_priv->wm.distrust_bios_wm)
5477 * If this transaction isn't actually touching any CRTC's, don't
5478 * bother with watermark calculation. Note that if we pass this
5479 * test, we're guaranteed to hold at least one CRTC state mutex,
5480 * which means we can safely use values like dev_priv->active_crtcs
5481 * since any racing commits that want to update them would need to
5482 * hold _all_ CRTC state mutexes.
5484 for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
5491 * If this is our first atomic update following hardware readout,
5492 * we can't trust the DDB that the BIOS programmed for us. Let's
5493 * pretend that all pipes switched active status so that we'll
5494 * ensure a full DDB recompute.
5496 if (dev_priv->wm.distrust_bios_wm) {
5497 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5498 state->base.acquire_ctx);
5502 state->active_pipe_changes = ~0;
5505 * We usually only initialize state->active_crtcs if we're
5506 * doing a modeset; make sure this field is always
5507 * initialized during the sanitization process that happens
5508 * on the first commit too.
5510 if (!state->modeset)
5511 state->active_crtcs = dev_priv->active_crtcs;
5515 * If the modeset changes which CRTC's are active, we need to
5516 * recompute the DDB allocation for *all* active pipes, even
5517 * those that weren't otherwise being modified in any way by this
5518 * atomic commit. Due to the shrinking of the per-pipe allocations
5519 * when new active CRTC's are added, it's possible for a pipe that
5520 * we were already using and aren't changing at all here to suddenly
5521 * become invalid if its DDB requirements exceed its new allocation.
5523 * Note that if we wind up doing a full DDB recompute, we can't let
5524 * any other display updates race with this transaction, so we need
5525 * to grab the lock on *all* CRTC's.
5527 if (state->active_pipe_changes || state->modeset) {
5529 state->wm_results.dirty_pipes = ~0;
5533 * We're not recomputing for the pipes not included in the commit, so
5534 * make sure we start with the current state.
5536 for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
5537 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5538 if (IS_ERR(crtc_state))
5539 return PTR_ERR(crtc_state);
5546 * To make sure the cursor watermark registers are always consistent
5547 * with our computed state the following scenario needs special
5548 * treatment:
5550 * 1. enable cursor
5551 * 2. move cursor entirely offscreen
5552 * 3. disable cursor
5554 * Step 2. does call .disable_plane() but does not zero the watermarks
5555 * (since we consider an offscreen cursor still active for the purposes
5556 * of watermarks). Step 3. would not normally call .disable_plane()
5557 * because the actual plane visibility isn't changing, and we don't
5558 * deallocate the cursor ddb until the pipe gets disabled. So we must
5559 * force step 3. to call .disable_plane() to update the watermark
5560 * registers properly.
5562 * Other planes do not suffer from this issue as their watermarks are
5563 * calculated based on the actual plane visibility. The only time this
5564 * can trigger for the other planes is during the initial readout as the
5565 * default value of the watermarks registers is not zero.
5567 static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5568 struct intel_crtc *crtc)
5570 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5571 const struct intel_crtc_state *old_crtc_state =
5572 intel_atomic_get_old_crtc_state(state, crtc);
5573 struct intel_crtc_state *new_crtc_state =
5574 intel_atomic_get_new_crtc_state(state, crtc);
5575 struct intel_plane *plane;
5577 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5578 struct intel_plane_state *plane_state;
5579 enum plane_id plane_id = plane->id;
5582 * Force a full wm update for every plane on modeset.
5583 * Required because the reset value of the wm registers
5584 * is non-zero, whereas we want all disabled planes to
5585 * have zero watermarks. So if we turn off the relevant
5586 * power well the hardware state will go out of sync
5587 * with the software state.
5589 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
5590 skl_plane_wm_equals(dev_priv,
5591 &old_crtc_state->wm.skl.optimal.planes[plane_id],
5592 &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5595 plane_state = intel_atomic_get_plane_state(state, plane);
5596 if (IS_ERR(plane_state))
5597 return PTR_ERR(plane_state);
5599 new_crtc_state->update_planes |= BIT(plane_id);
5606 skl_compute_wm(struct intel_atomic_state *state)
5608 struct intel_crtc *crtc;
5609 struct intel_crtc_state *new_crtc_state;
5610 struct intel_crtc_state *old_crtc_state;
5611 struct skl_ddb_values *results = &state->wm_results;
5612 bool changed = false;
5615 /* Clear all dirty flags */
5616 results->dirty_pipes = 0;
5618 ret = skl_ddb_add_affected_pipes(state, &changed);
5619 if (ret || !changed)
5623 * Calculate WM's for all pipes that are part of this transaction.
5624 * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
5625 * weren't otherwise being modified (and set bits in dirty_pipes) if
5626 * pipe allocations had to change.
5628 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5629 new_crtc_state, i) {
5630 ret = skl_build_pipe_wm(new_crtc_state);
5634 ret = skl_wm_add_affected_planes(state, crtc);
5638 if (!skl_pipe_wm_equals(crtc,
5639 &old_crtc_state->wm.skl.optimal,
5640 &new_crtc_state->wm.skl.optimal))
5641 results->dirty_pipes |= drm_crtc_mask(&crtc->base);
5644 ret = skl_compute_ddb(state);
5648 skl_print_wm_changes(state);
5653 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5654 struct intel_crtc_state *cstate)
5656 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5657 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5658 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5659 enum pipe pipe = crtc->pipe;
5661 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5664 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5667 static void skl_initial_wm(struct intel_atomic_state *state,
5668 struct intel_crtc_state *cstate)
5670 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5671 struct drm_device *dev = intel_crtc->base.dev;
5672 struct drm_i915_private *dev_priv = to_i915(dev);
5673 struct skl_ddb_values *results = &state->wm_results;
5675 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5678 mutex_lock(&dev_priv->wm.wm_mutex);
5680 if (cstate->base.active_changed)
5681 skl_atomic_update_crtc_wm(state, cstate);
5683 mutex_unlock(&dev_priv->wm.wm_mutex);
5686 static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
5687 struct intel_wm_config *config)
5689 struct intel_crtc *crtc;
5691 /* Compute the currently _active_ config */
5692 for_each_intel_crtc(&dev_priv->drm, crtc) {
5693 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5695 if (!wm->pipe_enabled)
5698 config->sprites_enabled |= wm->sprites_enabled;
5699 config->sprites_scaled |= wm->sprites_scaled;
5700 config->num_pipes_active++;
5704 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5706 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5707 struct ilk_wm_maximums max;
5708 struct intel_wm_config config = {};
5709 struct ilk_wm_values results = {};
5710 enum intel_ddb_partitioning partitioning;
5712 ilk_compute_wm_config(dev_priv, &config);
5714 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5715 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
5717 /* 5/6 split only in single pipe config on IVB+ */
5718 if (INTEL_GEN(dev_priv) >= 7 &&
5719 config.num_pipes_active == 1 && config.sprites_enabled) {
5720 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5721 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
5723 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
5725 best_lp_wm = &lp_wm_1_2;
5728 partitioning = (best_lp_wm == &lp_wm_1_2) ?
5729 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5731 ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
5733 ilk_write_wm_values(dev_priv, &results);
5736 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5737 struct intel_crtc_state *cstate)
5739 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5740 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5742 mutex_lock(&dev_priv->wm.wm_mutex);
5743 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5744 ilk_program_watermarks(dev_priv);
5745 mutex_unlock(&dev_priv->wm.wm_mutex);
5748 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5749 struct intel_crtc_state *cstate)
5751 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5752 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5754 mutex_lock(&dev_priv->wm.wm_mutex);
5755 if (cstate->wm.need_postvbl_update) {
5756 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5757 ilk_program_watermarks(dev_priv);
5759 mutex_unlock(&dev_priv->wm.wm_mutex);
5762 static inline void skl_wm_level_from_reg_val(u32 val,
5763 struct skl_wm_level *level)
5765 level->plane_en = val & PLANE_WM_EN;
5766 level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
5767 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5768 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5769 PLANE_WM_LINES_MASK;
5772 void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
5773 struct skl_pipe_wm *out)
5775 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5776 enum pipe pipe = crtc->pipe;
5777 int level, max_level;
5778 enum plane_id plane_id;
5781 max_level = ilk_wm_max_level(dev_priv);
5783 for_each_plane_id_on_crtc(crtc, plane_id) {
5784 struct skl_plane_wm *wm = &out->planes[plane_id];
5786 for (level = 0; level <= max_level; level++) {
5787 if (plane_id != PLANE_CURSOR)
5788 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5790 val = I915_READ(CUR_WM(pipe, level));
5792 skl_wm_level_from_reg_val(val, &wm->wm[level]);
5795 if (plane_id != PLANE_CURSOR)
5796 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5798 val = I915_READ(CUR_WM_TRANS(pipe));
5800 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5806 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5809 void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
5811 struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5812 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5813 struct intel_crtc *crtc;
5814 struct intel_crtc_state *cstate;
5816 skl_ddb_get_hw_state(dev_priv, ddb);
5817 for_each_intel_crtc(&dev_priv->drm, crtc) {
5818 cstate = to_intel_crtc_state(crtc->base.state);
5820 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5823 hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
5826 if (dev_priv->active_crtcs) {
5827 /* Fully recompute DDB on first atomic commit */
5828 dev_priv->wm.distrust_bios_wm = true;
5832 static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
5834 struct drm_device *dev = crtc->base.dev;
5835 struct drm_i915_private *dev_priv = to_i915(dev);
5836 struct ilk_wm_values *hw = &dev_priv->wm.hw;
5837 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->base.state);
5838 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5839 enum pipe pipe = crtc->pipe;
5840 static const i915_reg_t wm0_pipe_reg[] = {
5841 [PIPE_A] = WM0_PIPEA_ILK,
5842 [PIPE_B] = WM0_PIPEB_ILK,
5843 [PIPE_C] = WM0_PIPEC_IVB,
5846 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5847 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5848 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5850 memset(active, 0, sizeof(*active));
5852 active->pipe_enabled = crtc->active;
5854 if (active->pipe_enabled) {
5855 u32 tmp = hw->wm_pipe[pipe];
5858 * For active pipes LP0 watermark is marked as
5859 * enabled, and LP1+ watermarks as disabled since
5860 * we can't really reverse compute them in case
5861 * multiple pipes are active.
5863 active->wm[0].enable = true;
5864 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5865 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5866 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5867 active->linetime = hw->wm_linetime[pipe];
5869 int level, max_level = ilk_wm_max_level(dev_priv);
5872 * For inactive pipes, all watermark levels
5873 * should be marked as enabled but zeroed,
5874 * which is what we'd compute them to.
5876 for (level = 0; level <= max_level; level++)
5877 active->wm[level].enable = true;
5880 crtc->wm.active.ilk = *active;
5883 #define _FW_WM(value, plane) \
5884 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5885 #define _FW_WM_VLV(value, plane) \
5886 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5888 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5889 struct g4x_wm_values *wm)
5893 tmp = I915_READ(DSPFW1);
5894 wm->sr.plane = _FW_WM(tmp, SR);
5895 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5896 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5897 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5899 tmp = I915_READ(DSPFW2);
5900 wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5901 wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5902 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5903 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5904 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5905 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5907 tmp = I915_READ(DSPFW3);
5908 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5909 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5910 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5911 wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5914 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5915 struct vlv_wm_values *wm)
5920 for_each_pipe(dev_priv, pipe) {
5921 tmp = I915_READ(VLV_DDL(pipe));
5923 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5924 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5925 wm->ddl[pipe].plane[PLANE_CURSOR] =
5926 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5927 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5928 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5929 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5930 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5933 tmp = I915_READ(DSPFW1);
5934 wm->sr.plane = _FW_WM(tmp, SR);
5935 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5936 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5937 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5939 tmp = I915_READ(DSPFW2);
5940 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5941 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5942 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5944 tmp = I915_READ(DSPFW3);
5945 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5947 if (IS_CHERRYVIEW(dev_priv)) {
5948 tmp = I915_READ(DSPFW7_CHV);
5949 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5950 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5952 tmp = I915_READ(DSPFW8_CHV);
5953 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5954 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5956 tmp = I915_READ(DSPFW9_CHV);
5957 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5958 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5960 tmp = I915_READ(DSPHOWM);
5961 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5962 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5963 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5964 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5965 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5966 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5967 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5968 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5969 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5970 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5972 tmp = I915_READ(DSPFW7);
5973 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5974 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5976 tmp = I915_READ(DSPHOWM);
5977 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5978 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5979 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5980 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5981 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5982 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5983 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
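/*
 * Illustrative, standalone sketch (not part of the driver): how the
 * readout above stitches one watermark together from a low field in a
 * DSPFW register and an extra high bit in DSPHOWM (shifted in at bit 8
 * for the 8-bit plane fields, bit 9 for the wider SR field). The 0xff
 * mask and the shift arguments are simplifications, not the real layout.
 */
#include <stdint.h>

static uint16_t compose_wm(uint32_t fw_val, unsigned int fw_shift,
			   uint32_t howm_val, unsigned int howm_bit,
			   unsigned int hi_shift)
{
	uint16_t wm = (fw_val >> fw_shift) & 0xff;	/* low bits */

	wm |= ((howm_val >> howm_bit) & 1) << hi_shift;	/* high bit */
	return wm;
}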
5990 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
5992 struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5993 struct intel_crtc *crtc;
5995 g4x_read_wm_values(dev_priv, wm);
5997 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5999 for_each_intel_crtc(&dev_priv->drm, crtc) {
6000 struct intel_crtc_state *crtc_state =
6001 to_intel_crtc_state(crtc->base.state);
6002 struct g4x_wm_state *active = &crtc->wm.active.g4x;
6003 struct g4x_pipe_wm *raw;
6004 enum pipe pipe = crtc->pipe;
6005 enum plane_id plane_id;
6006 int level, max_level;
6008 active->cxsr = wm->cxsr;
6009 active->hpll_en = wm->hpll_en;
6010 active->fbc_en = wm->fbc_en;
6012 active->sr = wm->sr;
6013 active->hpll = wm->hpll;
6015 for_each_plane_id_on_crtc(crtc, plane_id) {
6016 active->wm.plane[plane_id] =
6017 wm->pipe[pipe].plane[plane_id];
6020 if (wm->cxsr && wm->hpll_en)
6021 max_level = G4X_WM_LEVEL_HPLL;
6022 else if (wm->cxsr)
6023 max_level = G4X_WM_LEVEL_SR;
6024 else
6025 max_level = G4X_WM_LEVEL_NORMAL;
6027 level = G4X_WM_LEVEL_NORMAL;
6028 raw = &crtc_state->wm.g4x.raw[level];
6029 for_each_plane_id_on_crtc(crtc, plane_id)
6030 raw->plane[plane_id] = active->wm.plane[plane_id];
6032 if (++level > max_level)
6033 goto out;
6035 raw = &crtc_state->wm.g4x.raw[level];
6036 raw->plane[PLANE_PRIMARY] = active->sr.plane;
6037 raw->plane[PLANE_CURSOR] = active->sr.cursor;
6038 raw->plane[PLANE_SPRITE0] = 0;
6039 raw->fbc = active->sr.fbc;
6041 if (++level > max_level)
6042 goto out;
6044 raw = &crtc_state->wm.g4x.raw[level];
6045 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
6046 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
6047 raw->plane[PLANE_SPRITE0] = 0;
6048 raw->fbc = active->hpll.fbc;
6050 out:
6051 for_each_plane_id_on_crtc(crtc, plane_id)
6052 g4x_raw_plane_wm_set(crtc_state, level,
6053 plane_id, USHRT_MAX);
6054 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
6056 crtc_state->wm.g4x.optimal = *active;
6057 crtc_state->wm.g4x.intermediate = *active;
6059 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
6061 wm->pipe[pipe].plane[PLANE_PRIMARY],
6062 wm->pipe[pipe].plane[PLANE_CURSOR],
6063 wm->pipe[pipe].plane[PLANE_SPRITE0]);
6066 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
6067 wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
6068 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, cursor=%d fbc=%d\n",
6069 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
6070 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
6071 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
6074 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
6076 struct intel_plane *plane;
6077 struct intel_crtc *crtc;
6079 mutex_lock(&dev_priv->wm.wm_mutex);
6081 for_each_intel_plane(&dev_priv->drm, plane) {
6082 struct intel_crtc *crtc =
6083 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6084 struct intel_crtc_state *crtc_state =
6085 to_intel_crtc_state(crtc->base.state);
6086 struct intel_plane_state *plane_state =
6087 to_intel_plane_state(plane->base.state);
6088 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
6089 enum plane_id plane_id = plane->id;
6092 if (plane_state->base.visible)
6093 continue;
6095 for (level = 0; level < 3; level++) {
6096 struct g4x_pipe_wm *raw =
6097 &crtc_state->wm.g4x.raw[level];
6099 raw->plane[plane_id] = 0;
6100 wm_state->wm.plane[plane_id] = 0;
6103 if (plane_id == PLANE_PRIMARY) {
6104 for (level = 0; level < 3; level++) {
6105 struct g4x_pipe_wm *raw =
6106 &crtc_state->wm.g4x.raw[level];
6108 raw->fbc = 0;
6110 wm_state->sr.fbc = 0;
6111 wm_state->hpll.fbc = 0;
6112 wm_state->fbc_en = false;
6116 for_each_intel_crtc(&dev_priv->drm, crtc) {
6117 struct intel_crtc_state *crtc_state =
6118 to_intel_crtc_state(crtc->base.state);
6120 crtc_state->wm.g4x.intermediate =
6121 crtc_state->wm.g4x.optimal;
6122 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
6125 g4x_program_watermarks(dev_priv);
6127 mutex_unlock(&dev_priv->wm.wm_mutex);
6130 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6132 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
6133 struct intel_crtc *crtc;
6136 vlv_read_wm_values(dev_priv, wm);
6138 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
6139 wm->level = VLV_WM_LEVEL_PM2;
6141 if (IS_CHERRYVIEW(dev_priv)) {
6142 mutex_lock(&dev_priv->pcu_lock);
6144 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
6145 if (val & DSP_MAXFIFO_PM5_ENABLE)
6146 wm->level = VLV_WM_LEVEL_PM5;
6149 * If DDR DVFS is disabled in the BIOS, Punit
6150 * will never ack the request. So if that happens
6151 * assume we don't have to enable/disable DDR DVFS
6152 * dynamically. To test that just set the REQ_ACK
6153 * bit to poke the Punit, but don't change the
6154 * HIGH/LOW bits so that we don't actually change
6155 * the current state.
6157 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6158 val |= FORCE_DDR_FREQ_REQ_ACK;
6159 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
6161 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
6162 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
6163 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
6164 "assuming DDR DVFS is disabled\n");
6165 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
6167 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6168 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
6169 wm->level = VLV_WM_LEVEL_DDR_DVFS;
6172 mutex_unlock(&dev_priv->pcu_lock);
6175 for_each_intel_crtc(&dev_priv->drm, crtc) {
6176 struct intel_crtc_state *crtc_state =
6177 to_intel_crtc_state(crtc->base.state);
6178 struct vlv_wm_state *active = &crtc->wm.active.vlv;
6179 const struct vlv_fifo_state *fifo_state =
6180 &crtc_state->wm.vlv.fifo_state;
6181 enum pipe pipe = crtc->pipe;
6182 enum plane_id plane_id;
6185 vlv_get_fifo_size(crtc_state);
6187 active->num_levels = wm->level + 1;
6188 active->cxsr = wm->cxsr;
6190 for (level = 0; level < active->num_levels; level++) {
6191 struct g4x_pipe_wm *raw =
6192 &crtc_state->wm.vlv.raw[level];
6194 active->sr[level].plane = wm->sr.plane;
6195 active->sr[level].cursor = wm->sr.cursor;
6197 for_each_plane_id_on_crtc(crtc, plane_id) {
6198 active->wm[level].plane[plane_id] =
6199 wm->pipe[pipe].plane[plane_id];
6201 raw->plane[plane_id] =
6202 vlv_invert_wm_value(active->wm[level].plane[plane_id],
6203 fifo_state->plane[plane_id]);
6207 for_each_plane_id_on_crtc(crtc, plane_id)
6208 vlv_raw_plane_wm_set(crtc_state, level,
6209 plane_id, USHRT_MAX);
6210 vlv_invalidate_wms(crtc, active, level);
6212 crtc_state->wm.vlv.optimal = *active;
6213 crtc_state->wm.vlv.intermediate = *active;
6215 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6217 wm->pipe[pipe].plane[PLANE_PRIMARY],
6218 wm->pipe[pipe].plane[PLANE_CURSOR],
6219 wm->pipe[pipe].plane[PLANE_SPRITE0],
6220 wm->pipe[pipe].plane[PLANE_SPRITE1]);
6223 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6224 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6227 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6229 struct intel_plane *plane;
6230 struct intel_crtc *crtc;
6232 mutex_lock(&dev_priv->wm.wm_mutex);
6234 for_each_intel_plane(&dev_priv->drm, plane) {
6235 struct intel_crtc *crtc =
6236 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6237 struct intel_crtc_state *crtc_state =
6238 to_intel_crtc_state(crtc->base.state);
6239 struct intel_plane_state *plane_state =
6240 to_intel_plane_state(plane->base.state);
6241 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6242 const struct vlv_fifo_state *fifo_state =
6243 &crtc_state->wm.vlv.fifo_state;
6244 enum plane_id plane_id = plane->id;
6247 if (plane_state->base.visible)
6248 continue;
6250 for (level = 0; level < wm_state->num_levels; level++) {
6251 struct g4x_pipe_wm *raw =
6252 &crtc_state->wm.vlv.raw[level];
6254 raw->plane[plane_id] = 0;
6256 wm_state->wm[level].plane[plane_id] =
6257 vlv_invert_wm_value(raw->plane[plane_id],
6258 fifo_state->plane[plane_id]);
6262 for_each_intel_crtc(&dev_priv->drm, crtc) {
6263 struct intel_crtc_state *crtc_state =
6264 to_intel_crtc_state(crtc->base.state);
6266 crtc_state->wm.vlv.intermediate =
6267 crtc_state->wm.vlv.optimal;
6268 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6271 vlv_program_watermarks(dev_priv);
6273 mutex_unlock(&dev_priv->wm.wm_mutex);
6277 * FIXME should probably kill this and improve
6278 * the real watermark readout/sanitization instead
6280 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6282 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6283 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6284 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6287 * Don't touch WM1S_LP_EN here.
6288 * Doing so could cause underruns.
6292 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
6294 struct ilk_wm_values *hw = &dev_priv->wm.hw;
6295 struct intel_crtc *crtc;
6297 ilk_init_lp_watermarks(dev_priv);
6299 for_each_intel_crtc(&dev_priv->drm, crtc)
6300 ilk_pipe_wm_get_hw_state(crtc);
6302 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6303 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6304 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6306 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6307 if (INTEL_GEN(dev_priv) >= 7) {
6308 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6309 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6312 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6313 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6314 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6315 else if (IS_IVYBRIDGE(dev_priv))
6316 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6317 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6319 hw->enable_fbc_wm =
6320 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6324 * intel_update_watermarks - update FIFO watermark values based on current modes
6325 * @crtc: the #intel_crtc on which to compute the WM
6327 * Calculate watermark values for the various WM regs based on current mode
6328 * and plane configuration.
6330 * There are several cases to deal with here:
6331 * - normal (i.e. non-self-refresh)
6332 * - self-refresh (SR) mode
6333 * - lines are large relative to FIFO size (buffer can hold up to 2)
6334 * - lines are small relative to FIFO size (buffer can hold more than 2
6335 * lines), so need to account for TLB latency
6337 * The normal calculation is:
6338 * watermark = dotclock * bytes per pixel * latency
6339 * where latency is platform & configuration dependent (we assume pessimal
6340 * values here).
6342 * The SR calculation is:
6343 * watermark = (trunc(latency/line time)+1) * surface width *
6344 * bytes per pixel
6345 * where
6346 * line time = htotal / dotclock
6347 * surface width = hdisplay for normal plane and 64 for cursor
6348 * and latency is assumed to be high, as above.
6350 * The final value programmed to the register should always be rounded up,
6351 * and include an extra 2 entries to account for clock crossings.
6353 * We don't use the sprite, so we can ignore that. And on Crestline we have
6354 * to set the non-SR watermarks to 8.
6356 void intel_update_watermarks(struct intel_crtc *crtc)
6358 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6360 if (dev_priv->display.update_wm)
6361 dev_priv->display.update_wm(crtc);
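/*
 * Illustrative, standalone sketch (not part of the driver): the two
 * formulas from the comment above in plain C. Units and the entry_size
 * parameter are assumptions (dotclock in kHz, latency in usecs,
 * entry_size = bytes per FIFO entry, e.g. 64); real platforms also
 * apply their own latency tables and rounding rules.
 */
#include <stdint.h>

static uint32_t normal_wm(uint32_t dotclock_khz, uint32_t cpp,
			  uint32_t latency_us, uint32_t entry_size)
{
	/* bytes fetched while we wait out the latency */
	uint64_t bytes = (uint64_t)dotclock_khz * cpp * latency_us / 1000;

	/* round up to FIFO entries and add 2 for clock crossings */
	return (uint32_t)((bytes + entry_size - 1) / entry_size) + 2;
}

static uint32_t sr_wm_bytes(uint32_t htotal, uint32_t dotclock_khz,
			    uint32_t width, uint32_t cpp,
			    uint32_t latency_us)
{
	uint32_t line_time_us;

	if (!dotclock_khz)
		return 0;

	/* line time = htotal / dotclock, in usecs */
	line_time_us = htotal * 1000 / dotclock_khz;
	if (!line_time_us)
		return 0;

	/* watermark = (trunc(latency/line time)+1) * width * cpp */
	return (latency_us / line_time_us + 1) * width * cpp;
}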
6364 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6368 if (!HAS_IPC(dev_priv))
6369 return;
6371 val = I915_READ(DISP_ARB_CTL2);
6373 if (dev_priv->ipc_enabled)
6374 val |= DISP_IPC_ENABLE;
6375 else
6376 val &= ~DISP_IPC_ENABLE;
6378 I915_WRITE(DISP_ARB_CTL2, val);
6381 void intel_init_ipc(struct drm_i915_private *dev_priv)
6383 if (!HAS_IPC(dev_priv))
6384 return;
6386 /* Display WA #1141: SKL:all KBL:all CFL */
6387 if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6388 dev_priv->ipc_enabled = dev_priv->dram_info.symmetric_memory;
6389 else
6390 dev_priv->ipc_enabled = true;
6392 intel_enable_ipc(dev_priv);
6396 * Lock protecting IPS related data structures
6398 DEFINE_SPINLOCK(mchdev_lock);
6400 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
6404 lockdep_assert_held(&mchdev_lock);
6406 rgvswctl = I915_READ16(MEMSWCTL);
6407 if (rgvswctl & MEMCTL_CMD_STS) {
6408 DRM_DEBUG("gpu busy, RCS change rejected\n");
6409 return false; /* still busy with another command */
6412 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6413 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6414 I915_WRITE16(MEMSWCTL, rgvswctl);
6415 POSTING_READ16(MEMSWCTL);
6417 rgvswctl |= MEMCTL_CMD_STS;
6418 I915_WRITE16(MEMSWCTL, rgvswctl);
6423 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6426 u8 fmax, fmin, fstart, vstart;
6428 spin_lock_irq(&mchdev_lock);
6430 rgvmodectl = I915_READ(MEMMODECTL);
6432 /* Enable temp reporting */
6433 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6434 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6436 /* 100ms RC evaluation intervals */
6437 I915_WRITE(RCUPEI, 100000);
6438 I915_WRITE(RCDNEI, 100000);
6440 /* Set max/min thresholds to 90ms and 80ms respectively */
6441 I915_WRITE(RCBMAXAVG, 90000);
6442 I915_WRITE(RCBMINAVG, 80000);
6444 I915_WRITE(MEMIHYST, 1);
6446 /* Set up min, max, and cur for interrupt handling */
6447 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6448 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6449 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6450 MEMMODE_FSTART_SHIFT;
6452 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
6455 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6456 dev_priv->ips.fstart = fstart;
6458 dev_priv->ips.max_delay = fstart;
6459 dev_priv->ips.min_delay = fmin;
6460 dev_priv->ips.cur_delay = fstart;
6462 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6463 fmax, fmin, fstart);
6465 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6468 * Interrupts will be enabled in ironlake_irq_postinstall
6471 I915_WRITE(VIDSTART, vstart);
6472 POSTING_READ(VIDSTART);
6474 rgvmodectl |= MEMMODE_SWMODE_EN;
6475 I915_WRITE(MEMMODECTL, rgvmodectl);
6477 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6478 DRM_ERROR("stuck trying to change perf mode\n");
6481 ironlake_set_drps(dev_priv, fstart);
6483 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6484 I915_READ(DDREC) + I915_READ(CSIEC);
6485 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6486 dev_priv->ips.last_count2 = I915_READ(GFXEC);
6487 dev_priv->ips.last_time2 = ktime_get_raw_ns();
6489 spin_unlock_irq(&mchdev_lock);
6492 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6496 spin_lock_irq(&mchdev_lock);
6498 rgvswctl = I915_READ16(MEMSWCTL);
6500 /* Ack interrupts, disable EFC interrupt */
6501 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6502 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6503 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6504 I915_WRITE(DEIIR, DE_PCU_EVENT);
6505 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6507 /* Go back to the starting frequency */
6508 ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6510 rgvswctl |= MEMCTL_CMD_STS;
6511 I915_WRITE(MEMSWCTL, rgvswctl);
6514 spin_unlock_irq(&mchdev_lock);
6517 /* There's a funny hw issue where the hw returns all 0 when reading from
6518 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6519 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6520 * all limits and the gpu getting stuck at whatever frequency it is at atm).
6522 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6524 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6527 /* Only set the down limit when we've reached the lowest level to avoid
6528 * getting more interrupts, otherwise leave this clear. This prevents a
6529 * race in the hw when coming out of rc6: There's a tiny window where
6530 * the hw runs at the minimal clock before selecting the desired
6531 * frequency; if the down threshold expires in that window we will not
6532 * receive a down interrupt. */
6533 if (INTEL_GEN(dev_priv) >= 9) {
6534 limits = (rps->max_freq_softlimit) << 23;
6535 if (val <= rps->min_freq_softlimit)
6536 limits |= (rps->min_freq_softlimit) << 14;
6538 limits = rps->max_freq_softlimit << 24;
6539 if (val <= rps->min_freq_softlimit)
6540 limits |= rps->min_freq_softlimit << 16;
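/*
 * Illustrative, standalone sketch (not part of the driver): the limits
 * packing computed above. Gen9+ places the max/min ratio fields at bits
 * 23/14, earlier parts at bits 24/16, and the down limit is set only
 * once the request has reached the soft minimum.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t pack_rps_limits(bool gen9plus, uint8_t val,
				uint8_t max_soft, uint8_t min_soft)
{
	uint32_t limits = (uint32_t)max_soft << (gen9plus ? 23 : 24);

	if (val <= min_soft)
		limits |= (uint32_t)min_soft << (gen9plus ? 14 : 16);
	return limits;
}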
6546 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6548 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6549 u32 threshold_up = 0, threshold_down = 0; /* in % */
6550 u32 ei_up = 0, ei_down = 0;
6552 lockdep_assert_held(&rps->power.mutex);
6554 if (new_power == rps->power.mode)
6557 /* Note the units here are not exactly 1us, but 1280ns. */
6558 switch (new_power) {
6560 /* Upclock if more than 95% busy over 16ms */
6564 /* Downclock if less than 85% busy over 32ms */
6566 threshold_down = 85;
6570 /* Upclock if more than 90% busy over 13ms */
6574 /* Downclock if less than 75% busy over 32ms */
6576 threshold_down = 75;
6580 /* Upclock if more than 85% busy over 10ms */
6584 /* Downclock if less than 60% busy over 32ms */
6586 threshold_down = 60;
6590 /* When byt can survive without system hang with dynamic
6591 * sw freq adjustments, this restriction can be lifted.
6593 if (IS_VALLEYVIEW(dev_priv))
6596 I915_WRITE(GEN6_RP_UP_EI,
6597 GT_INTERVAL_FROM_US(dev_priv, ei_up));
6598 I915_WRITE(GEN6_RP_UP_THRESHOLD,
6599 GT_INTERVAL_FROM_US(dev_priv,
6600 ei_up * threshold_up / 100));
6602 I915_WRITE(GEN6_RP_DOWN_EI,
6603 GT_INTERVAL_FROM_US(dev_priv, ei_down));
6604 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6605 GT_INTERVAL_FROM_US(dev_priv,
6606 ei_down * threshold_down / 100));
6608 I915_WRITE(GEN6_RP_CONTROL,
6609 (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
6610 GEN6_RP_MEDIA_HW_NORMAL_MODE |
6611 GEN6_RP_MEDIA_IS_GFX |
6613 GEN6_RP_UP_BUSY_AVG |
6614 GEN6_RP_DOWN_IDLE_AVG);
6617 rps->power.mode = new_power;
6618 rps->power.up_threshold = threshold_up;
6619 rps->power.down_threshold = threshold_down;
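/*
 * Illustrative, standalone sketch (not part of the driver): converting
 * an evaluation interval in usecs plus a busyness percentage into the
 * raw values programmed above, assuming the legacy 1280ns counter
 * granularity noted in the comment (GT_INTERVAL_FROM_US scales by the
 * command streamer timestamp frequency on newer parts instead).
 */
#include <stdint.h>

static uint32_t us_to_ei_units(uint32_t us)
{
	return us * 1000 / 1280;		/* usecs -> 1280ns ticks */
}

static uint32_t threshold_units(uint32_t ei_us, uint32_t percent)
{
	/* e.g. 95% of a 16000us interval for the LOW_POWER upclock rule */
	return us_to_ei_units(ei_us * percent / 100);
}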
6622 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6624 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6627 new_power = rps->power.mode;
6628 switch (rps->power.mode) {
6630 if (val > rps->efficient_freq + 1 &&
6631 val > rps->cur_freq)
6632 new_power = BETWEEN;
6636 if (val <= rps->efficient_freq &&
6637 val < rps->cur_freq)
6638 new_power = LOW_POWER;
6639 else if (val >= rps->rp0_freq &&
6640 val > rps->cur_freq)
6641 new_power = HIGH_POWER;
6645 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6646 val < rps->cur_freq)
6647 new_power = BETWEEN;
6650 /* Max/min bins are special */
6651 if (val <= rps->min_freq_softlimit)
6652 new_power = LOW_POWER;
6653 if (val >= rps->max_freq_softlimit)
6654 new_power = HIGH_POWER;
6656 mutex_lock(&rps->power.mutex);
6657 if (rps->power.interactive)
6658 new_power = HIGH_POWER;
6659 rps_set_power(dev_priv, new_power);
6660 mutex_unlock(&rps->power.mutex);
6663 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6665 struct intel_rps *rps = &i915->gt_pm.rps;
6667 if (INTEL_GEN(i915) < 6)
6668 return;
6670 mutex_lock(&rps->power.mutex);
6672 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6673 rps_set_power(i915, HIGH_POWER);
6675 GEM_BUG_ON(!rps->power.interactive);
6676 rps->power.interactive--;
6678 mutex_unlock(&rps->power.mutex);
6681 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6683 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6686 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6687 if (val > rps->min_freq_softlimit)
6688 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6689 if (val < rps->max_freq_softlimit)
6690 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6692 mask &= dev_priv->pm_rps_events;
6694 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6697 /* gen6_set_rps is called to update the frequency request, but should also be
6698 * called when the range (min_delay and max_delay) is modified so that we can
6699 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6700 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6702 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6704 /* min/max delay may still have been modified so be sure to
6705 * write the limits value.
6707 if (val != rps->cur_freq) {
6708 gen6_set_rps_thresholds(dev_priv, val);
6710 if (INTEL_GEN(dev_priv) >= 9)
6711 I915_WRITE(GEN6_RPNSWREQ,
6712 GEN9_FREQUENCY(val));
6713 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6714 I915_WRITE(GEN6_RPNSWREQ,
6715 HSW_FREQUENCY(val));
6717 I915_WRITE(GEN6_RPNSWREQ,
6718 GEN6_FREQUENCY(val) |
6720 GEN6_AGGRESSIVE_TURBO);
6723 /* Make sure we continue to get interrupts
6724 * until we hit the minimum or maximum frequencies.
6726 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6727 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6729 rps->cur_freq = val;
6730 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6735 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6739 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6740 "Odd GPU freq value\n"))
6741 val &= ~1;
6743 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6745 if (val != dev_priv->gt_pm.rps.cur_freq) {
6746 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6747 if (err)
6748 return err;
6750 gen6_set_rps_thresholds(dev_priv, val);
6753 dev_priv->gt_pm.rps.cur_freq = val;
6754 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6759 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6761 * If Gfx is idle, then:
6762 * 1. Forcewake the media well.
6763 * 2. Request the idle frequency.
6764 * 3. Release forcewake of the media well.
6766 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6768 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6769 u32 val = rps->idle_freq;
6772 if (rps->cur_freq <= val)
6773 return;
6775 /* The punit delays the write of the frequency and voltage until it
6776 * determines the GPU is awake. During normal usage we don't want to
6777 * waste power changing the frequency if the GPU is sleeping (rc6).
6778 * However, the GPU and driver are now idle and we do not want to delay
6779 * switching to minimum voltage (reducing power whilst idle) as we do
6780 * not expect to be woken in the near future and so must flush the
6781 * change by waking the device.
6783 * We choose to take the media powerwell (either would do to trick the
6784 * punit into committing the voltage change) as that takes a lot less
6785 * power than the render powerwell.
6787 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA);
6788 err = valleyview_set_rps(dev_priv, val);
6789 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA);
6791 if (err)
6792 DRM_ERROR("Failed to set RPS for idle\n");
6795 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6797 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6799 mutex_lock(&dev_priv->pcu_lock);
6803 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6804 gen6_rps_reset_ei(dev_priv);
6805 I915_WRITE(GEN6_PMINTRMSK,
6806 gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6808 gen6_enable_rps_interrupts(dev_priv);
6810 /* Use the user's desired frequency as a guide, but for better
6811 * performance, jump directly to RPe as our starting frequency.
6813 freq = max(rps->cur_freq,
6814 rps->efficient_freq);
6816 if (intel_set_rps(dev_priv,
6817 clamp(freq,
6818 rps->min_freq_softlimit,
6819 rps->max_freq_softlimit)))
6820 DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6822 mutex_unlock(&dev_priv->pcu_lock);
6825 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6827 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6829 /* Flush our bottom-half so that it does not race with us
6830 * setting the idle frequency and so that it is bounded by
6831 * our rpm wakeref. And then disable the interrupts to stop any
6832 * further RPS reclocking whilst we are asleep.
6834 gen6_disable_rps_interrupts(dev_priv);
6836 mutex_lock(&dev_priv->pcu_lock);
6838 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6839 vlv_set_rps_idle(dev_priv);
6840 else
6841 gen6_set_rps(dev_priv, rps->idle_freq);
6843 I915_WRITE(GEN6_PMINTRMSK,
6844 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6846 mutex_unlock(&dev_priv->pcu_lock);
6849 void gen6_rps_boost(struct i915_request *rq)
6851 struct intel_rps *rps = &rq->i915->gt_pm.rps;
6852 unsigned long flags;
6855 /* This is intentionally racy! We peek at the state here, then
6856 * validate inside the RPS worker.
6861 if (i915_request_signaled(rq))
6862 return;
6864 /* Serializes with i915_request_retire() */
6866 spin_lock_irqsave(&rq->lock, flags);
6867 if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6868 boost = !atomic_fetch_inc(&rps->num_waiters);
6869 rq->waitboost = true;
6871 spin_unlock_irqrestore(&rq->lock, flags);
6875 if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6876 schedule_work(&rps->work);
6878 atomic_inc(&rps->boosts);
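/*
 * Illustrative, standalone sketch (not part of the driver): the
 * "first waiter kicks the worker" pattern above. atomic_fetch_inc()
 * returns the old count, so only the 0 -> 1 transition reports true
 * and schedules the RPS worker; later waiters just add themselves.
 */
#include <stdatomic.h>
#include <stdbool.h>

static bool first_waiter(atomic_uint *num_waiters)
{
	return atomic_fetch_add(num_waiters, 1) == 0;
}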
6881 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6883 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6886 lockdep_assert_held(&dev_priv->pcu_lock);
6887 GEM_BUG_ON(val > rps->max_freq);
6888 GEM_BUG_ON(val < rps->min_freq);
6890 if (!rps->enabled) {
6891 rps->cur_freq = val;
6892 return 0;
6893 }
6895 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6896 err = valleyview_set_rps(dev_priv, val);
6897 else
6898 err = gen6_set_rps(dev_priv, val);
6903 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6905 I915_WRITE(GEN6_RC_CONTROL, 0);
6906 I915_WRITE(GEN9_PG_ENABLE, 0);
6909 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6911 I915_WRITE(GEN6_RP_CONTROL, 0);
6914 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6916 I915_WRITE(GEN6_RC_CONTROL, 0);
6919 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6921 I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6922 I915_WRITE(GEN6_RP_CONTROL, 0);
6925 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6927 I915_WRITE(GEN6_RC_CONTROL, 0);
6930 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6932 I915_WRITE(GEN6_RP_CONTROL, 0);
6935 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6937 /* We're doing forcewake before disabling RC6;
6938 * this is what the BIOS expects when going into suspend */
6939 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
6941 I915_WRITE(GEN6_RC_CONTROL, 0);
6943 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
6946 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6948 I915_WRITE(GEN6_RP_CONTROL, 0);
6951 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6953 bool enable_rc6 = true;
6954 unsigned long rc6_ctx_base;
6958 rc_ctl = I915_READ(GEN6_RC_CONTROL);
6959 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6960 RC_SW_TARGET_STATE_SHIFT;
6961 DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6962 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6963 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6964 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6967 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6968 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6973 * The exact context size is not known for BXT, so assume a page size
6974 * for this check.
6975 */
6976 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6977 if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6978 (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6979 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6983 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6984 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6985 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6986 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6987 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6991 if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6992 !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6993 !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6994 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6998 if (!I915_READ(GEN6_GFXPAUSE)) {
6999 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
7003 if (!I915_READ(GEN8_MISC_CTRL0)) {
7004 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
7011 static bool sanitize_rc6(struct drm_i915_private *i915)
7013 struct intel_device_info *info = mkwrite_device_info(i915);
7015 /* Powersaving is controlled by the host when inside a VM */
7016 if (intel_vgpu_active(i915))
7019 if (info->has_rc6 &&
7020 IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
7021 DRM_INFO("RC6 disabled by BIOS\n");
7026 * We assume that we do not have any deep rc6 levels if we don't have
7027 * the previous rc6 level supported, i.e. we use HAS_RC6()
7028 * as the initial coarse check for rc6 in general, moving on to
7029 * progressively finer/deeper levels.
7031 if (!info->has_rc6 && info->has_rc6p)
7032 info->has_rc6p = 0;
7034 return info->has_rc6;
7037 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
7039 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7041 /* All of these values are in units of 50MHz */
7043 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
7044 if (IS_GEN9_LP(dev_priv)) {
7045 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
7046 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
7047 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
7048 rps->min_freq = (rp_state_cap >> 0) & 0xff;
7050 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
7051 rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
7052 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
7053 rps->min_freq = (rp_state_cap >> 16) & 0xff;
7055 /* hw_max = RP0 until we check for overclocking */
7056 rps->max_freq = rps->rp0_freq;
7058 rps->efficient_freq = rps->rp1_freq;
7059 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
7060 IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7061 u32 ddcc_status = 0;
7063 if (sandybridge_pcode_read(dev_priv,
7064 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
7066 rps->efficient_freq =
7068 ((ddcc_status >> 8) & 0xff),
7073 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7074 /* Store the frequency values in 16.66 MHz units, which is
7075 * the natural hardware unit for SKL
7077 rps->rp0_freq *= GEN9_FREQ_SCALER;
7078 rps->rp1_freq *= GEN9_FREQ_SCALER;
7079 rps->min_freq *= GEN9_FREQ_SCALER;
7080 rps->max_freq *= GEN9_FREQ_SCALER;
7081 rps->efficient_freq *= GEN9_FREQ_SCALER;
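/*
 * Illustrative, standalone sketch (not part of the driver): decoding
 * the three ratio bytes of the RP_STATE_CAP register as above. The
 * byte order is reversed between the BXT-style and SNB-style layouts,
 * and SKL-class parts then multiply every ratio by GEN9_FREQ_SCALER
 * (3) to move from 50 MHz to 16.66 MHz units.
 */
#include <stdbool.h>
#include <stdint.h>

struct rps_caps { uint8_t rp0, rp1, rpn; };

static struct rps_caps decode_rp_state_cap(uint32_t cap, bool bxt_layout)
{
	struct rps_caps c;

	if (bxt_layout) {
		c.rp0 = (cap >> 16) & 0xff;
		c.rp1 = (cap >> 8) & 0xff;
		c.rpn = (cap >> 0) & 0xff;
	} else {
		c.rp0 = (cap >> 0) & 0xff;
		c.rp1 = (cap >> 8) & 0xff;
		c.rpn = (cap >> 16) & 0xff;
	}
	return c;
}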
7085 static void reset_rps(struct drm_i915_private *dev_priv,
7086 int (*set)(struct drm_i915_private *, u8))
7088 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7089 u8 freq = rps->cur_freq;
7092 rps->power.mode = -1;
7095 if (set(dev_priv, freq))
7096 DRM_ERROR("Failed to reset RPS to initial values\n");
7099 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
7100 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
7102 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7104 /* Program defaults and thresholds for RPS */
7105 if (IS_GEN(dev_priv, 9))
7106 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7107 GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
7109 /* 1 second timeout*/
7110 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
7111 GT_INTERVAL_FROM_US(dev_priv, 1000000));
7113 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
7115 /* Leaning on the below call to gen6_set_rps to program/setup the
7116 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
7117 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
7118 reset_rps(dev_priv, gen6_set_rps);
7120 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7123 static void gen11_enable_rc6(struct drm_i915_private *dev_priv)
7125 struct intel_engine_cs *engine;
7126 enum intel_engine_id id;
7128 /* 1a: Software RC state - RC0 */
7129 I915_WRITE(GEN6_RC_STATE, 0);
7132 * 1b: Get forcewake during program sequence. Although the driver
7133 * hasn't enabled a state yet where we need forcewake, BIOS may have.
7135 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7137 /* 2a: Disable RC states. */
7138 I915_WRITE(GEN6_RC_CONTROL, 0);
7140 /* 2b: Program RC6 thresholds.*/
7141 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7142 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7144 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7145 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7146 for_each_engine(engine, dev_priv, id)
7147 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7149 if (HAS_GUC(dev_priv))
7150 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7152 I915_WRITE(GEN6_RC_SLEEP, 0);
7154 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
7157 * 2c: Program Coarse Power Gating Policies.
7159 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7160 * use instead is a more conservative estimate for the maximum time
7161 * it takes us to service a CS interrupt and submit a new ELSP - that
7162 * is the time which the GPU is idle waiting for the CPU to select the
7163 * next request to execute. If the idle hysteresis is less than that
7164 * interrupt service latency, the hardware will automatically gate
7165 * the power well and we will then incur the wake up cost on top of
7166 * the service latency. A similar guide from intel_pstate is that we
7167 * do not want the enable hysteresis to be less than the wakeup latency.
7169 * igt/gem_exec_nop/sequential provides a rough estimate for the
7170 * service latency, and puts it around 10us for Broadwell (and other
7171 * big core) and around 40us for Broxton (and other low power cores).
7172 * [Note that for legacy ringbuffer submission, this is less than 1us!]
7173 * However, the wakeup latency on Broxton is closer to 100us. To be
7174 * conservative, we have to factor in a context switch on top (due
7175 * to ksoftirqd).
7176 */
7177 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7178 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7180 /* 3a: Enable RC6 */
7181 I915_WRITE(GEN6_RC_CONTROL,
7182 GEN6_RC_CTL_HW_ENABLE |
7183 GEN6_RC_CTL_RC6_ENABLE |
7184 GEN6_RC_CTL_EI_MODE(1));
7186 /* 3b: Enable Coarse Power Gating only when RC6 is enabled. */
7187 I915_WRITE(GEN9_PG_ENABLE,
7188 GEN9_RENDER_PG_ENABLE |
7189 GEN9_MEDIA_PG_ENABLE |
7190 GEN11_MEDIA_SAMPLER_PG_ENABLE);
7192 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7195 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
7197 struct intel_engine_cs *engine;
7198 enum intel_engine_id id;
7201 /* 1a: Software RC state - RC0 */
7202 I915_WRITE(GEN6_RC_STATE, 0);
7204 /* 1b: Get forcewake during program sequence. Although the driver
7205 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7206 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7208 /* 2a: Disable RC states. */
7209 I915_WRITE(GEN6_RC_CONTROL, 0);
7211 /* 2b: Program RC6 thresholds.*/
7212 if (INTEL_GEN(dev_priv) >= 10) {
7213 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7214 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7215 } else if (IS_SKYLAKE(dev_priv)) {
7217 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
7218 * when CPG is enabled
7220 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
7222 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
7225 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7226 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7227 for_each_engine(engine, dev_priv, id)
7228 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7230 if (HAS_GUC(dev_priv))
7231 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7233 I915_WRITE(GEN6_RC_SLEEP, 0);
7236 * 2c: Program Coarse Power Gating Policies.
7238 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7239 * use instead is a more conservative estimate for the maximum time
7240 * it takes us to service a CS interrupt and submit a new ELSP - that
7241 * is the time which the GPU is idle waiting for the CPU to select the
7242 * next request to execute. If the idle hysteresis is less than that
7243 * interrupt service latency, the hardware will automatically gate
7244 * the power well and we will then incur the wake up cost on top of
7245 * the service latency. A similar guide from intel_pstate is that we
7246 * do not want the enable hysteresis to be less than the wakeup latency.
7248 * igt/gem_exec_nop/sequential provides a rough estimate for the
7249 * service latency, and puts it around 10us for Broadwell (and other
7250 * big core) and around 40us for Broxton (and other low power cores).
7251 * [Note that for legacy ringbuffer submission, this is less than 1us!]
7252 * However, the wakeup latency on Broxton is closer to 100us. To be
7253 * conservative, we have to factor in a context switch on top (due
7254 * to ksoftirqd).
7255 */
7256 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7257 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7259 /* 3a: Enable RC6 */
7260 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
7262 /* WaRsUseTimeoutMode:cnl (pre-prod) */
7263 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
7264 rc6_mode = GEN7_RC_CTL_TO_MODE;
7265 else
7266 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
7268 I915_WRITE(GEN6_RC_CONTROL,
7269 GEN6_RC_CTL_HW_ENABLE |
7270 GEN6_RC_CTL_RC6_ENABLE |
7274 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7275 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7277 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
7278 I915_WRITE(GEN9_PG_ENABLE, 0);
7279 else
7280 I915_WRITE(GEN9_PG_ENABLE,
7281 GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
7283 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7286 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
7288 struct intel_engine_cs *engine;
7289 enum intel_engine_id id;
7291 /* 1a: Software RC state - RC0 */
7292 I915_WRITE(GEN6_RC_STATE, 0);
7294 /* 1b: Get forcewake during program sequence. Although the driver
7295 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7296 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7298 /* 2a: Disable RC states. */
7299 I915_WRITE(GEN6_RC_CONTROL, 0);
7301 /* 2b: Program RC6 thresholds.*/
7302 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7303 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7304 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7305 for_each_engine(engine, dev_priv, id)
7306 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7307 I915_WRITE(GEN6_RC_SLEEP, 0);
7308 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7312 I915_WRITE(GEN6_RC_CONTROL,
7313 GEN6_RC_CTL_HW_ENABLE |
7314 GEN7_RC_CTL_TO_MODE |
7315 GEN6_RC_CTL_RC6_ENABLE);
7317 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7320 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7322 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7324 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7326 /* 1 Program defaults and thresholds for RPS*/
7327 I915_WRITE(GEN6_RPNSWREQ,
7328 HSW_FREQUENCY(rps->rp1_freq));
7329 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7330 HSW_FREQUENCY(rps->rp1_freq));
7331 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7332 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7334 /* Docs recommend 900MHz, and 300 MHz respectively */
7335 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7336 rps->max_freq_softlimit << 24 |
7337 rps->min_freq_softlimit << 16);
7339 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7340 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7341 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7342 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7344 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7347 I915_WRITE(GEN6_RP_CONTROL,
7348 GEN6_RP_MEDIA_TURBO |
7349 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7350 GEN6_RP_MEDIA_IS_GFX |
7352 GEN6_RP_UP_BUSY_AVG |
7353 GEN6_RP_DOWN_IDLE_AVG);
7355 reset_rps(dev_priv, gen6_set_rps);
7357 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7360 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7362 struct intel_engine_cs *engine;
7363 enum intel_engine_id id;
7364 u32 rc6vids, rc6_mask;
7368 I915_WRITE(GEN6_RC_STATE, 0);
7370 /* Clear the DBG now so we don't confuse earlier errors */
7371 gtfifodbg = I915_READ(GTFIFODBG);
7373 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7374 I915_WRITE(GTFIFODBG, gtfifodbg);
7377 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7379 /* disable the counters and set deterministic thresholds */
7380 I915_WRITE(GEN6_RC_CONTROL, 0);
7382 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7383 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7384 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7385 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7386 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7388 for_each_engine(engine, dev_priv, id)
7389 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7391 I915_WRITE(GEN6_RC_SLEEP, 0);
7392 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7393 if (IS_IVYBRIDGE(dev_priv))
7394 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7395 else
7396 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7397 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7398 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7400 /* We don't use those on Haswell */
7401 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7402 if (HAS_RC6p(dev_priv))
7403 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7404 if (HAS_RC6pp(dev_priv))
7405 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7406 I915_WRITE(GEN6_RC_CONTROL,
7408 GEN6_RC_CTL_EI_MODE(1) |
7409 GEN6_RC_CTL_HW_ENABLE);
7412 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
7413 if (IS_GEN(dev_priv, 6) && ret) {
7414 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7415 } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7416 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7417 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7418 rc6vids &= 0xffff00;
7419 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7420 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7422 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7425 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7428 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7430 /* Here begins a magic sequence of register writes to enable
7431 * auto-downclocking.
7433 * Perhaps there might be some value in exposing these to
7434 * userspace...
7435 */
7436 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7438 /* Power down if completely idle for over 50ms */
7439 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7440 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7442 reset_rps(dev_priv, gen6_set_rps);
7444 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7447 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7449 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7450 const int min_freq = 15;
7451 const int scaling_factor = 180;
7452 unsigned int gpu_freq;
7453 unsigned int max_ia_freq, min_ring_freq;
7454 unsigned int max_gpu_freq, min_gpu_freq;
7455 struct cpufreq_policy *policy;
7457 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
7459 if (rps->max_freq <= rps->min_freq)
7462 policy = cpufreq_cpu_get(0);
7464 max_ia_freq = policy->cpuinfo.max_freq;
7465 cpufreq_cpu_put(policy);
7468 * Default to measured freq if none found, PCU will ensure we
7469 * don't go over
7470 */
7471 max_ia_freq = tsc_khz;
7474 /* Convert from kHz to MHz */
7475 max_ia_freq /= 1000;
7477 min_ring_freq = I915_READ(DCLK) & 0xf;
7478 /* convert DDR frequency from units of 266.6MHz to bandwidth */
7479 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
7481 min_gpu_freq = rps->min_freq;
7482 max_gpu_freq = rps->max_freq;
7483 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7484 /* Convert GT frequency to 50 HZ units */
7485 min_gpu_freq /= GEN9_FREQ_SCALER;
7486 max_gpu_freq /= GEN9_FREQ_SCALER;
7490 * For each potential GPU frequency, load a ring frequency we'd like
7491 * to use for memory access. We do this by specifying the IA frequency
7492 * the PCU should use as a reference to determine the ring frequency.
7494 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7495 const int diff = max_gpu_freq - gpu_freq;
7496 unsigned int ia_freq = 0, ring_freq = 0;
7498 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7500 * ring_freq = 2 * GT. ring_freq is in 100MHz units
7501 * No floor required for ring frequency on SKL.
7503 ring_freq = gpu_freq;
7504 } else if (INTEL_GEN(dev_priv) >= 8) {
7505 /* max(2 * GT, DDR). NB: GT is 50MHz units */
7506 ring_freq = max(min_ring_freq, gpu_freq);
7507 } else if (IS_HASWELL(dev_priv)) {
7508 ring_freq = mult_frac(gpu_freq, 5, 4);
7509 ring_freq = max(min_ring_freq, ring_freq);
7510 /* leave ia_freq as the default, chosen by cpufreq */
7512 /* On older processors, there is no separate ring
7513 * clock domain, so in order to boost the bandwidth
7514 * of the ring, we need to upclock the CPU (ia_freq).
7516 * For GPU frequencies less than 750MHz,
7517 * just use the lowest ring freq.
7519 if (gpu_freq < min_freq)
7520 ia_freq = 800;
7521 else
7522 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7523 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7526 sandybridge_pcode_write(dev_priv,
7527 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7528 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7529 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
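/*
 * Illustrative, standalone sketch (not part of the driver): the legacy
 * (pre-Haswell) leg of the table above, where boosting ring bandwidth
 * means upclocking the CPU. Frequencies are in 50 MHz ratio units and
 * max_ia_freq_mhz is the CPU limit in MHz; the result is in the
 * 100 MHz encoding the PCU expects.
 */
#include <stdint.h>

static uint32_t legacy_ia_freq(uint32_t gpu_freq, uint32_t max_gpu_freq,
			       uint32_t max_ia_freq_mhz)
{
	const uint32_t min_freq = 15;		/* 750 MHz in ratio units */
	const uint32_t scaling_factor = 180;
	uint32_t ia_mhz;

	if (gpu_freq < min_freq)
		ia_mhz = 800;			/* just use the lowest freq */
	else
		ia_mhz = max_ia_freq_mhz -
			 ((max_gpu_freq - gpu_freq) * scaling_factor) / 2;

	return (ia_mhz + 50) / 100;		/* round to closest */
}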
7534 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7538 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7540 switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
7542 /* (2 * 4) config */
7543 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7546 /* (2 * 6) config */
7547 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7550 /* (2 * 8) config */
7552 /* Setting (2 * 8) Min RP0 for any other combination */
7553 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7557 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7562 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7566 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7567 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7572 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7576 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7577 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7582 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7586 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7587 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7588 FB_GFX_FREQ_FUSE_MASK);
7593 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7597 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7599 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7604 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7608 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7610 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7612 rp0 = min_t(u32, rp0, 0xea);
7617 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7621 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7622 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7623 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7624 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7629 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7633 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7635 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7636 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7637 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7638 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7639 * to make sure it matches what Punit accepts.
7641 return max_t(u32, val, 0xc0);
7644 /* Check that the pctx buffer wasn't moved under us. */
7645 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7647 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7649 WARN_ON(pctx_addr != dev_priv->dsm.start +
7650 dev_priv->vlv_pctx->stolen->start);
7654 /* Check that the pcbr address is not empty. */
7655 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7657 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7659 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7662 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7664 resource_size_t pctx_paddr, paddr;
7665 resource_size_t pctx_size = 32*1024;
7668 pcbr = I915_READ(VLV_PCBR);
7669 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7670 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7671 paddr = dev_priv->dsm.end + 1 - pctx_size;
7672 GEM_BUG_ON(paddr > U32_MAX);
7674 pctx_paddr = (paddr & (~4095));
7675 I915_WRITE(VLV_PCBR, pctx_paddr);
7678 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7681 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7683 struct drm_i915_gem_object *pctx;
7684 resource_size_t pctx_paddr;
7685 resource_size_t pctx_size = 24*1024;
7688 pcbr = I915_READ(VLV_PCBR);
7690 /* BIOS set it up already, grab the pre-alloc'd space */
7691 resource_size_t pcbr_offset;
7693 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7694 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7696 I915_GTT_OFFSET_NONE,
7701 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7704 * From the Gunit register HAS:
7705 * The Gfx driver is expected to program this register and ensure
7706 * proper allocation within Gfx stolen memory. For example, this
7707 * register should be programmed such that the PCBR range does not
7708 * overlap with other ranges, such as the frame buffer, protected
7709 * memory, or any other relevant ranges.
7711 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7713 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7717 GEM_BUG_ON(range_overflows_t(u64,
7718 dev_priv->dsm.start,
7719 pctx->stolen->start,
7721 pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7722 I915_WRITE(VLV_PCBR, pctx_paddr);
7725 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7726 dev_priv->vlv_pctx = pctx;
7729 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7731 struct drm_i915_gem_object *pctx;
7733 pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7735 i915_gem_object_put(pctx);
7738 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7740 dev_priv->gt_pm.rps.gpll_ref_freq =
7741 vlv_get_cck_clock(dev_priv, "GPLL ref",
7742 CCK_GPLL_CLOCK_CONTROL,
7743 dev_priv->czclk_freq);
7745 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7746 dev_priv->gt_pm.rps.gpll_ref_freq);
7749 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7751 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7754 valleyview_setup_pctx(dev_priv);
7756 vlv_init_gpll_ref_freq(dev_priv);
7758 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7759 switch ((val >> 6) & 3) {
7762 dev_priv->mem_freq = 800;
7765 dev_priv->mem_freq = 1066;
7768 dev_priv->mem_freq = 1333;
7771 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7773 rps->max_freq = valleyview_rps_max_freq(dev_priv);
7774 rps->rp0_freq = rps->max_freq;
7775 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7776 intel_gpu_freq(dev_priv, rps->max_freq),
7779 rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7780 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7781 intel_gpu_freq(dev_priv, rps->efficient_freq),
7782 rps->efficient_freq);
7784 rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7785 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7786 intel_gpu_freq(dev_priv, rps->rp1_freq),
7789 rps->min_freq = valleyview_rps_min_freq(dev_priv);
7790 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7791 intel_gpu_freq(dev_priv, rps->min_freq),
7795 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7797 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7800 cherryview_setup_pctx(dev_priv);
7802 vlv_init_gpll_ref_freq(dev_priv);
7804 mutex_lock(&dev_priv->sb_lock);
7805 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7806 mutex_unlock(&dev_priv->sb_lock);
7808 switch ((val >> 2) & 0x7) {
7810 dev_priv->mem_freq = 2000;
7813 dev_priv->mem_freq = 1600;
7816 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7818 rps->max_freq = cherryview_rps_max_freq(dev_priv);
7819 rps->rp0_freq = rps->max_freq;
7820 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7821 intel_gpu_freq(dev_priv, rps->max_freq),
7824 rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7825 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7826 intel_gpu_freq(dev_priv, rps->efficient_freq),
7827 rps->efficient_freq);
7829 rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7830 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7831 intel_gpu_freq(dev_priv, rps->rp1_freq),
7834 rps->min_freq = cherryview_rps_min_freq(dev_priv);
7835 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7836 intel_gpu_freq(dev_priv, rps->min_freq),
7839 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7841 "Odd GPU freq values\n");
7844 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7846 valleyview_cleanup_pctx(dev_priv);
7849 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7851 struct intel_engine_cs *engine;
7852 enum intel_engine_id id;
7853 u32 gtfifodbg, rc6_mode, pcbr;
7855 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7856 GT_FIFO_FREE_ENTRIES_CHV);
7858 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7860 I915_WRITE(GTFIFODBG, gtfifodbg);
7863 cherryview_check_pctx(dev_priv);
7865 /* 1a & 1b: Get forcewake during program sequence. Although the driver
7866 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7867 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7869 /* Disable RC states. */
7870 I915_WRITE(GEN6_RC_CONTROL, 0);
7872 /* 2a: Program RC6 thresholds.*/
7873 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7874 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7875 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7877 for_each_engine(engine, dev_priv, id)
7878 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7879 I915_WRITE(GEN6_RC_SLEEP, 0);
7881 /* TO threshold set to 500 us (0x186 * 1.28 us) */
7882 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7884 /* Allows RC6 residency counter to work */
7885 I915_WRITE(VLV_COUNTER_CONTROL,
7886 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7887 VLV_MEDIA_RC6_COUNT_EN |
7888 VLV_RENDER_RC6_COUNT_EN));
7890 /* For now we assume BIOS is allocating and populating the PCBR */
7891 pcbr = I915_READ(VLV_PCBR);
7893 /* 3: Enable RC6 */
7894 rc6_mode = 0;
7895 if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7896 rc6_mode = GEN7_RC_CTL_TO_MODE;
7897 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7899 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7902 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7906 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7908 /* 1: Program defaults and thresholds for RPS*/
7909 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7910 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7911 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7912 I915_WRITE(GEN6_RP_UP_EI, 66000);
7913 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7915 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7918 I915_WRITE(GEN6_RP_CONTROL,
7919 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7920 GEN6_RP_MEDIA_IS_GFX |
7922 GEN6_RP_UP_BUSY_AVG |
7923 GEN6_RP_DOWN_IDLE_AVG);
7925 /* Setting Fixed Bias */
7926 val = VLV_OVERRIDE_EN |
7928 CHV_BIAS_CPU_50_SOC_50;
7929 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7931 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7933 /* RPS code assumes GPLL is used */
7934 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7936 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7937 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7939 reset_rps(dev_priv, valleyview_set_rps);
7941 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7944 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7946 struct intel_engine_cs *engine;
7947 enum intel_engine_id id;
7950 valleyview_check_pctx(dev_priv);
7952 gtfifodbg = I915_READ(GTFIFODBG);
7954 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7956 I915_WRITE(GTFIFODBG, gtfifodbg);
7959 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7961 /* Disable RC states. */
7962 I915_WRITE(GEN6_RC_CONTROL, 0);
7964 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7965 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7966 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7968 for_each_engine(engine, dev_priv, id)
7969 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7971 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
7973 /* Allows RC6 residency counter to work */
7974 I915_WRITE(VLV_COUNTER_CONTROL,
7975 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7976 VLV_MEDIA_RC0_COUNT_EN |
7977 VLV_RENDER_RC0_COUNT_EN |
7978 VLV_MEDIA_RC6_COUNT_EN |
7979 VLV_RENDER_RC6_COUNT_EN));
7981 I915_WRITE(GEN6_RC_CONTROL,
7982 GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7984 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
{
	u32 val;
7991 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7993 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7994 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7995 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7996 I915_WRITE(GEN6_RP_UP_EI, 66000);
7997 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7999 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
8001 I915_WRITE(GEN6_RP_CONTROL,
8002 GEN6_RP_MEDIA_TURBO |
8003 GEN6_RP_MEDIA_HW_NORMAL_MODE |
8004 GEN6_RP_MEDIA_IS_GFX |
		   GEN6_RP_ENABLE |
		   GEN6_RP_UP_BUSY_AVG |
8007 GEN6_RP_DOWN_IDLE_CONT);
8009 /* Setting Fixed Bias */
8010 val = VLV_OVERRIDE_EN |
	      VLV_SOC_TDP_EN |
	      VLV_BIAS_CPU_125_SOC_875;
8013 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
8015 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
8017 /* RPS code assumes GPLL is used */
8018 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
8020 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
8021 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
8023 reset_rps(dev_priv, valleyview_set_rps);
8025 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
static unsigned long intel_pxfreq(u32 vidfreq)
{
	unsigned long freq;
	int div = (vidfreq & 0x3f0000) >> 16;
	int post = (vidfreq & 0x3000) >> 12;
	int pre = (vidfreq & 0x7);

	if (!pre)
		return 0;

	freq = ((div * 133333) / ((1<<post) * pre));

	return freq;
}
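/*
 * Worked decode of intel_pxfreq() (hypothetical fuse value, for
 * illustration only): vidfreq = 0x00121001 yields div = 0x12 (18),
 * post = 1, pre = 1, so freq = (18 * 133333) / ((1 << 1) * 1)
 * = 1199997, i.e. roughly 1.2 GHz expressed in kHz.
 */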
static const struct cparams {
	u16 i;
	u16 t;
	u16 m;
	u16 c;
} cparams[] = {
8049 { 1, 1333, 301, 28664 },
8050 { 1, 1066, 294, 24460 },
8051 { 1, 800, 294, 25192 },
8052 { 0, 1333, 276, 27605 },
8053 { 0, 1066, 276, 27605 },
	{ 0, 800, 231, 23784 },
};
8057 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
8059 u64 total_count, diff, ret;
8060 u32 count1, count2, count3, m = 0, c = 0;
	unsigned long now = jiffies_to_msecs(jiffies), diff1;
	int i;
8064 lockdep_assert_held(&mchdev_lock);
8066 diff1 = now - dev_priv->ips.last_time1;
8068 /* Prevent division-by-zero if we are asking too fast.
8069 * Also, we don't get interesting results if we are polling
	 * faster than once in 10ms, so just return the saved value
	 * in such cases.
	 */
	if (diff1 <= 10)
		return dev_priv->ips.chipset_power;
8076 count1 = I915_READ(DMIEC);
8077 count2 = I915_READ(DDREC);
8078 count3 = I915_READ(CSIEC);
8080 total_count = count1 + count2 + count3;
8082 /* FIXME: handle per-counter overflow */
8083 if (total_count < dev_priv->ips.last_count1) {
8084 diff = ~0UL - dev_priv->ips.last_count1;
8085 diff += total_count;
	} else {
		diff = total_count - dev_priv->ips.last_count1;
	}
8090 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
8091 if (cparams[i].i == dev_priv->ips.c_m &&
		    cparams[i].t == dev_priv->ips.r_t) {
			m = cparams[i].m;
			c = cparams[i].c;
			break;
		}
	}
8099 diff = div_u64(diff, diff1);
8100 ret = ((m * diff) + c);
8101 ret = div_u64(ret, 10);
8103 dev_priv->ips.last_count1 = total_count;
8104 dev_priv->ips.last_time1 = now;
	dev_priv->ips.chipset_power = ret;

	return ret;
}
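/*
 * Sketch of the math above (hypothetical numbers, assuming the cparams
 * row matched on (c_m, r_t) supplies m and c): with m = 294, c = 24460
 * and a counter rate of 100 counts/ms, the result is
 * (294 * 100 + 24460) / 10 = 5386 in the units the IPS driver expects.
 */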
8111 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
8113 intel_wakeref_t wakeref;
8114 unsigned long val = 0;
	if (!IS_GEN(dev_priv, 5))
		return 0;
8119 with_intel_runtime_pm(dev_priv, wakeref) {
8120 spin_lock_irq(&mchdev_lock);
8121 val = __i915_chipset_val(dev_priv);
		spin_unlock_irq(&mchdev_lock);
	}

	return val;
}
8128 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
	unsigned long m, x, b;
	u32 tsfs;
8133 tsfs = I915_READ(TSFS);
8135 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
8136 x = I915_READ8(TR1);
8138 b = tsfs & TSFS_INTR_MASK;
	return ((m * x) / 127) - b;
}
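/*
 * Worked example (hypothetical register readings): a TSFS slope
 * m = 100, a TR1 temperature reading x = 50 and an intercept b = 4
 * give ((100 * 50) / 127) - 4 = 39 - 4 = 35.
 */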
static int _pxvid_to_vd(u8 pxvid)
{
	if (pxvid == 0)
		return 0;

	if (pxvid >= 8 && pxvid < 31)
		pxvid = 31;

	return (pxvid + 2) * 125;
}
8154 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
8156 const int vd = _pxvid_to_vd(pxvid);
8157 const int vm = vd - 1125;
8159 if (INTEL_INFO(dev_priv)->is_mobile)
		return vm > 0 ? vm : 0;

	return vd;
}
8165 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
	u64 now, diff, diffms;
	u32 count;
8170 lockdep_assert_held(&mchdev_lock);
8172 now = ktime_get_raw_ns();
8173 diffms = now - dev_priv->ips.last_time2;
8174 do_div(diffms, NSEC_PER_MSEC);
	/* Don't divide by 0 */
	if (!diffms)
		return;

	count = I915_READ(GFXEC);
8182 if (count < dev_priv->ips.last_count2) {
8183 diff = ~0UL - dev_priv->ips.last_count2;
		diff += count;
	} else {
		diff = count - dev_priv->ips.last_count2;
	}
8189 dev_priv->ips.last_count2 = count;
8190 dev_priv->ips.last_time2 = now;
8192 /* More magic constants... */
	diff = diff * 1181;
	diff = div_u64(diff, diffms * 10);
	dev_priv->ips.gfx_power = diff;
}
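/*
 * Units note (a sketch, assuming the empirical 1181 factor above): the
 * GFXEC energy-counter delta is scaled by 1181 and divided by
 * (elapsed_ms * 10), so ips.gfx_power tracks a rate rather than a raw
 * count and stays comparable across sampling intervals.
 */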
8198 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
8200 intel_wakeref_t wakeref;
	if (!IS_GEN(dev_priv, 5))
		return;
8205 with_intel_runtime_pm(dev_priv, wakeref) {
8206 spin_lock_irq(&mchdev_lock);
8207 __i915_update_gfx_val(dev_priv);
		spin_unlock_irq(&mchdev_lock);
	}
}
8212 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
	unsigned long t, corr, state1, corr2, state2;
	u32 pxvid, ext_v;
8217 lockdep_assert_held(&mchdev_lock);
8219 pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
8220 pxvid = (pxvid >> 24) & 0x7f;
	ext_v = pvid_to_extvid(dev_priv, pxvid);

	state1 = ext_v;
8225 t = i915_mch_val(dev_priv);
8227 /* Revel in the empirically derived constants */
	/* Correction factor in 1/100000 units */
	if (t > 80)
		corr = ((t * 2349) + 135940);
	else if (t >= 50)
		corr = ((t * 964) + 29317);
	else /* < 50 */
		corr = ((t * 301) + 1004);
8237 corr = corr * ((150142 * state1) / 10000 - 78642);
	corr /= 100000;
	corr2 = (corr * dev_priv->ips.corr);
8241 state2 = (corr2 * state1) / 10000;
8242 state2 /= 100; /* convert to mW */
8244 __i915_update_gfx_val(dev_priv);
	return dev_priv->ips.gfx_power + state2;
}
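/*
 * Pipeline sketch for the above (constants are the empirical ones in
 * the code, interpretation hedged): state1 is the extended voltage for
 * the current P-state, t the temperature from i915_mch_val(); corr is
 * a temperature-dependent factor in 1/100000 units, further scaled by
 * the fuse-derived dev_priv->ips.corr, and the /100 at the end leaves
 * state2 in mW before it is added to the running gfx_power estimate.
 */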
8249 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
8251 intel_wakeref_t wakeref;
8252 unsigned long val = 0;
	if (!IS_GEN(dev_priv, 5))
		return 0;
8257 with_intel_runtime_pm(dev_priv, wakeref) {
8258 spin_lock_irq(&mchdev_lock);
8259 val = __i915_gfx_val(dev_priv);
		spin_unlock_irq(&mchdev_lock);
	}

	return val;
}
8266 static struct drm_i915_private __rcu *i915_mch_dev;
8268 static struct drm_i915_private *mchdev_get(void)
8270 struct drm_i915_private *i915;
	rcu_read_lock();
	i915 = rcu_dereference(i915_mch_dev);
	if (!kref_get_unless_zero(&i915->drm.ref))
		i915 = NULL;
	rcu_read_unlock();

	return i915;
}
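/*
 * Note on the pattern above: the RCU read lock only keeps the
 * i915_mch_dev pointer stable for the critical section, while
 * kref_get_unless_zero() rejects a device already being torn down, so
 * callers get either a real reference (dropped later via drm_dev_put())
 * or NULL.
 */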
/**
 * i915_read_mch_val - return value for IPS use
 *
8284 * Calculate and return a value for the IPS driver to use when deciding whether
 * we have thermal and power headroom to increase CPU or GPU power budget.
 */
8287 unsigned long i915_read_mch_val(void)
8289 struct drm_i915_private *i915;
8290 unsigned long chipset_val = 0;
8291 unsigned long graphics_val = 0;
8292 intel_wakeref_t wakeref;
	i915 = mchdev_get();
	if (!i915)
		return 0;
8298 with_intel_runtime_pm(i915, wakeref) {
8299 spin_lock_irq(&mchdev_lock);
8300 chipset_val = __i915_chipset_val(i915);
8301 graphics_val = __i915_gfx_val(i915);
8302 spin_unlock_irq(&mchdev_lock);
8305 drm_dev_put(&i915->drm);
	return chipset_val + graphics_val;
}
8308 EXPORT_SYMBOL_GPL(i915_read_mch_val);
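/*
 * Minimal usage sketch (hypothetical caller, not code from intel_ips):
 *
 *	unsigned long headroom = i915_read_mch_val();
 *
 *	if (headroom > threshold)
 *		; // safe to raise the CPU/GPU power budget
 *
 * The GPL export exists precisely so the separate intel_ips module can
 * resolve this symbol at runtime without a hard link-time dependency.
 */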
/**
 * i915_gpu_raise - raise GPU frequency limit
 *
 * Raise the limit; IPS indicates we have thermal headroom.
 */
8315 bool i915_gpu_raise(void)
8317 struct drm_i915_private *i915;
	i915 = mchdev_get();
	if (!i915)
		return false;
8323 spin_lock_irq(&mchdev_lock);
8324 if (i915->ips.max_delay > i915->ips.fmax)
8325 i915->ips.max_delay--;
8326 spin_unlock_irq(&mchdev_lock);
	drm_dev_put(&i915->drm);
	return true;
}
8331 EXPORT_SYMBOL_GPL(i915_gpu_raise);
/**
 * i915_gpu_lower - lower GPU frequency limit
 *
 * IPS indicates we're close to a thermal limit, so throttle back the GPU
 * frequency maximum.
 */
8339 bool i915_gpu_lower(void)
8341 struct drm_i915_private *i915;
	i915 = mchdev_get();
	if (!i915)
		return false;
8347 spin_lock_irq(&mchdev_lock);
8348 if (i915->ips.max_delay < i915->ips.min_delay)
8349 i915->ips.max_delay++;
8350 spin_unlock_irq(&mchdev_lock);
	drm_dev_put(&i915->drm);
	return true;
}
8355 EXPORT_SYMBOL_GPL(i915_gpu_lower);
/**
 * i915_gpu_busy - indicate GPU business to IPS
 *
 * Tell the IPS driver whether or not the GPU is busy.
 */
8362 bool i915_gpu_busy(void)
	struct drm_i915_private *i915;
	bool ret;
	i915 = mchdev_get();
	if (!i915)
		return false;
8371 ret = i915->gt.awake;
	drm_dev_put(&i915->drm);
	return ret;
}
8376 EXPORT_SYMBOL_GPL(i915_gpu_busy);
/**
 * i915_gpu_turbo_disable - disable graphics turbo
 *
 * Disable graphics turbo by resetting the max frequency and setting the
 * current frequency to the default.
 */
8384 bool i915_gpu_turbo_disable(void)
	struct drm_i915_private *i915;
	bool ret;
	i915 = mchdev_get();
	if (!i915)
		return false;
8393 spin_lock_irq(&mchdev_lock);
8394 i915->ips.max_delay = i915->ips.fstart;
8395 ret = ironlake_set_drps(i915, i915->ips.fstart);
8396 spin_unlock_irq(&mchdev_lock);
	drm_dev_put(&i915->drm);
	return ret;
}
8401 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
/**
 * Tells the intel_ips driver that the i915 driver is now loaded, if
8405 * IPS got loaded first.
8407 * This awkward dance is so that neither module has to depend on the
8408 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 */
static void
ips_ping_for_i915_load(void)
{
	void (*link)(void);

	link = symbol_get(ips_link_to_i915_driver);
	if (link) {
		link();
		symbol_put(ips_link_to_i915_driver);
	}
}
8423 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8425 /* We only register the i915 ips part with intel-ips once everything is
8426 * set up, to avoid intel-ips sneaking in and reading bogus values. */
8427 rcu_assign_pointer(i915_mch_dev, dev_priv);
8429 ips_ping_for_i915_load();
8432 void intel_gpu_ips_teardown(void)
8434 rcu_assign_pointer(i915_mch_dev, NULL);
static void intel_init_emon(struct drm_i915_private *dev_priv)
{
	u32 lcfuse;
	u8 pxw[16];
	int i;

	/* Disable to program */
	I915_WRITE(ECR, 0);
	POSTING_READ(ECR);
8447 /* Program energy weights for various events */
8448 I915_WRITE(SDEW, 0x15040d00);
8449 I915_WRITE(CSIEW0, 0x007f0000);
8450 I915_WRITE(CSIEW1, 0x1e220004);
8451 I915_WRITE(CSIEW2, 0x04000004);
8453 for (i = 0; i < 5; i++)
8454 I915_WRITE(PEW(i), 0);
8455 for (i = 0; i < 3; i++)
8456 I915_WRITE(DEW(i), 0);
8458 /* Program P-state weights to account for frequency power adjustment */
8459 for (i = 0; i < 16; i++) {
8460 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8461 unsigned long freq = intel_pxfreq(pxvidfreq);
		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
			PXVFREQ_PX_SHIFT;
		unsigned long val;

		val = vid * vid;
		val *= (freq / 1000);
		val *= 255;
		val /= (127*127*900);
		if (val > 0xff)
			DRM_ERROR("bad pxval: %ld\n", val);
		pxw[i] = val;
	}
	/* Render standby states get 0 weight */
	pxw[14] = 0;
	pxw[15] = 0;
8478 for (i = 0; i < 4; i++) {
8479 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8480 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8481 I915_WRITE(PXW(i), val);
8484 /* Adjust magic regs to magic values (more experimental results) */
8485 I915_WRITE(OGW0, 0);
8486 I915_WRITE(OGW1, 0);
8487 I915_WRITE(EG0, 0x00007f00);
8488 I915_WRITE(EG1, 0x0000000e);
8489 I915_WRITE(EG2, 0x000e0000);
8490 I915_WRITE(EG3, 0x68000300);
8491 I915_WRITE(EG4, 0x42000000);
	I915_WRITE(EG5, 0x00140031);
	I915_WRITE(EG6, 0);
	I915_WRITE(EG7, 0);
8496 for (i = 0; i < 8; i++)
8497 I915_WRITE(PXWL(i), 0);
8499 /* Enable PMON + select events */
8500 I915_WRITE(ECR, 0x80000019);
8502 lcfuse = I915_READ(LCFUSE02);
	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
}
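/*
 * Summary note (a sketch, not from the PRM): each pxw[] weight above is
 * vid^2 * (freq / 1000) * 255 / (127 * 127 * 900) clamped to a byte,
 * four weights are packed per 32-bit PXW register, and the LCFUSE
 * correction saved in dev_priv->ips.corr is the per-part calibration
 * factor consumed later by __i915_gfx_val().
 */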
8507 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8509 struct intel_rps *rps = &dev_priv->gt_pm.rps;
	/*
	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
	 * requirement.
	 */
8515 if (!sanitize_rc6(dev_priv)) {
8516 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
		pm_runtime_get(&dev_priv->drm.pdev->dev);
	}
8520 mutex_lock(&dev_priv->pcu_lock);
8522 /* Initialize RPS limits (for userspace) */
8523 if (IS_CHERRYVIEW(dev_priv))
8524 cherryview_init_gt_powersave(dev_priv);
8525 else if (IS_VALLEYVIEW(dev_priv))
8526 valleyview_init_gt_powersave(dev_priv);
8527 else if (INTEL_GEN(dev_priv) >= 6)
8528 gen6_init_rps_frequencies(dev_priv);
8530 /* Derive initial user preferences/limits from the hardware limits */
8531 rps->idle_freq = rps->min_freq;
8532 rps->cur_freq = rps->idle_freq;
8534 rps->max_freq_softlimit = rps->max_freq;
8535 rps->min_freq_softlimit = rps->min_freq;
8537 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8538 rps->min_freq_softlimit =
			max_t(int,
			      rps->efficient_freq,
8541 intel_freq_opcode(dev_priv, 450));
8543 /* After setting max-softlimit, find the overclock max freq */
8544 if (IS_GEN(dev_priv, 6) ||
8545 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
		u32 params = 0;

		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8549 if (params & BIT(31)) { /* OC supported */
8550 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8551 (rps->max_freq & 0xff) * 50,
8552 (params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}
8557 /* Finally allow us to boost to max by default */
8558 rps->boost_freq = rps->max_freq;
8560 mutex_unlock(&dev_priv->pcu_lock);
8563 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8565 if (IS_VALLEYVIEW(dev_priv))
8566 valleyview_cleanup_gt_powersave(dev_priv);
8568 if (!HAS_RC6(dev_priv))
8569 pm_runtime_put(&dev_priv->drm.pdev->dev);
8572 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8574 dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8575 dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8576 intel_disable_gt_powersave(dev_priv);
8578 if (INTEL_GEN(dev_priv) >= 11)
8579 gen11_reset_rps_interrupts(dev_priv);
8580 else if (INTEL_GEN(dev_priv) >= 6)
8581 gen6_reset_rps_interrupts(dev_priv);
8584 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8586 lockdep_assert_held(&i915->pcu_lock);
	if (!i915->gt_pm.llc_pstate.enabled)
		return;
8591 /* Currently there is no HW configuration to be done to disable. */
8593 i915->gt_pm.llc_pstate.enabled = false;
8596 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8598 lockdep_assert_held(&dev_priv->pcu_lock);
	if (!dev_priv->gt_pm.rc6.enabled)
		return;
8603 if (INTEL_GEN(dev_priv) >= 9)
8604 gen9_disable_rc6(dev_priv);
8605 else if (IS_CHERRYVIEW(dev_priv))
8606 cherryview_disable_rc6(dev_priv);
8607 else if (IS_VALLEYVIEW(dev_priv))
8608 valleyview_disable_rc6(dev_priv);
8609 else if (INTEL_GEN(dev_priv) >= 6)
8610 gen6_disable_rc6(dev_priv);
8612 dev_priv->gt_pm.rc6.enabled = false;
8615 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8617 lockdep_assert_held(&dev_priv->pcu_lock);
	if (!dev_priv->gt_pm.rps.enabled)
		return;
8622 if (INTEL_GEN(dev_priv) >= 9)
8623 gen9_disable_rps(dev_priv);
8624 else if (IS_CHERRYVIEW(dev_priv))
8625 cherryview_disable_rps(dev_priv);
8626 else if (IS_VALLEYVIEW(dev_priv))
8627 valleyview_disable_rps(dev_priv);
8628 else if (INTEL_GEN(dev_priv) >= 6)
8629 gen6_disable_rps(dev_priv);
8630 else if (IS_IRONLAKE_M(dev_priv))
8631 ironlake_disable_drps(dev_priv);
8633 dev_priv->gt_pm.rps.enabled = false;
8636 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8638 mutex_lock(&dev_priv->pcu_lock);
8640 intel_disable_rc6(dev_priv);
8641 intel_disable_rps(dev_priv);
8642 if (HAS_LLC(dev_priv))
8643 intel_disable_llc_pstate(dev_priv);
8645 mutex_unlock(&dev_priv->pcu_lock);
8648 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8650 lockdep_assert_held(&i915->pcu_lock);
	if (i915->gt_pm.llc_pstate.enabled)
		return;
8655 gen6_update_ring_freq(i915);
8657 i915->gt_pm.llc_pstate.enabled = true;
8660 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8662 lockdep_assert_held(&dev_priv->pcu_lock);
	if (dev_priv->gt_pm.rc6.enabled)
		return;
8667 if (IS_CHERRYVIEW(dev_priv))
8668 cherryview_enable_rc6(dev_priv);
8669 else if (IS_VALLEYVIEW(dev_priv))
8670 valleyview_enable_rc6(dev_priv);
8671 else if (INTEL_GEN(dev_priv) >= 11)
8672 gen11_enable_rc6(dev_priv);
8673 else if (INTEL_GEN(dev_priv) >= 9)
8674 gen9_enable_rc6(dev_priv);
8675 else if (IS_BROADWELL(dev_priv))
8676 gen8_enable_rc6(dev_priv);
8677 else if (INTEL_GEN(dev_priv) >= 6)
8678 gen6_enable_rc6(dev_priv);
8680 dev_priv->gt_pm.rc6.enabled = true;
8683 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8685 struct intel_rps *rps = &dev_priv->gt_pm.rps;
	lockdep_assert_held(&dev_priv->pcu_lock);

	if (rps->enabled)
		return;
8692 if (IS_CHERRYVIEW(dev_priv)) {
8693 cherryview_enable_rps(dev_priv);
8694 } else if (IS_VALLEYVIEW(dev_priv)) {
8695 valleyview_enable_rps(dev_priv);
8696 } else if (INTEL_GEN(dev_priv) >= 9) {
8697 gen9_enable_rps(dev_priv);
8698 } else if (IS_BROADWELL(dev_priv)) {
8699 gen8_enable_rps(dev_priv);
8700 } else if (INTEL_GEN(dev_priv) >= 6) {
8701 gen6_enable_rps(dev_priv);
8702 } else if (IS_IRONLAKE_M(dev_priv)) {
8703 ironlake_enable_drps(dev_priv);
8704 intel_init_emon(dev_priv);
8707 WARN_ON(rps->max_freq < rps->min_freq);
8708 WARN_ON(rps->idle_freq > rps->max_freq);
8710 WARN_ON(rps->efficient_freq < rps->min_freq);
8711 WARN_ON(rps->efficient_freq > rps->max_freq);
8713 rps->enabled = true;
8716 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8718 /* Powersaving is controlled by the host when inside a VM */
	if (intel_vgpu_active(dev_priv))
		return;
8722 mutex_lock(&dev_priv->pcu_lock);
8724 if (HAS_RC6(dev_priv))
8725 intel_enable_rc6(dev_priv);
8726 intel_enable_rps(dev_priv);
8727 if (HAS_LLC(dev_priv))
8728 intel_enable_llc_pstate(dev_priv);
8730 mutex_unlock(&dev_priv->pcu_lock);
8733 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
{
	/*
	 * On Ibex Peak and Cougar Point, we need to disable clock
8737 * gating for the panel power sequencer or it will fail to
	 * start up when no ports are active.
	 */
8740 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
{
	enum pipe pipe;
8747 for_each_pipe(dev_priv, pipe) {
8748 I915_WRITE(DSPCNTR(pipe),
8749 I915_READ(DSPCNTR(pipe)) |
8750 DISPPLANE_TRICKLE_FEED_DISABLE);
8752 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8753 POSTING_READ(DSPSURF(pipe));
8757 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8759 u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
	/*
	 * Required for FBC
	 * WaFbcDisableDpfcClockGating:ilk
	 */
8765 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8766 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8767 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8769 I915_WRITE(PCH_3DCGDIS0,
8770 MARIUNIT_CLOCK_GATE_DISABLE |
8771 SVSMUNIT_CLOCK_GATE_DISABLE);
8772 I915_WRITE(PCH_3DCGDIS1,
8773 VFMUNIT_CLOCK_GATE_DISABLE);
	/*
	 * According to the spec the following bits should be set in
8777 * order to enable memory self-refresh
8778 * The bit 22/21 of 0x42004
8779 * The bit 5 of 0x42020
	 * The bit 15 of 0x45000
	 */
8782 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8783 (I915_READ(ILK_DISPLAY_CHICKEN2) |
8784 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8785 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8786 I915_WRITE(DISP_ARB_CTL,
		   (I915_READ(DISP_ARB_CTL) |
		    DISP_FBC_WM_DIS));
	/*
	 * Based on the document from hardware guys the following bits
8792 * should be set unconditionally in order to enable FBC.
8793 * The bit 22 of 0x42000
8794 * The bit 22 of 0x42004
	 * The bit 7,8,9 of 0x42020.
	 */
8797 if (IS_IRONLAKE_M(dev_priv)) {
8798 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8799 I915_WRITE(ILK_DISPLAY_CHICKEN1,
			   I915_READ(ILK_DISPLAY_CHICKEN1) |
			   ILK_FBCQ_DIS);
8802 I915_WRITE(ILK_DISPLAY_CHICKEN2,
			   I915_READ(ILK_DISPLAY_CHICKEN2) |
			   ILK_DPARB_GATE);
	}
8807 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8809 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8810 I915_READ(ILK_DISPLAY_CHICKEN2) |
8811 ILK_ELPIN_409_SELECT);
8812 I915_WRITE(_3D_CHICKEN2,
8813 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8814 _3D_CHICKEN2_WM_READ_PIPELINED);
8816 /* WaDisableRenderCachePipelinedFlush:ilk */
8817 I915_WRITE(CACHE_MODE_0,
8818 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8820 /* WaDisable_RenderCache_OperationalFlush:ilk */
8821 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8823 g4x_disable_trickle_feed(dev_priv);
8825 ibx_init_clock_gating(dev_priv);
static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	int pipe;
	u32 val;

	/*
	 * On Ibex Peak and Cougar Point, we need to disable clock
8835 * gating for the panel power sequencer or it will fail to
	 * start up when no ports are active.
	 */
8838 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8839 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8840 PCH_CPUNIT_CLOCK_GATE_DISABLE);
8841 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8842 DPLS_EDP_PPS_FIX_DIS);
8843 /* The below fixes the weird display corruption, a few pixels shifted
	 * downward, on (only) LVDS of some HP laptops with IVY.
	 */
8846 for_each_pipe(dev_priv, pipe) {
8847 val = I915_READ(TRANS_CHICKEN2(pipe));
8848 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8849 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8850 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8851 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8852 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8853 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8854 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8855 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8857 /* WADP0ClockGatingDisable */
8858 for_each_pipe(dev_priv, pipe) {
8859 I915_WRITE(TRANS_CHICKEN1(pipe),
8860 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
{
	u32 tmp;
8868 tmp = I915_READ(MCH_SSKPD);
8869 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
			      tmp);
}
8874 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8876 u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8878 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8880 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8881 I915_READ(ILK_DISPLAY_CHICKEN2) |
8882 ILK_ELPIN_409_SELECT);
8884 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8885 I915_WRITE(_3D_CHICKEN,
8886 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8888 /* WaDisable_RenderCache_OperationalFlush:snb */
8889 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
	/*
	 * BSpec recommends 8x4 when MSAA is used,
8893 * however in practice 16x4 seems fastest.
8895 * Note that PS/WM thread counts depend on the WIZ hashing
8896 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
8899 I915_WRITE(GEN6_GT_MODE,
8900 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8902 I915_WRITE(CACHE_MODE_0,
8903 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8905 I915_WRITE(GEN6_UCGCTL1,
8906 I915_READ(GEN6_UCGCTL1) |
8907 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8908 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8910 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8911 * gating disable must be set. Failure to set it results in
8912 * flickering pixels due to Z write ordering failures after
8913 * some amount of runtime in the Mesa "fire" demo, and Unigine
8914 * Sanctuary and Tropics, and apparently anything else with
8915 * alpha test or pixel discard.
8917 * According to the spec, bit 11 (RCCUNIT) must also be set,
8918 * but we didn't debug actual testcases to find it out.
8920 * WaDisableRCCUnitClockGating:snb
	 * WaDisableRCPBUnitClockGating:snb
	 */
8923 I915_WRITE(GEN6_UCGCTL2,
8924 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8925 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8927 /* WaStripsFansDisableFastClipPerformanceFix:snb */
8928 I915_WRITE(_3D_CHICKEN3,
8929 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8933 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
	 * 3DSTATE_SF number of SF output attributes is more than 16."
	 */
8936 I915_WRITE(_3D_CHICKEN3,
8937 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
	/*
	 * According to the spec the following bits should be
8941 * set in order to enable memory self-refresh and fbc:
8942 * The bit21 and bit22 of 0x42000
8943 * The bit21 and bit22 of 0x42004
8944 * The bit5 and bit7 of 0x42020
8945 * The bit14 of 0x70180
8946 * The bit14 of 0x71180
	 * WaFbcAsynchFlipDisableFbcQueue:snb
	 */
8950 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8951 I915_READ(ILK_DISPLAY_CHICKEN1) |
8952 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8953 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8954 I915_READ(ILK_DISPLAY_CHICKEN2) |
8955 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8956 I915_WRITE(ILK_DSPCLK_GATE_D,
8957 I915_READ(ILK_DSPCLK_GATE_D) |
8958 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
8959 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8961 g4x_disable_trickle_feed(dev_priv);
8963 cpt_init_clock_gating(dev_priv);
8965 gen6_check_mch_setup(dev_priv);
8968 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8970 u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
	/*
	 * WaVSThreadDispatchOverride:ivb,vlv
	 *
8975 * This actually overrides the dispatch
	 * mode for all thread types.
	 */
8978 reg &= ~GEN7_FF_SCHED_MASK;
8979 reg |= GEN7_FF_TS_SCHED_HW;
8980 reg |= GEN7_FF_VS_SCHED_HW;
8981 reg |= GEN7_FF_DS_SCHED_HW;
8983 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8986 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	/*
	 * TODO: this bit should only be enabled when really needed, then
	 * disabled when not needed anymore in order to save power.
	 */
8992 if (HAS_PCH_LPT_LP(dev_priv))
8993 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8994 I915_READ(SOUTH_DSPCLK_GATE_D) |
8995 PCH_LP_PARTITION_LEVEL_DISABLE);
8997 /* WADPOClockGatingDisable:hsw */
8998 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8999 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
9000 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
9003 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
9005 if (HAS_PCH_LPT_LP(dev_priv)) {
9006 u32 val = I915_READ(SOUTH_DSPCLK_GATE_D);
9008 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
9009 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
9013 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
9014 int general_prio_credits,
				   int high_prio_credits)
{
	u32 misccpctl;
	u32 val;
9020 /* WaTempDisableDOPClkGating:bdw */
9021 misccpctl = I915_READ(GEN7_MISCCPCTL);
9022 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
9024 val = I915_READ(GEN8_L3SQCREG1);
9025 val &= ~L3_PRIO_CREDITS_MASK;
9026 val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
9027 val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
9028 I915_WRITE(GEN8_L3SQCREG1, val);
	/*
	 * Wait at least 100 clocks before re-enabling clock gating.
	 * See the definition of L3SQCREG1 in BSpec.
	 */
9034 POSTING_READ(GEN8_L3SQCREG1);
9036 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
{
	/* This is not a Wa. Enable to reduce Sampler power */
9042 I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
9043 I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
9045 /* WaEnable32PlaneMode:icl */
9046 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
9047 _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
{
	if (!HAS_PCH_CNP(dev_priv))
		return;
9055 /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
9056 I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
9057 CNP_PWM_CGE_GATING_DISABLE);
static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
{
	u32 val;
9063 cnp_init_clock_gating(dev_priv);
	/* This is not a Wa. Enable for better image quality */
9066 I915_WRITE(_3D_CHICKEN3,
9067 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
9069 /* WaEnableChickenDCPR:cnl */
9070 I915_WRITE(GEN8_CHICKEN_DCPR_1,
9071 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
9073 /* WaFbcWakeMemOn:cnl */
9074 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
9075 DISP_FBC_MEMORY_WAKE);
9077 val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
9078 /* ReadHitWriteOnlyDisable:cnl */
9079 val |= RCCUNIT_CLKGATE_DIS;
9080 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
9081 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
9082 val |= SARBUNIT_CLKGATE_DIS;
9083 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
9085 /* Wa_2201832410:cnl */
9086 val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
9087 val |= GWUNIT_CLKGATE_DIS;
9088 I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
9090 /* WaDisableVFclkgate:cnl */
9091 /* WaVFUnitClockGatingDisable:cnl */
9092 val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
9093 val |= VFUNIT_CLKGATE_DIS;
9094 I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
9097 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
9099 cnp_init_clock_gating(dev_priv);
9100 gen9_init_clock_gating(dev_priv);
9102 /* WaFbcNukeOnHostModify:cfl */
9103 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9104 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9107 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
9109 gen9_init_clock_gating(dev_priv);
9111 /* WaDisableSDEUnitClockGating:kbl */
9112 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9113 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9114 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9116 /* WaDisableGamClockGating:kbl */
9117 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9118 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9119 GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
9121 /* WaFbcNukeOnHostModify:kbl */
9122 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9123 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9126 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
9128 gen9_init_clock_gating(dev_priv);
9130 /* WAC6entrylatency:skl */
9131 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
9132 FBC_LLC_FULLY_OPEN);
9134 /* WaFbcNukeOnHostModify:skl */
9135 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9136 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9139 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
9141 /* The GTT cache must be disabled if the system is using 2M pages. */
9142 bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
						 I915_GTT_PAGE_SIZE_2M);
	enum pipe pipe;
9146 /* WaSwitchSolVfFArbitrationPriority:bdw */
9147 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9149 /* WaPsrDPAMaskVBlankInSRD:bdw */
9150 I915_WRITE(CHICKEN_PAR1_1,
9151 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
9153 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
9154 for_each_pipe(dev_priv, pipe) {
9155 I915_WRITE(CHICKEN_PIPESL_1(pipe),
9156 I915_READ(CHICKEN_PIPESL_1(pipe)) |
9157 BDW_DPRS_MASK_VBLANK_SRD);
9160 /* WaVSRefCountFullforceMissDisable:bdw */
9161 /* WaDSRefCountFullforceMissDisable:bdw */
9162 I915_WRITE(GEN7_FF_THREAD_MODE,
9163 I915_READ(GEN7_FF_THREAD_MODE) &
9164 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9166 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9167 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9169 /* WaDisableSDEUnitClockGating:bdw */
9170 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9171 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9173 /* WaProgramL3SqcReg1Default:bdw */
9174 gen8_set_l3sqc_credits(dev_priv, 30, 2);
9176 /* WaGttCachingOffByDefault:bdw */
9177 I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
9179 /* WaKVMNotificationOnConfigChange:bdw */
9180 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
9181 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
9183 lpt_init_clock_gating(dev_priv);
9185 /* WaDisableDopClockGating:bdw
	 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
	 * clock gating.
	 */
9190 I915_WRITE(GEN6_UCGCTL1,
9191 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
9194 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
9196 /* L3 caching of data atomics doesn't work -- disable it. */
9197 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
9198 I915_WRITE(HSW_ROW_CHICKEN3,
9199 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
9201 /* This is required by WaCatErrorRejectionIssue:hsw */
9202 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9203 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9204 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9206 /* WaVSRefCountFullforceMissDisable:hsw */
9207 I915_WRITE(GEN7_FF_THREAD_MODE,
9208 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
9210 /* WaDisable_RenderCache_OperationalFlush:hsw */
9211 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9213 /* enable HiZ Raw Stall Optimization */
9214 I915_WRITE(CACHE_MODE_0_GEN7,
9215 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9217 /* WaDisable4x2SubspanOptimization:hsw */
9218 I915_WRITE(CACHE_MODE_1,
9219 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9222 * BSpec recommends 8x4 when MSAA is used,
9223 * however in practice 16x4 seems fastest.
9225 * Note that PS/WM thread counts depend on the WIZ hashing
9226 * disable bit, which we don't touch here, but it's good
9227 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9229 I915_WRITE(GEN7_GT_MODE,
9230 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9232 /* WaSampleCChickenBitEnable:hsw */
9233 I915_WRITE(HALF_SLICE_CHICKEN3,
9234 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
9236 /* WaSwitchSolVfFArbitrationPriority:hsw */
9237 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9239 lpt_init_clock_gating(dev_priv);
static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
{
	u32 snpcr;
9246 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
9248 /* WaDisableEarlyCull:ivb */
9249 I915_WRITE(_3D_CHICKEN3,
9250 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9252 /* WaDisableBackToBackFlipFix:ivb */
9253 I915_WRITE(IVB_CHICKEN3,
9254 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9255 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9257 /* WaDisablePSDDualDispatchEnable:ivb */
9258 if (IS_IVB_GT1(dev_priv))
9259 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9260 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9262 /* WaDisable_RenderCache_OperationalFlush:ivb */
9263 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9265 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
9266 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9267 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9269 /* WaApplyL3ControlAndL3ChickenMode:ivb */
9270 I915_WRITE(GEN7_L3CNTLREG1,
9271 GEN7_WA_FOR_GEN7_L3_CONTROL);
9272 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9273 GEN7_WA_L3_CHICKEN_MODE);
9274 if (IS_IVB_GT1(dev_priv))
9275 I915_WRITE(GEN7_ROW_CHICKEN2,
9276 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
	else {
		/* must write both registers */
		I915_WRITE(GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
		I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
	}
9285 /* WaForceL3Serialization:ivb */
9286 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9287 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
	 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
	 */
9293 I915_WRITE(GEN6_UCGCTL2,
9294 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9296 /* This is required by WaCatErrorRejectionIssue:ivb */
9297 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9298 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9299 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9301 g4x_disable_trickle_feed(dev_priv);
9303 gen7_setup_fixed_func_scheduler(dev_priv);
9305 if (0) { /* causes HiZ corruption on ivb:gt1 */
9306 /* enable HiZ Raw Stall Optimization */
9307 I915_WRITE(CACHE_MODE_0_GEN7,
			   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
	}
9311 /* WaDisable4x2SubspanOptimization:ivb */
9312 I915_WRITE(CACHE_MODE_1,
9313 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
	/*
	 * BSpec recommends 8x4 when MSAA is used,
9317 * however in practice 16x4 seems fastest.
9319 * Note that PS/WM thread counts depend on the WIZ hashing
9320 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
9323 I915_WRITE(GEN7_GT_MODE,
9324 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9326 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9327 snpcr &= ~GEN6_MBC_SNPCR_MASK;
9328 snpcr |= GEN6_MBC_SNPCR_MED;
9329 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9331 if (!HAS_PCH_NOP(dev_priv))
9332 cpt_init_clock_gating(dev_priv);
9334 gen6_check_mch_setup(dev_priv);
9337 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9339 /* WaDisableEarlyCull:vlv */
9340 I915_WRITE(_3D_CHICKEN3,
9341 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9343 /* WaDisableBackToBackFlipFix:vlv */
9344 I915_WRITE(IVB_CHICKEN3,
9345 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9346 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9348 /* WaPsdDispatchEnable:vlv */
9349 /* WaDisablePSDDualDispatchEnable:vlv */
9350 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9351 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9352 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9354 /* WaDisable_RenderCache_OperationalFlush:vlv */
9355 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9357 /* WaForceL3Serialization:vlv */
9358 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9359 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9361 /* WaDisableDopClockGating:vlv */
9362 I915_WRITE(GEN7_ROW_CHICKEN2,
9363 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9365 /* This is required by WaCatErrorRejectionIssue:vlv */
9366 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9367 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9368 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9370 gen7_setup_fixed_func_scheduler(dev_priv);
	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
	 */
9376 I915_WRITE(GEN6_UCGCTL2,
9377 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9379 /* WaDisableL3Bank2xClockGate:vlv
9380 * Disabling L3 clock gating- MMIO 940c[25] = 1
9381 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
9382 I915_WRITE(GEN7_UCGCTL4,
9383 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
	/*
	 * BSpec says this must be set, even though
	 * WaDisable4x2SubspanOptimization isn't listed for VLV.
	 */
9389 I915_WRITE(CACHE_MODE_1,
9390 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
	/*
	 * BSpec recommends 8x4 when MSAA is used,
9394 * however in practice 16x4 seems fastest.
9396 * Note that PS/WM thread counts depend on the WIZ hashing
9397 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
9400 I915_WRITE(GEN7_GT_MODE,
9401 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
9407 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
	/*
	 * WaDisableVLVClockGating_VBIIssue:vlv
	 * Disable clock gating on the GCFG unit to prevent a delay
	 * in the reporting of vblank events.
	 */
9414 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9417 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9419 /* WaVSRefCountFullforceMissDisable:chv */
9420 /* WaDSRefCountFullforceMissDisable:chv */
9421 I915_WRITE(GEN7_FF_THREAD_MODE,
9422 I915_READ(GEN7_FF_THREAD_MODE) &
9423 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9425 /* WaDisableSemaphoreAndSyncFlipWait:chv */
9426 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9427 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9429 /* WaDisableCSUnitClockGating:chv */
9430 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9431 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9433 /* WaDisableSDEUnitClockGating:chv */
9434 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9435 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
	/*
	 * WaProgramL3SqcReg1Default:chv
9439 * See gfxspecs/Related Documents/Performance Guide/
	 * LSQC Setting Recommendations.
	 */
9442 gen8_set_l3sqc_credits(dev_priv, 38, 2);
	/*
	 * GTT cache may not work with big pages, so if those
	 * are ever enabled GTT cache may need to be disabled.
	 */
9448 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
{
	u32 dspclk_gate;
9455 I915_WRITE(RENCLK_GATE_D1, 0);
9456 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9457 GS_UNIT_CLOCK_GATE_DISABLE |
9458 CL_UNIT_CLOCK_GATE_DISABLE);
9459 I915_WRITE(RAMCLK_GATE_D, 0);
9460 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9461 OVRUNIT_CLOCK_GATE_DISABLE |
9462 OVCUNIT_CLOCK_GATE_DISABLE;
9463 if (IS_GM45(dev_priv))
9464 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9465 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9467 /* WaDisableRenderCachePipelinedFlush */
9468 I915_WRITE(CACHE_MODE_0,
9469 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9471 /* WaDisable_RenderCache_OperationalFlush:g4x */
9472 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9474 g4x_disable_trickle_feed(dev_priv);
9477 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9479 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9480 I915_WRITE(RENCLK_GATE_D2, 0);
9481 I915_WRITE(DSPCLK_GATE_D, 0);
9482 I915_WRITE(RAMCLK_GATE_D, 0);
9483 I915_WRITE16(DEUC, 0);
9484 I915_WRITE(MI_ARB_STATE,
9485 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9487 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9488 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9491 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9493 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9494 I965_RCC_CLOCK_GATE_DISABLE |
9495 I965_RCPB_CLOCK_GATE_DISABLE |
9496 I965_ISC_CLOCK_GATE_DISABLE |
9497 I965_FBC_CLOCK_GATE_DISABLE);
9498 I915_WRITE(RENCLK_GATE_D2, 0);
9499 I915_WRITE(MI_ARB_STATE,
9500 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9502 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9503 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9506 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9508 u32 dstate = I915_READ(D_STATE);
9510 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9511 DSTATE_DOT_CLOCK_GATING;
9512 I915_WRITE(D_STATE, dstate);
9514 if (IS_PINEVIEW(dev_priv))
9515 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9517 /* IIR "flip pending" means done if this bit is set */
9518 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9520 /* interrupts should cause a wake up from C3 */
9521 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9523 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9524 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9526 I915_WRITE(MI_ARB_STATE,
9527 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9530 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9532 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9534 /* interrupts should cause a wake up from C3 */
9535 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9536 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9538 I915_WRITE(MEM_MODE,
9539 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9542 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9544 I915_WRITE(MEM_MODE,
9545 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9546 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9549 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9551 dev_priv->display.init_clock_gating(dev_priv);
9554 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9556 if (HAS_PCH_LPT(dev_priv))
9557 lpt_suspend_hw(dev_priv);
9560 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9562 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
/**
 * intel_init_clock_gating_hooks - setup the clock gating hooks
9567 * @dev_priv: device private
9569 * Setup the hooks that configure which clocks of a given platform can be
9570 * gated and also apply various GT and display specific workarounds for these
9571 * platforms. Note that some GT specific workarounds are applied separately
 * when GPU contexts or batchbuffers start their execution.
 */
9574 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9576 if (IS_GEN(dev_priv, 11))
9577 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9578 else if (IS_CANNONLAKE(dev_priv))
9579 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9580 else if (IS_COFFEELAKE(dev_priv))
9581 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9582 else if (IS_SKYLAKE(dev_priv))
9583 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9584 else if (IS_KABYLAKE(dev_priv))
9585 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9586 else if (IS_BROXTON(dev_priv))
9587 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9588 else if (IS_GEMINILAKE(dev_priv))
9589 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9590 else if (IS_BROADWELL(dev_priv))
9591 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9592 else if (IS_CHERRYVIEW(dev_priv))
9593 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9594 else if (IS_HASWELL(dev_priv))
9595 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9596 else if (IS_IVYBRIDGE(dev_priv))
9597 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9598 else if (IS_VALLEYVIEW(dev_priv))
9599 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9600 else if (IS_GEN(dev_priv, 6))
9601 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9602 else if (IS_GEN(dev_priv, 5))
9603 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9604 else if (IS_G4X(dev_priv))
9605 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9606 else if (IS_I965GM(dev_priv))
9607 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9608 else if (IS_I965G(dev_priv))
9609 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9610 else if (IS_GEN(dev_priv, 3))
9611 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9612 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9613 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9614 else if (IS_GEN(dev_priv, 2))
9615 dev_priv->display.init_clock_gating = i830_init_clock_gating;
	else {
		MISSING_CASE(INTEL_DEVID(dev_priv));
		dev_priv->display.init_clock_gating = nop_init_clock_gating;
	}
}
9622 /* Set up chip specific power management-related functions */
9623 void intel_init_pm(struct drm_i915_private *dev_priv)
9626 if (IS_PINEVIEW(dev_priv))
9627 i915_pineview_get_mem_freq(dev_priv);
9628 else if (IS_GEN(dev_priv, 5))
9629 i915_ironlake_get_mem_freq(dev_priv);
9631 /* For FIFO watermark updates */
9632 if (INTEL_GEN(dev_priv) >= 9) {
9633 skl_setup_wm_latency(dev_priv);
9634 dev_priv->display.initial_watermarks = skl_initial_wm;
9635 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9636 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9637 } else if (HAS_PCH_SPLIT(dev_priv)) {
9638 ilk_setup_wm_latency(dev_priv);
9640 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
9641 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9642 (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
9643 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9644 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9645 dev_priv->display.compute_intermediate_wm =
9646 ilk_compute_intermediate_wm;
9647 dev_priv->display.initial_watermarks =
9648 ilk_initial_watermarks;
9649 dev_priv->display.optimize_watermarks =
9650 ilk_optimize_watermarks;
		} else {
			DRM_DEBUG_KMS("Failed to read display plane latency. "
				      "Disable CxSR\n");
		}
9655 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9656 vlv_setup_wm_latency(dev_priv);
9657 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9658 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9659 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9660 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9661 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9662 } else if (IS_G4X(dev_priv)) {
9663 g4x_setup_wm_latency(dev_priv);
9664 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9665 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9666 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9667 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9668 } else if (IS_PINEVIEW(dev_priv)) {
9669 if (!intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
					    dev_priv->is_ddr3,
					    dev_priv->fsb_freq,
					    dev_priv->mem_freq)) {
9673 DRM_INFO("failed to find known CxSR latency "
9674 "(found ddr%s fsb freq %d, mem freq %d), "
				 "disabling CxSR\n",
				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
9677 dev_priv->fsb_freq, dev_priv->mem_freq);
9678 /* Disable CxSR and never update its watermark again */
9679 intel_set_memory_cxsr(dev_priv, false);
9680 dev_priv->display.update_wm = NULL;
		} else
			dev_priv->display.update_wm = pineview_update_wm;
9683 } else if (IS_GEN(dev_priv, 4)) {
9684 dev_priv->display.update_wm = i965_update_wm;
9685 } else if (IS_GEN(dev_priv, 3)) {
9686 dev_priv->display.update_wm = i9xx_update_wm;
9687 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9688 } else if (IS_GEN(dev_priv, 2)) {
9689 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9690 dev_priv->display.update_wm = i845_update_wm;
9691 dev_priv->display.get_fifo_size = i845_get_fifo_size;
		} else {
			dev_priv->display.update_wm = i9xx_update_wm;
			dev_priv->display.get_fifo_size = i830_get_fifo_size;
		}
	} else {
		DRM_ERROR("unexpected fall-through in intel_init_pm\n");
	}
}
static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
{
	u32 flags =
		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;

	switch (flags) {
	case GEN6_PCODE_SUCCESS:
		return 0;
	case GEN6_PCODE_UNIMPLEMENTED_CMD:
		return -ENODEV;
	case GEN6_PCODE_ILLEGAL_CMD:
		return -ENXIO;
	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
		return -EOVERFLOW;
	case GEN6_PCODE_TIMEOUT:
		return -ETIMEDOUT;
	default:
		MISSING_CASE(flags);
		return 0;
	}
}
static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
{
	u32 flags =
		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;

	switch (flags) {
	case GEN6_PCODE_SUCCESS:
		return 0;
	case GEN6_PCODE_ILLEGAL_CMD:
		return -ENXIO;
	case GEN7_PCODE_TIMEOUT:
		return -ETIMEDOUT;
	case GEN7_PCODE_ILLEGAL_DATA:
		return -EINVAL;
	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
		return -EOVERFLOW;
	default:
		MISSING_CASE(flags);
		return 0;
	}
}
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
{
	int status;
9750 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */
9757 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9758 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
				 mbox, __builtin_return_address(0));
		return -EAGAIN;
	}
9763 I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9764 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9765 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9767 if (__intel_wait_for_register_fw(&dev_priv->uncore,
					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
					 500, 0, NULL)) {
9770 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
			  mbox, __builtin_return_address(0));
		return -ETIMEDOUT;
	}
9775 *val = I915_READ_FW(GEN6_PCODE_DATA);
9776 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9778 if (INTEL_GEN(dev_priv) > 6)
9779 status = gen7_check_mailbox_status(dev_priv);
9781 status = gen6_check_mailbox_status(dev_priv);
	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
				 mbox, __builtin_return_address(0), status);
		return status;
	}

	return 0;
}
9792 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
				    u32 mbox, u32 val,
				    int fast_timeout_us, int slow_timeout_ms)
{
	int status;
9798 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */
9805 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9806 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
				 val, mbox, __builtin_return_address(0));
		return -EAGAIN;
	}
9811 I915_WRITE_FW(GEN6_PCODE_DATA, val);
9812 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9813 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9815 if (__intel_wait_for_register_fw(&dev_priv->uncore,
9816 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
					 fast_timeout_us, slow_timeout_ms,
					 NULL)) {
9819 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
			  val, mbox, __builtin_return_address(0));
		return -ETIMEDOUT;
	}
9824 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9826 if (INTEL_GEN(dev_priv) > 6)
9827 status = gen7_check_mailbox_status(dev_priv);
9829 status = gen6_check_mailbox_status(dev_priv);
	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
				 val, mbox, __builtin_return_address(0), status);
		return status;
	}

	return 0;
}
9840 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
				  u32 request, u32 reply_mask, u32 reply,
				  u32 *status)
{
	u32 val = request;

	*status = sandybridge_pcode_read(dev_priv, mbox, &val);

	return *status || ((val & reply_mask) == reply);
}
/**
 * skl_pcode_request - send PCODE request until acknowledgment
9853 * @dev_priv: device private
9854 * @mbox: PCODE mailbox ID the request is targeted for
9855 * @request: request ID
9856 * @reply_mask: mask used to check for request acknowledgment
9857 * @reply: value used to check for request acknowledgment
9858 * @timeout_base_ms: timeout for polling with preemption enabled
9860 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9861 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9862 * The request is acknowledged once the PCODE reply dword equals @reply after
9863 * applying @reply_mask. Polling is first attempted with preemption enabled
9864 * for @timeout_base_ms and if this times out for another 50 ms with
9865 * preemption disabled.
9867 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
 * other error as reported by PCODE.
 */
9870 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
		      u32 reply_mask, u32 reply, int timeout_base_ms)
{
	u32 status;
	int ret;
9876 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
				   &status)
	/*
	 * Prime the PCODE by doing a request first. Normally it guarantees
9883 * that a subsequent request, at most @timeout_base_ms later, succeeds.
9884 * _wait_for() doesn't guarantee when its passed condition is evaluated
	 * first, so send the first request explicitly.
	 */
	if (COND) {
		ret = 0;
		goto out;
	}
	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
	if (!ret)
		goto out;
	/*
	 * The above can time out if the number of requests was low (2 in the
9897 * worst case) _and_ PCODE was busy for some reason even after a
9898 * (queued) request and @timeout_base_ms delay. As a workaround retry
9899 * the poll with preemption disabled to maximize the number of
9900 * requests. Increase the timeout from @timeout_base_ms to 50ms to
9901 * account for interrupts that could reduce the number of these
9902 * requests, and for any quirks of the PCODE firmware that delays
	 * the request completion.
	 */
9905 DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9906 WARN_ON_ONCE(timeout_base_ms > 3);
	preempt_disable();
	ret = wait_for_atomic(COND, 50);
	preempt_enable();

out:
	return ret ? ret : status;
#undef COND
}
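/*
 * Usage sketch (hypothetical mailbox/reply values, not from BSpec):
 *
 *	ret = skl_pcode_request(dev_priv, mbox, request,
 *				reply_mask, reply, 10);
 *
 * polls with preemption enabled for ~10ms and then, if needed, for up
 * to another 50ms with preemption disabled before returning -ETIMEDOUT
 * or the PCODE error code.
 */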
9916 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9918 struct intel_rps *rps = &dev_priv->gt_pm.rps;
	/*
	 * N = val - 0xb7
	 * Slow = Fast = GPLL ref * N
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
}
9927 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9929 struct intel_rps *rps = &dev_priv->gt_pm.rps;
	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
}
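/*
 * Round-trip example for the two helpers above (hypothetical
 * gpll_ref_freq of 5000, assumed to be in kHz): opcode 0xc7 means
 * N = 0xc7 - 0xb7 = 16, so 5000 * 16 / 1000 = 80 MHz, and
 * byt_freq_opcode(80) gives 1000 * 80 / 5000 + 0xb7 = 0xc7 again.
 */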
9934 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9936 struct intel_rps *rps = &dev_priv->gt_pm.rps;
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
}
9945 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9947 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9949 /* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
}
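/*
 * CHV example (same hypothetical 5000 kHz gpll_ref_freq): opcode 20
 * maps to 5000 * 20 / (2 * 2 * 1000) = 25 MHz, and chv_freq_opcode(25)
 * returns DIV_ROUND_CLOSEST(2 * 1000 * 25, 5000) * 2 = 20; the
 * trailing "* 2" keeps opcodes even, as the hardware requires.
 */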
9953 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9955 if (INTEL_GEN(dev_priv) >= 9)
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
9958 else if (IS_CHERRYVIEW(dev_priv))
9959 return chv_gpu_freq(dev_priv, val);
9960 else if (IS_VALLEYVIEW(dev_priv))
9961 return byt_gpu_freq(dev_priv, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}
9966 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9968 if (INTEL_GEN(dev_priv) >= 9)
9969 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9970 GT_FREQUENCY_MULTIPLIER);
9971 else if (IS_CHERRYVIEW(dev_priv))
9972 return chv_freq_opcode(dev_priv, val);
9973 else if (IS_VALLEYVIEW(dev_priv))
9974 return byt_freq_opcode(dev_priv, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}
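/*
 * Gen9+ example: with GT_FREQUENCY_MULTIPLIER (50) and GEN9_FREQ_SCALER
 * (3), one opcode unit is 50/3 MHz, so intel_gpu_freq(dev_priv, 18) is
 * DIV_ROUND_CLOSEST(18 * 50, 3) = 300 MHz and intel_freq_opcode() maps
 * 300 MHz back to DIV_ROUND_CLOSEST(300 * 3, 50) = 18.
 */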
9979 void intel_pm_setup(struct drm_i915_private *dev_priv)
9981 mutex_init(&dev_priv->pcu_lock);
9982 mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9984 atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9986 dev_priv->runtime_pm.suspended = false;
9987 atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
			     const i915_reg_t reg)
{
	u32 lower, upper, tmp;
	int loop = 2;
	/*
	 * The registers accessed do not need forcewake. We borrow
	 * uncore lock to prevent concurrent access to range reg.
	 */
	lockdep_assert_held(&dev_priv->uncore.lock);
	/*
	 * vlv and chv residency counters are 40 bits in width.
	 * With a control bit, we can choose between upper or lower
	 * 32bit window into this counter.
	 *
	 * Although we always use the counter in high-range mode elsewhere,
	 * userspace may attempt to read the value before rc6 is initialised,
	 * before we have set the default VLV_COUNTER_CONTROL value. So always
	 * set the high bit to be safe.
	 */
	I915_WRITE_FW(VLV_COUNTER_CONTROL,
		      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
	upper = I915_READ_FW(reg);
	do {
		tmp = upper;

		I915_WRITE_FW(VLV_COUNTER_CONTROL,
			      _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
		lower = I915_READ_FW(reg);

		I915_WRITE_FW(VLV_COUNTER_CONTROL,
			      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
		upper = I915_READ_FW(reg);
	} while (upper != tmp && --loop);
	/*
	 * Everywhere else we always use VLV_COUNTER_CONTROL with the
	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
	 * now.
	 */

	return lower | (u64)upper << 8;
}
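/*
 * Sketch of the 40-bit reassembly above, with made-up values: the high-range
 * window returns counter bits [39:8] and the low-range window bits [31:0],
 * so the two reads overlap in bits [31:8], and the retry loop guarantees the
 * overlapping bits agree. With upper == 0x12345678 and lower == 0x345678ab:
 *
 *	lower | (u64)upper << 8
 *	== 0x345678ab | 0x1234567800
 *	== 0x12345678ab
 *
 * i.e. the full 40-bit counter value.
 */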
u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
			   const i915_reg_t reg)
{
	struct intel_uncore *uncore = &dev_priv->uncore;
	u64 time_hw, prev_hw, overflow_hw;
	unsigned int fw_domains;
	unsigned long flags;
	unsigned int i;
	u32 mul, div;
	if (!HAS_RC6(dev_priv))
		return 0;
	/*
	 * Store previous hw counter values for counter wrap-around handling.
	 *
	 * There are only four interesting registers and they live next to each
	 * other so we can use the relative address, compared to the smallest
	 * one as the index into driver storage.
	 */
	i = (i915_mmio_reg_offset(reg) -
	     i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
	if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
		return 0;
	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);
	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		mul = 1000000;
		div = dev_priv->czclk_freq;
		overflow_hw = BIT_ULL(40);
		time_hw = vlv_residency_raw(dev_priv, reg);
	} else {
		/* 833.33ns units on Gen9LP, 1.28us elsewhere. */
		if (IS_GEN9_LP(dev_priv)) {
			mul = 10000;
			div = 12;
		} else {
			mul = 1280;
			div = 1;
		}
		overflow_hw = BIT_ULL(32);
		time_hw = intel_uncore_read_fw(uncore, reg);
	}
	/*
	 * Counter wrap handling.
	 *
	 * Relies on a sufficient frequency of queries; otherwise the counters
	 * can still wrap undetected.
	 */
	prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
	dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
	/* RC6 delta from last sample. */
	if (time_hw >= prev_hw)
		time_hw -= prev_hw;
	else
		time_hw += overflow_hw - prev_hw;
	/* Add delta to RC6 extended raw driver copy. */
	time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
	dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irqrestore(&uncore->lock, flags);

	return mul_u64_u32_div(time_hw, mul, div);
}
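/*
 * Worked example of the wrap handling above, with made-up numbers: for a
 * 32-bit counter (overflow_hw == BIT_ULL(32)), a previous sample of
 * 0xffffff00 followed by a new read of 0x00000100 yields
 *
 *	time_hw = 0x100 + (0x100000000 - 0xffffff00) = 0x200
 *
 * i.e. 512 ticks elapsed across exactly one wrap. At the non-LP unit of
 * 1.28us per tick (mul == 1280, div == 1) that converts to
 * mul_u64_u32_div(512, 1280, 1) == 655360 ns of additional residency.
 */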
u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
{
	u32 cagf;

	if (INTEL_GEN(dev_priv) >= 9)
		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
	else
		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;

	return cagf;
}
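/*
 * Illustrative sketch (an assumption about callers, not code in this file):
 * sysfs/debugfs-style readers typically sample GEN6_RPSTAT1, extract the
 * current actual GPU frequency field and convert it to MHz, roughly:
 *
 *	u32 rpstat = I915_READ(GEN6_RPSTAT1);
 *	int freq_mhz = intel_gpu_freq(dev_priv,
 *				      intel_get_cagf(dev_priv, rpstat));
 */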