drivers/gpu/drm/i915/intel_pm.c

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
  25  *
  26  */
  27
  28 #include <linux/cpufreq.h>
  29 #include <drm/drm_plane_helper.h>
  30 #include "i915_drv.h"
  31 #include "intel_drv.h"
  32 #include "../../../platform/x86/intel_ips.h"
  33 #include <linux/module.h>
  34 #include <drm/drm_atomic_helper.h>
  35
/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage.  This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6 and in the
 * voltage consumed by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to the
 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
 * which brings the bulk of the power savings; deeper states save more power
 * still, but require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE                        (1<<0)
#define INTEL_RC6p_ENABLE                       (1<<1)
#define INTEL_RC6pp_ENABLE                      (1<<2)
  58
  59 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
  60 {
  61         /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
  62         I915_WRITE(CHICKEN_PAR1_1,
  63                    I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
  64
  65         I915_WRITE(GEN8_CONFIG0,
  66                    I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
  67
  68         /* WaEnableChickenDCPR:skl,bxt,kbl,glk */
  69         I915_WRITE(GEN8_CHICKEN_DCPR_1,
  70                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
  71
  72         /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
  73         /* WaFbcWakeMemOn:skl,bxt,kbl,glk */
  74         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
  75                    DISP_FBC_WM_DIS |
  76                    DISP_FBC_MEMORY_WAKE);
  77
  78         /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
  79         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
  80                    ILK_DPFC_DISABLE_DUMMY0);
  81 }
  82
  83 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
  84 {
  85         gen9_init_clock_gating(dev_priv);
  86
  87         /* WaDisableSDEUnitClockGating:bxt */
  88         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
  89                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
  90
  91         /*
  92          * FIXME:
  93          * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
  94          */
  95         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
  96                    GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
  97
  98         /*
  99          * Wa: Backlight PWM may stop in the asserted state, causing backlight
 100          * to stay fully on.
 101          */
 102         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
 103                    PWM1_GATING_DIS | PWM2_GATING_DIS);
 104 }
 105
 106 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
 107 {
 108         gen9_init_clock_gating(dev_priv);
 109
 110         /*
 111          * WaDisablePWMClockGating:glk
 112          * Backlight PWM may stop in the asserted state, causing backlight
 113          * to stay fully on.
 114          */
 115         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
 116                    PWM1_GATING_DIS | PWM2_GATING_DIS);
 117
 118         /* WaDDIIOTimeout:glk */
 119         if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
 120                 u32 val = I915_READ(CHICKEN_MISC_2);
 121                 val &= ~(GLK_CL0_PWR_DOWN |
 122                          GLK_CL1_PWR_DOWN |
 123                          GLK_CL2_PWR_DOWN);
 124                 I915_WRITE(CHICKEN_MISC_2, val);
 125         }
 126
 127 }
 128
 129 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
 130 {
 131         u32 tmp;
 132
 133         tmp = I915_READ(CLKCFG);
 134
 135         switch (tmp & CLKCFG_FSB_MASK) {
 136         case CLKCFG_FSB_533:
 137                 dev_priv->fsb_freq = 533; /* 133*4 */
 138                 break;
 139         case CLKCFG_FSB_800:
 140                 dev_priv->fsb_freq = 800; /* 200*4 */
 141                 break;
 142         case CLKCFG_FSB_667:
 143                 dev_priv->fsb_freq =  667; /* 167*4 */
 144                 break;
 145         case CLKCFG_FSB_400:
 146                 dev_priv->fsb_freq = 400; /* 100*4 */
 147                 break;
 148         }
 149
 150         switch (tmp & CLKCFG_MEM_MASK) {
 151         case CLKCFG_MEM_533:
 152                 dev_priv->mem_freq = 533;
 153                 break;
 154         case CLKCFG_MEM_667:
 155                 dev_priv->mem_freq = 667;
 156                 break;
 157         case CLKCFG_MEM_800:
 158                 dev_priv->mem_freq = 800;
 159                 break;
 160         }
 161
 162         /* detect pineview DDR3 setting */
 163         tmp = I915_READ(CSHRDDR3CTL);
 164         dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
 165 }
 166
 167 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 168 {
 169         u16 ddrpll, csipll;
 170
 171         ddrpll = I915_READ16(DDRMPLL1);
 172         csipll = I915_READ16(CSIPLL0);
 173
 174         switch (ddrpll & 0xff) {
 175         case 0xc:
 176                 dev_priv->mem_freq = 800;
 177                 break;
 178         case 0x10:
 179                 dev_priv->mem_freq = 1066;
 180                 break;
 181         case 0x14:
 182                 dev_priv->mem_freq = 1333;
 183                 break;
 184         case 0x18:
 185                 dev_priv->mem_freq = 1600;
 186                 break;
 187         default:
 188                 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
 189                                  ddrpll & 0xff);
 190                 dev_priv->mem_freq = 0;
 191                 break;
 192         }
 193
 194         dev_priv->ips.r_t = dev_priv->mem_freq;
 195
 196         switch (csipll & 0x3ff) {
 197         case 0x00c:
 198                 dev_priv->fsb_freq = 3200;
 199                 break;
 200         case 0x00e:
 201                 dev_priv->fsb_freq = 3733;
 202                 break;
 203         case 0x010:
 204                 dev_priv->fsb_freq = 4266;
 205                 break;
 206         case 0x012:
 207                 dev_priv->fsb_freq = 4800;
 208                 break;
 209         case 0x014:
 210                 dev_priv->fsb_freq = 5333;
 211                 break;
 212         case 0x016:
 213                 dev_priv->fsb_freq = 5866;
 214                 break;
 215         case 0x018:
 216                 dev_priv->fsb_freq = 6400;
 217                 break;
 218         default:
 219                 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
 220                                  csipll & 0x3ff);
 221                 dev_priv->fsb_freq = 0;
 222                 break;
 223         }
 224
 225         if (dev_priv->fsb_freq == 3200) {
 226                 dev_priv->ips.c_m = 0;
 227         } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
 228                 dev_priv->ips.c_m = 1;
 229         } else {
 230                 dev_priv->ips.c_m = 2;
 231         }
 232 }
 233
 234 static const struct cxsr_latency cxsr_latency_table[] = {
 235         {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
 236         {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
 237         {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
 238         {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
 239         {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
 240
 241         {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
 242         {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
 243         {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
 244         {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
 245         {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
 246
 247         {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
 248         {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
 249         {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
 250         {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
 251         {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
 252
 253         {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
 254         {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
 255         {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
 256         {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
 257         {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
 258
 259         {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
 260         {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
 261         {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
 262         {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
 263         {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
 264
 265         {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
 266         {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
 267         {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
 268         {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
 269         {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
 270 };
 271
 272 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
 273                                                          bool is_ddr3,
 274                                                          int fsb,
 275                                                          int mem)
 276 {
 277         const struct cxsr_latency *latency;
 278         int i;
 279
 280         if (fsb == 0 || mem == 0)
 281                 return NULL;
 282
 283         for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
 284                 latency = &cxsr_latency_table[i];
 285                 if (is_desktop == latency->is_desktop &&
 286                     is_ddr3 == latency->is_ddr3 &&
 287                     fsb == latency->fsb_freq && mem == latency->mem_freq)
 288                         return latency;
 289         }
 290
 291         DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
 292
 293         return NULL;
 294 }
 295
 296 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 297 {
 298         u32 val;
 299
 300         mutex_lock(&dev_priv->rps.hw_lock);
 301
 302         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
 303         if (enable)
 304                 val &= ~FORCE_DDR_HIGH_FREQ;
 305         else
 306                 val |= FORCE_DDR_HIGH_FREQ;
 307         val &= ~FORCE_DDR_LOW_FREQ;
 308         val |= FORCE_DDR_FREQ_REQ_ACK;
 309         vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
 310
 311         if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
 312                       FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
 313                 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
 314
 315         mutex_unlock(&dev_priv->rps.hw_lock);
 316 }
 317
 318 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 319 {
 320         u32 val;
 321
 322         mutex_lock(&dev_priv->rps.hw_lock);
 323
 324         val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 325         if (enable)
 326                 val |= DSP_MAXFIFO_PM5_ENABLE;
 327         else
 328                 val &= ~DSP_MAXFIFO_PM5_ENABLE;
 329         vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
 330
 331         mutex_unlock(&dev_priv->rps.hw_lock);
 332 }
 333
/*
 * Place a watermark @value into the register field named @plane: shift it
 * left by DSPFW_<plane>_SHIFT and mask the result with DSPFW_<plane>_MASK,
 * so bits that do not fit the field are dropped.
 */
#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
 336
 337 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
 338 {
 339         bool was_enabled;
 340         u32 val;
 341
 342         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 343                 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 344                 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
 345                 POSTING_READ(FW_BLC_SELF_VLV);
 346         } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
 347                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
 348                 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
 349                 POSTING_READ(FW_BLC_SELF);
 350         } else if (IS_PINEVIEW(dev_priv)) {
 351                 val = I915_READ(DSPFW3);
 352                 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
 353                 if (enable)
 354                         val |= PINEVIEW_SELF_REFRESH_EN;
 355                 else
 356                         val &= ~PINEVIEW_SELF_REFRESH_EN;
 357                 I915_WRITE(DSPFW3, val);
 358                 POSTING_READ(DSPFW3);
 359         } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
 360                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
 361                 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
 362                                _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
 363                 I915_WRITE(FW_BLC_SELF, val);
 364                 POSTING_READ(FW_BLC_SELF);
 365         } else if (IS_I915GM(dev_priv)) {
 366                 /*
 367                  * FIXME can't find a bit like this for 915G, and
 368                  * and yet it does have the related watermark in
 369                  * FW_BLC_SELF. What's going on?
 370                  */
 371                 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
 372                 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
 373                                _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
 374                 I915_WRITE(INSTPM, val);
 375                 POSTING_READ(INSTPM);
 376         } else {
 377                 return false;
 378         }
 379
 380         trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
 381
 382         DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
 383                       enableddisabled(enable),
 384                       enableddisabled(was_enabled));
 385
 386         return was_enabled;
 387 }
 388
 389 /**
 390  * intel_set_memory_cxsr - Configure CxSR state
 391  * @dev_priv: i915 device
 392  * @enable: Allow vs. disallow CxSR
 393  *
 394  * Allow or disallow the system to enter a special CxSR
 395  * (C-state self refresh) state. What typically happens in CxSR mode
 396  * is that several display FIFOs may get combined into a single larger
 397  * FIFO for a particular plane (so called max FIFO mode) to allow the
 398  * system to defer memory fetches longer, and the memory will enter
 399  * self refresh.
 400  *
 401  * Note that enabling CxSR does not guarantee that the system enter
 402  * this special mode, nor does it guarantee that the system stays
 403  * in that mode once entered. So this just allows/disallows the system
 404  * to autonomously utilize the CxSR mode. Other factors such as core
 405  * C-states will affect when/if the system actually enters/exits the
 406  * CxSR mode.
 407  *
 408  * Note that on VLV/CHV this actually only controls the max FIFO mode,
 409  * and the system is free to enter/exit memory self refresh at any time
 410  * even when the use of CxSR has been disallowed.
 411  *
 412  * While the system is actually in the CxSR/max FIFO mode, some plane
 413  * control registers will not get latched on vblank. Thus in order to
 414  * guarantee the system will respond to changes in the plane registers
 415  * we must always disallow CxSR prior to making changes to those registers.
 416  * Unfortunately the system will re-evaluate the CxSR conditions at
 417  * frame start which happens after vblank start (which is when the plane
 418  * registers would get latched), so we can't proceed with the plane update
 419  * during the same frame where we disallowed CxSR.
 420  *
 421  * Certain platforms also have a deeper HPLL SR mode. Fortunately the
 422  * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
 423  * the hardware w.r.t. HPLL SR when writing to plane registers.
 424  * Disallowing just CxSR is sufficient.
 425  */
 426 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
 427 {
 428         bool ret;
 429
 430         mutex_lock(&dev_priv->wm.wm_mutex);
 431         ret = _intel_set_memory_cxsr(dev_priv, enable);
 432         dev_priv->wm.vlv.cxsr = enable;
 433         mutex_unlock(&dev_priv->wm.wm_mutex);
 434
 435         return ret;
 436 }
 437
/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 *
 * The constant is expressed in nanoseconds (5000 ns == 5 us).
 */
static const int pessimal_latency_ns = 5000;
 453
/*
 * Assemble a 9-bit value from two registers: bits 7:0 are the byte of
 * @dsparb found at bit offset @lo_shift, bit 8 is the single bit of
 * @dsparb2 found at bit offset @hi_shift.
 */
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
 456
 457 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
 458 {
 459         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
 460         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 461         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
 462         enum pipe pipe = crtc->pipe;
 463         int sprite0_start, sprite1_start;
 464
 465         switch (pipe) {
 466                 uint32_t dsparb, dsparb2, dsparb3;
 467         case PIPE_A:
 468                 dsparb = I915_READ(DSPARB);
 469                 dsparb2 = I915_READ(DSPARB2);
 470                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
 471                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
 472                 break;
 473         case PIPE_B:
 474                 dsparb = I915_READ(DSPARB);
 475                 dsparb2 = I915_READ(DSPARB2);
 476                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
 477                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
 478                 break;
 479         case PIPE_C:
 480                 dsparb2 = I915_READ(DSPARB2);
 481                 dsparb3 = I915_READ(DSPARB3);
 482                 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
 483                 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
 484                 break;
 485         default:
 486                 MISSING_CASE(pipe);
 487                 return;
 488         }
 489
 490         fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
 491         fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
 492         fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
 493         fifo_state->plane[PLANE_CURSOR] = 63;
 494 }
 495
 496 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
 497 {
 498         uint32_t dsparb = I915_READ(DSPARB);
 499         int size;
 500
 501         size = dsparb & 0x7f;
 502         if (plane)
 503                 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
 504
 505         DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
 506                       plane ? "B" : "A", size);
 507
 508         return size;
 509 }
 510
 511 static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
 512 {
 513         uint32_t dsparb = I915_READ(DSPARB);
 514         int size;
 515
 516         size = dsparb & 0x1ff;
 517         if (plane)
 518                 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
 519         size >>= 1; /* Convert to cachelines */
 520
 521         DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
 522                       plane ? "B" : "A", size);
 523
 524         return size;
 525 }
 526
 527 static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
 528 {
 529         uint32_t dsparb = I915_READ(DSPARB);
 530         int size;
 531
 532         size = dsparb & 0x7f;
 533         size >>= 2; /* Convert to cachelines */
 534
 535         DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
 536                       plane ? "B" : "A",
 537                       size);
 538
 539         return size;
 540 }
 541
/*
 * Per-platform FIFO watermark parameter sets.
 *
 * As consumed by intel_calculate_wm(): .cacheline_size is the unit the
 * required byte count is rounded up to, .guard_size is added to the
 * resulting entry count, .max_wm caps the computed watermark and
 * .default_wm replaces a computed watermark that is <= 0.
 * intel_calculate_wm() receives the FIFO size as a separate argument;
 * presumably callers derive it from .fifo_size or from one of the
 * *_get_fifo_size() helpers -- confirm against the callers.
 */

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
/* Differs from pineview_display_wm only in .default_wm. */
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
/* Currently field-for-field identical to pineview_cursor_wm. */
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
/* Note: the default watermark here equals the maximum (G4X_MAX_WM). */
static const struct intel_watermark_params g4x_wm_info = {
        .fifo_size = G4X_FIFO_SIZE,
        .max_wm = G4X_MAX_WM,
        .default_wm = G4X_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
/* Same as g4x_cursor_wm_info except for .cacheline_size. */
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
/* i945_wm_info and i915_wm_info differ only in .fifo_size. */
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
/* Same as i830_a_wm_info but with half the maximum watermark. */
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
 627
 628 /**
 629  * intel_wm_method1 - Method 1 / "small buffer" watermark formula
 630  * @pixel_rate: Pipe pixel rate in kHz
 631  * @cpp: Plane bytes per pixel
 632  * @latency: Memory wakeup latency in 0.1us units
 633  *
 634  * Compute the watermark using the method 1 or "small buffer"
 635  * formula. The caller may additonally add extra cachelines
 636  * to account for TLB misses and clock crossings.
 637  *
 638  * This method is concerned with the short term drain rate
 639  * of the FIFO, ie. it does not account for blanking periods
 640  * which would effectively reduce the average drain rate across
 641  * a longer period. The name "small" refers to the fact the
 642  * FIFO is relatively small compared to the amount of data
 643  * fetched.
 644  *
 645  * The FIFO level vs. time graph might look something like:
 646  *
 647  *   |\   |\
 648  *   | \  | \
 649  * __---__---__ (- plane active, _ blanking)
 650  * -> time
 651  *
 652  * or perhaps like this:
 653  *
 654  *   |\|\  |\|\
 655  * __----__----__ (- plane active, _ blanking)
 656  * -> time
 657  *
 658  * Returns:
 659  * The watermark in bytes
 660  */
 661 static unsigned int intel_wm_method1(unsigned int pixel_rate,
 662                                      unsigned int cpp,
 663                                      unsigned int latency)
 664 {
 665         uint64_t ret;
 666
 667         ret = (uint64_t) pixel_rate * cpp * latency;
 668         ret = DIV_ROUND_UP_ULL(ret, 10000);
 669
 670         return ret;
 671 }
 672
 673 /**
 674  * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 675  * @pixel_rate: Pipe pixel rate in kHz
 676  * @htotal: Pipe horizontal total
 677  * @width: Plane width in pixels
 678  * @cpp: Plane bytes per pixel
 679  * @latency: Memory wakeup latency in 0.1us units
 680  *
 681  * Compute the watermark using the method 2 or "large buffer"
 682  * formula. The caller may additonally add extra cachelines
 683  * to account for TLB misses and clock crossings.
 684  *
 685  * This method is concerned with the long term drain rate
 686  * of the FIFO, ie. it does account for blanking periods
 687  * which effectively reduce the average drain rate across
 688  * a longer period. The name "large" refers to the fact the
 689  * FIFO is relatively large compared to the amount of data
 690  * fetched.
 691  *
 692  * The FIFO level vs. time graph might look something like:
 693  *
 694  *    |\___       |\___
 695  *    |    \___   |    \___
 696  *    |        \  |        \
 697  * __ --__--__--__--__--__--__ (- plane active, _ blanking)
 698  * -> time
 699  *
 700  * Returns:
 701  * The watermark in bytes
 702  */
 703 static unsigned int intel_wm_method2(unsigned int pixel_rate,
 704                                      unsigned int htotal,
 705                                      unsigned int width,
 706                                      unsigned int cpp,
 707                                      unsigned int latency)
 708 {
 709         unsigned int ret;
 710
 711         /*
 712          * FIXME remove once all users are computing
 713          * watermarks in the correct place.
 714          */
 715         if (WARN_ON_ONCE(htotal == 0))
 716                 htotal = 1;
 717
 718         ret = (latency * pixel_rate) / (htotal * 10000);
 719         ret = (ret + 1) * width * cpp;
 720
 721         return ret;
 722 }
 723
 724 /**
 725  * intel_calculate_wm - calculate watermark level
 726  * @pixel_rate: pixel clock
 727  * @wm: chip FIFO params
 728  * @cpp: bytes per pixel
 729  * @latency_ns: memory latency for the platform
 730  *
 731  * Calculate the watermark level (the level at which the display plane will
 732  * start fetching from memory again).  Each chip has a different display
 733  * FIFO size and allocation, so the caller needs to figure that out and pass
 734  * in the correct intel_watermark_params structure.
 735  *
 736  * As the pixel clock runs, the FIFO will be drained at a rate that depends
 737  * on the pixel size.  When it reaches the watermark level, it'll start
 738  * fetching FIFO line sized based chunks from memory until the FIFO fills
 739  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
 740  * will occur, and a display engine hang could result.
 741  */
 742 static unsigned int intel_calculate_wm(int pixel_rate,
 743                                        const struct intel_watermark_params *wm,
 744                                        int fifo_size, int cpp,
 745                                        unsigned int latency_ns)
 746 {
 747         int entries, wm_size;
 748
 749         /*
 750          * Note: we need to make sure we don't overflow for various clock &
 751          * latency values.
 752          * clocks go from a few thousand to several hundred thousand.
 753          * latency is usually a few thousand
 754          */
 755         entries = intel_wm_method1(pixel_rate, cpp,
 756                                    latency_ns / 100);
 757         entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
 758                 wm->guard_size;
 759         DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
 760
 761         wm_size = fifo_size - entries;
 762         DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
 763
 764         /* Don't promote wm_size to unsigned... */
 765         if (wm_size > wm->max_wm)
 766                 wm_size = wm->max_wm;
 767         if (wm_size <= 0)
 768                 wm_size = wm->default_wm;
 769
 770         /*
 771          * Bspec seems to indicate that the value shouldn't be lower than
 772          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
 773          * Lets go for 8 which is the burst size since certain platforms
 774          * already use a hardcoded 8 (which is what the spec says should be
 775          * done).
 776          */
 777         if (wm_size <= 8)
 778                 wm_size = 8;
 779
 780         return wm_size;
 781 }
 782
 783 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
 784 {
 785         return dev_priv->wm.max_level + 1;
 786 }
 787
 788 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
 789                                    const struct intel_plane_state *plane_state)
 790 {
 791         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
 792
 793         /* FIXME check the 'enable' instead */
 794         if (!crtc_state->base.active)
 795                 return false;
 796
 797         /*
 798          * Treat cursor with fb as always visible since cursor updates
 799          * can happen faster than the vrefresh rate, and the current
 800          * watermark code doesn't handle that correctly. Cursor updates
 801          * which set/clear the fb or change the cursor size are going
 802          * to get throttled by intel_legacy_cursor_update() to work
 803          * around this problem with the watermark code.
 804          */
 805         if (plane->id == PLANE_CURSOR)
 806                 return plane_state->base.fb != NULL;
 807         else
 808                 return plane_state->base.visible;
 809 }
 810
 811 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
 812 {
 813         struct intel_crtc *crtc, *enabled = NULL;
 814
 815         for_each_intel_crtc(&dev_priv->drm, crtc) {
 816                 if (intel_crtc_active(crtc)) {
 817                         if (enabled)
 818                                 return NULL;
 819                         enabled = crtc;
 820                 }
 821         }
 822
 823         return enabled;
 824 }
 825
 826 static void pineview_update_wm(struct intel_crtc *unused_crtc)
 827 {
 828         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
 829         struct intel_crtc *crtc;
 830         const struct cxsr_latency *latency;
 831         u32 reg;
 832         unsigned int wm;
 833
 834         latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
 835                                          dev_priv->is_ddr3,
 836                                          dev_priv->fsb_freq,
 837                                          dev_priv->mem_freq);
 838         if (!latency) {
 839                 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
 840                 intel_set_memory_cxsr(dev_priv, false);
 841                 return;
 842         }
 843
 844         crtc = single_enabled_crtc(dev_priv);
 845         if (crtc) {
 846                 const struct drm_display_mode *adjusted_mode =
 847                         &crtc->config->base.adjusted_mode;
 848                 const struct drm_framebuffer *fb =
 849                         crtc->base.primary->state->fb;
 850                 int cpp = fb->format->cpp[0];
 851                 int clock = adjusted_mode->crtc_clock;
 852
 853                 /* Display SR */
 854                 wm = intel_calculate_wm(clock, &pineview_display_wm,
 855                                         pineview_display_wm.fifo_size,
 856                                         cpp, latency->display_sr);
 857                 reg = I915_READ(DSPFW1);
 858                 reg &= ~DSPFW_SR_MASK;
 859                 reg |= FW_WM(wm, SR);
 860                 I915_WRITE(DSPFW1, reg);
 861                 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
 862
 863                 /* cursor SR */
 864                 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
 865                                         pineview_display_wm.fifo_size,
 866                                         4, latency->cursor_sr);
 867                 reg = I915_READ(DSPFW3);
 868                 reg &= ~DSPFW_CURSOR_SR_MASK;
 869                 reg |= FW_WM(wm, CURSOR_SR);
 870                 I915_WRITE(DSPFW3, reg);
 871
 872                 /* Display HPLL off SR */
 873                 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
 874                                         pineview_display_hplloff_wm.fifo_size,
 875                                         cpp, latency->display_hpll_disable);
 876                 reg = I915_READ(DSPFW3);
 877                 reg &= ~DSPFW_HPLL_SR_MASK;
 878                 reg |= FW_WM(wm, HPLL_SR);
 879                 I915_WRITE(DSPFW3, reg);
 880
 881                 /* cursor HPLL off SR */
 882                 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
 883                                         pineview_display_hplloff_wm.fifo_size,
 884                                         4, latency->cursor_hpll_disable);
 885                 reg = I915_READ(DSPFW3);
 886                 reg &= ~DSPFW_HPLL_CURSOR_MASK;
 887                 reg |= FW_WM(wm, HPLL_CURSOR);
 888                 I915_WRITE(DSPFW3, reg);
 889                 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
 890
 891                 intel_set_memory_cxsr(dev_priv, true);
 892         } else {
 893                 intel_set_memory_cxsr(dev_priv, false);
 894         }
 895 }
 896
 897 /*
 898  * Documentation says:
 899  * "If the line size is small, the TLB fetches can get in the way of the
 900  *  data fetches, causing some lag in the pixel data return which is not
 901  *  accounted for in the above formulas. The following adjustment only
 902  *  needs to be applied if eight whole lines fit in the buffer at once.
 903  *  The WM is adjusted upwards by the difference between the FIFO size
 904  *  and the size of 8 whole lines. This adjustment is always performed
 905  *  in the actual pixel depth regardless of whether FBC is enabled or not."
 906  */
 907 static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
 908 {
 909         int tlb_miss = fifo_size * 64 - width * cpp * 8;
 910
 911         return max(0, tlb_miss);
 912 }
 913
 914 static bool g4x_compute_wm0(struct drm_i915_private *dev_priv,
 915                             int plane,
 916                             const struct intel_watermark_params *display,
 917                             int display_latency_ns,
 918                             const struct intel_watermark_params *cursor,
 919                             int cursor_latency_ns,
 920                             int *plane_wm,
 921                             int *cursor_wm)
 922 {
 923         struct intel_crtc *crtc;
 924         const struct drm_display_mode *adjusted_mode;
 925         const struct drm_framebuffer *fb;
 926         int htotal, plane_width, cursor_width, clock, cpp;
 927         int entries;
 928
 929         crtc = intel_get_crtc_for_plane(dev_priv, plane);
 930         if (!intel_crtc_active(crtc)) {
 931                 *cursor_wm = cursor->guard_size;
 932                 *plane_wm = display->guard_size;
 933                 return false;
 934         }
 935
 936         adjusted_mode = &crtc->config->base.adjusted_mode;
 937         fb = crtc->base.primary->state->fb;
 938         clock = adjusted_mode->crtc_clock;
 939         htotal = adjusted_mode->crtc_htotal;
 940         plane_width = crtc->config->pipe_src_w;
 941         cursor_width = crtc->base.cursor->state->crtc_w;
 942         cpp = fb->format->cpp[0];
 943
 944         /* Use the small buffer method to calculate plane watermark */
 945         entries = intel_wm_method1(clock, cpp, display_latency_ns / 100);
 946         entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp);
 947         entries = DIV_ROUND_UP(entries, display->cacheline_size);
 948         *plane_wm = entries + display->guard_size;
 949         if (*plane_wm > (int)display->max_wm)
 950                 *plane_wm = display->max_wm;
 951
 952         /* Use the large buffer method to calculate cursor watermark */
 953         entries = intel_wm_method2(clock, htotal, cursor_width, 4,
 954                                    cursor_latency_ns / 100);
 955         entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4);
 956         entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
 957         *cursor_wm = entries + cursor->guard_size;
 958         if (*cursor_wm > (int)cursor->max_wm)
 959                 *cursor_wm = (int)cursor->max_wm;
 960
 961         return true;
 962 }
 963
 964 /*
 965  * Check the wm result.
 966  *
 967  * If any calculated watermark values is larger than the maximum value that
 968  * can be programmed into the associated watermark register, that watermark
 969  * must be disabled.
 970  */
 971 static bool g4x_check_srwm(struct drm_i915_private *dev_priv,
 972                            int display_wm, int cursor_wm,
 973                            const struct intel_watermark_params *display,
 974                            const struct intel_watermark_params *cursor)
 975 {
 976         DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
 977                       display_wm, cursor_wm);
 978
 979         if (display_wm > display->max_wm) {
 980                 DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n",
 981                               display_wm, display->max_wm);
 982                 return false;
 983         }
 984
 985         if (cursor_wm > cursor->max_wm) {
 986                 DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n",
 987                               cursor_wm, cursor->max_wm);
 988                 return false;
 989         }
 990
 991         if (!(display_wm || cursor_wm)) {
 992                 DRM_DEBUG_KMS("SR latency is 0, disabling\n");
 993                 return false;
 994         }
 995
 996         return true;
 997 }
 998
 999 static bool g4x_compute_srwm(struct drm_i915_private *dev_priv,
1000                              int plane,
1001                              int latency_ns,
1002                              const struct intel_watermark_params *display,
1003                              const struct intel_watermark_params *cursor,
1004                              int *display_wm, int *cursor_wm)
1005 {
1006         struct intel_crtc *crtc;
1007         const struct drm_display_mode *adjusted_mode;
1008         const struct drm_framebuffer *fb;
1009         int plane_width, cursor_width, htotal, cpp, clock;
1010         int small, large;
1011         int entries;
1012
1013         if (!latency_ns) {
1014                 *display_wm = *cursor_wm = 0;
1015                 return false;
1016         }
1017
1018         crtc = intel_get_crtc_for_plane(dev_priv, plane);
1019         adjusted_mode = &crtc->config->base.adjusted_mode;
1020         fb = crtc->base.primary->state->fb;
1021         clock = adjusted_mode->crtc_clock;
1022         htotal = adjusted_mode->crtc_htotal;
1023         plane_width = crtc->config->pipe_src_w;
1024         cursor_width = crtc->base.cursor->state->crtc_w;
1025         cpp = fb->format->cpp[0];
1026
1027         /* Use the minimum of the small and large buffer method for primary */
1028         small = intel_wm_method1(clock, cpp, latency_ns / 100);
1029         large = intel_wm_method2(clock, htotal, plane_width, cpp,
1030                                  latency_ns / 100);
1031         entries = min(small, large);
1032         entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp);
1033         entries = DIV_ROUND_UP(entries, display->cacheline_size);
1034         *display_wm = entries + display->guard_size;
1035
1036         /* calculate the self-refresh watermark for display cursor */
1037         entries = intel_wm_method2(clock, htotal, cursor_width, 4,
1038                                    latency_ns / 100);
1039         entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4);
1040         entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1041         *cursor_wm = entries + cursor->guard_size;
1042
1043         return g4x_check_srwm(dev_priv,
1044                               *display_wm, *cursor_wm,
1045                               display, cursor);
1046 }
1047
1048 #define FW_WM_VLV(value, plane) \
1049         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
1050
1051 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
1052                                 const struct vlv_wm_values *wm)
1053 {
1054         enum pipe pipe;
1055
1056         for_each_pipe(dev_priv, pipe) {
1057                 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
1058
1059                 I915_WRITE(VLV_DDL(pipe),
1060                            (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
1061                            (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
1062                            (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
1063                            (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
1064         }
1065
1066         /*
1067          * Zero the (unused) WM1 watermarks, and also clear all the
1068          * high order bits so that there are no out of bounds values
1069          * present in the registers during the reprogramming.
1070          */
1071         I915_WRITE(DSPHOWM, 0);
1072         I915_WRITE(DSPHOWM1, 0);
1073         I915_WRITE(DSPFW4, 0);
1074         I915_WRITE(DSPFW5, 0);
1075         I915_WRITE(DSPFW6, 0);
1076
1077         I915_WRITE(DSPFW1,
1078                    FW_WM(wm->sr.plane, SR) |
1079                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
1080                    FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
1081                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
1082         I915_WRITE(DSPFW2,
1083                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
1084                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
1085                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
1086         I915_WRITE(DSPFW3,
1087                    FW_WM(wm->sr.cursor, CURSOR_SR));
1088
1089         if (IS_CHERRYVIEW(dev_priv)) {
1090                 I915_WRITE(DSPFW7_CHV,
1091                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1092                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1093                 I915_WRITE(DSPFW8_CHV,
1094                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1095                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1096                 I915_WRITE(DSPFW9_CHV,
1097                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1098                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1099                 I915_WRITE(DSPHOWM,
1100                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1101                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1102                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1103                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1104                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1105                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1106                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1107                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1108                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1109                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1110         } else {
1111                 I915_WRITE(DSPFW7,
1112                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1113                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1114                 I915_WRITE(DSPHOWM,
1115                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1116                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1117                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1118                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1119                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1120                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1121                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1122         }
1123
1124         POSTING_READ(DSPFW1);
1125 }
1126
1127 #undef FW_WM_VLV
1128
/*
 * intel_wm_method2() result divided by 64, rounded up.
 * latency must be in 0.1us units.
 */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
                                   unsigned int htotal,
                                   unsigned int width,
                                   unsigned int cpp,
                                   unsigned int latency)
{
        unsigned int raw = intel_wm_method2(pixel_rate, htotal,
                                            width, cpp, latency);

        return DIV_ROUND_UP(raw, 64);
}
1144
1145 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1146 {
1147         /* all latencies in usec */
1148         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1149
1150         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1151
1152         if (IS_CHERRYVIEW(dev_priv)) {
1153                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1154                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1155
1156                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1157         }
1158 }
1159
1160 static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1161                                      const struct intel_plane_state *plane_state,
1162                                      int level)
1163 {
1164         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1165         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1166         const struct drm_display_mode *adjusted_mode =
1167                 &crtc_state->base.adjusted_mode;
1168         int clock, htotal, cpp, width, wm;
1169
1170         if (dev_priv->wm.pri_latency[level] == 0)
1171                 return USHRT_MAX;
1172
1173         if (!intel_wm_plane_visible(crtc_state, plane_state))
1174                 return 0;
1175
1176         cpp = plane_state->base.fb->format->cpp[0];
1177         clock = adjusted_mode->crtc_clock;
1178         htotal = adjusted_mode->crtc_htotal;
1179         width = crtc_state->pipe_src_w;
1180
1181         if (plane->id == PLANE_CURSOR) {
1182                 /*
1183                  * FIXME the formula gives values that are
1184                  * too big for the cursor FIFO, and hence we
1185                  * would never be able to use cursors. For
1186                  * now just hardcode the watermark.
1187                  */
1188                 wm = 63;
1189         } else {
1190                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1191                                     dev_priv->wm.pri_latency[level] * 10);
1192         }
1193
1194         return min_t(int, wm, USHRT_MAX);
1195 }
1196
1197 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1198 {
1199         return (active_planes & (BIT(PLANE_SPRITE0) |
1200                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1201 }
1202
1203 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1204 {
1205         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1206         const struct g4x_pipe_wm *raw =
1207                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1208         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1209         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1210         int num_active_planes = hweight32(active_planes);
1211         const int fifo_size = 511;
1212         int fifo_extra, fifo_left = fifo_size;
1213         int sprite0_fifo_extra = 0;
1214         unsigned int total_rate;
1215         enum plane_id plane_id;
1216
1217         /*
1218          * When enabling sprite0 after sprite1 has already been enabled
1219          * we tend to get an underrun unless sprite0 already has some
1220          * FIFO space allcoated. Hence we always allocate at least one
1221          * cacheline for sprite0 whenever sprite1 is enabled.
1222          *
1223          * All other plane enable sequences appear immune to this problem.
1224          */
1225         if (vlv_need_sprite0_fifo_workaround(active_planes))
1226                 sprite0_fifo_extra = 1;
1227
1228         total_rate = raw->plane[PLANE_PRIMARY] +
1229                 raw->plane[PLANE_SPRITE0] +
1230                 raw->plane[PLANE_SPRITE1] +
1231                 sprite0_fifo_extra;
1232
1233         if (total_rate > fifo_size)
1234                 return -EINVAL;
1235
1236         if (total_rate == 0)
1237                 total_rate = 1;
1238
1239         for_each_plane_id_on_crtc(crtc, plane_id) {
1240                 unsigned int rate;
1241
1242                 if ((active_planes & BIT(plane_id)) == 0) {
1243                         fifo_state->plane[plane_id] = 0;
1244                         continue;
1245                 }
1246
1247                 rate = raw->plane[plane_id];
1248                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1249                 fifo_left -= fifo_state->plane[plane_id];
1250         }
1251
1252         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1253         fifo_left -= sprite0_fifo_extra;
1254
1255         fifo_state->plane[PLANE_CURSOR] = 63;
1256
1257         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1258
1259         /* spread the remainder evenly */
1260         for_each_plane_id_on_crtc(crtc, plane_id) {
1261                 int plane_extra;
1262
1263                 if (fifo_left == 0)
1264                         break;
1265
1266                 if ((active_planes & BIT(plane_id)) == 0)
1267                         continue;
1268
1269                 plane_extra = min(fifo_extra, fifo_left);
1270                 fifo_state->plane[plane_id] += plane_extra;
1271                 fifo_left -= plane_extra;
1272         }
1273
1274         WARN_ON(active_planes != 0 && fifo_left != 0);
1275
1276         /* give it all to the first plane if none are active */
1277         if (active_planes == 0) {
1278                 WARN_ON(fifo_left != fifo_size);
1279                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1280         }
1281
1282         return 0;
1283 }
1284
1285 /* mark all levels starting from 'level' as invalid */
1286 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1287                                struct vlv_wm_state *wm_state, int level)
1288 {
1289         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1290
1291         for (; level < intel_wm_num_levels(dev_priv); level++) {
1292                 enum plane_id plane_id;
1293
1294                 for_each_plane_id_on_crtc(crtc, plane_id)
1295                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1296
1297                 wm_state->sr[level].cursor = USHRT_MAX;
1298                 wm_state->sr[level].plane = USHRT_MAX;
1299         }
1300 }
1301
1302 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1303 {
1304         if (wm > fifo_size)
1305                 return USHRT_MAX;
1306         else
1307                 return fifo_size - wm;
1308 }
1309
1310 /*
1311  * Starting from 'level' set all higher
1312  * levels to 'value' in the "raw" watermarks.
1313  */
1314 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1315                                  int level, enum plane_id plane_id, u16 value)
1316 {
1317         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1318         int num_levels = intel_wm_num_levels(dev_priv);
1319         bool dirty = false;
1320
1321         for (; level < num_levels; level++) {
1322                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1323
1324                 dirty |= raw->plane[plane_id] != value;
1325                 raw->plane[plane_id] = value;
1326         }
1327
1328         return dirty;
1329 }
1330
1331 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1332                                      const struct intel_plane_state *plane_state)
1333 {
1334         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1335         enum plane_id plane_id = plane->id;
1336         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1337         int level;
1338         bool dirty = false;
1339
1340         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1341                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1342                 goto out;
1343         }
1344
1345         for (level = 0; level < num_levels; level++) {
1346                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1347                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1348                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1349
1350                 if (wm > max_wm)
1351                         break;
1352
1353                 dirty |= raw->plane[plane_id] != wm;
1354                 raw->plane[plane_id] = wm;
1355         }
1356
1357         /* mark all higher levels as invalid */
1358         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1359
1360 out:
1361         if (dirty)
1362                 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1363                               plane->base.name,
1364                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1365                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1366                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1367
1368         return dirty;
1369 }
1370
1371 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1372                                       enum plane_id plane_id, int level)
1373 {
1374         const struct g4x_pipe_wm *raw =
1375                 &crtc_state->wm.vlv.raw[level];
1376         const struct vlv_fifo_state *fifo_state =
1377                 &crtc_state->wm.vlv.fifo_state;
1378
1379         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1380 }
1381
1382 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1383 {
1384         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1385                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1386                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1387                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1388 }
1389
1390 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1391 {
1392         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1393         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1394         struct intel_atomic_state *state =
1395                 to_intel_atomic_state(crtc_state->base.state);
1396         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1397         const struct vlv_fifo_state *fifo_state =
1398                 &crtc_state->wm.vlv.fifo_state;
1399         int num_active_planes = hweight32(crtc_state->active_planes &
1400                                           ~BIT(PLANE_CURSOR));
1401         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1402         struct intel_plane_state *plane_state;
1403         struct intel_plane *plane;
1404         enum plane_id plane_id;
1405         int level, ret, i;
1406         unsigned int dirty = 0;
1407
1408         for_each_intel_plane_in_state(state, plane, plane_state, i) {
1409                 const struct intel_plane_state *old_plane_state =
1410                         to_intel_plane_state(plane->base.state);
1411
1412                 if (plane_state->base.crtc != &crtc->base &&
1413                     old_plane_state->base.crtc != &crtc->base)
1414                         continue;
1415
1416                 if (vlv_raw_plane_wm_compute(crtc_state, plane_state))
1417                         dirty |= BIT(plane->id);
1418         }
1419
1420         /*
1421          * DSPARB registers may have been reset due to the
1422          * power well being turned off. Make sure we restore
1423          * them to a consistent state even if no primary/sprite
1424          * planes are initially active.
1425          */
1426         if (needs_modeset)
1427                 crtc_state->fifo_changed = true;
1428
1429         if (!dirty)
1430                 return 0;
1431
1432         /* cursor changes don't warrant a FIFO recompute */
1433         if (dirty & ~BIT(PLANE_CURSOR)) {
1434                 const struct intel_crtc_state *old_crtc_state =
1435                         to_intel_crtc_state(crtc->base.state);
1436                 const struct vlv_fifo_state *old_fifo_state =
1437                         &old_crtc_state->wm.vlv.fifo_state;
1438
1439                 ret = vlv_compute_fifo(crtc_state);
1440                 if (ret)
1441                         return ret;
1442
1443                 if (needs_modeset ||
1444                     memcmp(old_fifo_state, fifo_state,
1445                            sizeof(*fifo_state)) != 0)
1446                         crtc_state->fifo_changed = true;
1447         }
1448
1449         /* initially allow all levels */
1450         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1451         /*
1452          * Note that enabling cxsr with no primary/sprite planes
1453          * enabled can wedge the pipe. Hence we only allow cxsr
1454          * with exactly one enabled primary/sprite plane.
1455          */
1456         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1457
1458         for (level = 0; level < wm_state->num_levels; level++) {
1459                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1460                 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1461
1462                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1463                         break;
1464
1465                 for_each_plane_id_on_crtc(crtc, plane_id) {
1466                         wm_state->wm[level].plane[plane_id] =
1467                                 vlv_invert_wm_value(raw->plane[plane_id],
1468                                                     fifo_state->plane[plane_id]);
1469                 }
1470
1471                 wm_state->sr[level].plane =
1472                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1473                                                  raw->plane[PLANE_SPRITE0],
1474                                                  raw->plane[PLANE_SPRITE1]),
1475                                             sr_fifo_size);
1476
1477                 wm_state->sr[level].cursor =
1478                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1479                                             63);
1480         }
1481
1482         if (level == 0)
1483                 return -EINVAL;
1484
1485         /* limit to only levels we can actually handle */
1486         wm_state->num_levels = level;
1487
1488         /* invalidate the higher levels */
1489         vlv_invalidate_wms(crtc, wm_state, level);
1490
1491         return 0;
1492 }
1493
1494 #define VLV_FIFO(plane, value) \
1495         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1496
1497 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1498                                    struct intel_crtc_state *crtc_state)
1499 {
1500         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1501         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1502         const struct vlv_fifo_state *fifo_state =
1503                 &crtc_state->wm.vlv.fifo_state;
1504         int sprite0_start, sprite1_start, fifo_size;
1505
1506         if (!crtc_state->fifo_changed)
1507                 return;
1508
1509         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1510         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1511         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1512
1513         WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1514         WARN_ON(fifo_size != 511);
1515
1516         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1517
1518         /*
1519          * uncore.lock serves a double purpose here. It allows us to
1520          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1521          * it protects the DSPARB registers from getting clobbered by
1522          * parallel updates from multiple pipes.
1523          *
1524          * intel_pipe_update_start() has already disabled interrupts
1525          * for us, so a plain spin_lock() is sufficient here.
1526          */
1527         spin_lock(&dev_priv->uncore.lock);
1528
1529         switch (crtc->pipe) {
1530                 uint32_t dsparb, dsparb2, dsparb3;
1531         case PIPE_A:
1532                 dsparb = I915_READ_FW(DSPARB);
1533                 dsparb2 = I915_READ_FW(DSPARB2);
1534
1535                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1536                             VLV_FIFO(SPRITEB, 0xff));
1537                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1538                            VLV_FIFO(SPRITEB, sprite1_start));
1539
1540                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1541                              VLV_FIFO(SPRITEB_HI, 0x1));
1542                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1543                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1544
1545                 I915_WRITE_FW(DSPARB, dsparb);
1546                 I915_WRITE_FW(DSPARB2, dsparb2);
1547                 break;
1548         case PIPE_B:
1549                 dsparb = I915_READ_FW(DSPARB);
1550                 dsparb2 = I915_READ_FW(DSPARB2);
1551
1552                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1553                             VLV_FIFO(SPRITED, 0xff));
1554                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1555                            VLV_FIFO(SPRITED, sprite1_start));
1556
1557                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1558                              VLV_FIFO(SPRITED_HI, 0xff));
1559                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1560                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1561
1562                 I915_WRITE_FW(DSPARB, dsparb);
1563                 I915_WRITE_FW(DSPARB2, dsparb2);
1564                 break;
1565         case PIPE_C:
1566                 dsparb3 = I915_READ_FW(DSPARB3);
1567                 dsparb2 = I915_READ_FW(DSPARB2);
1568
1569                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1570                              VLV_FIFO(SPRITEF, 0xff));
1571                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1572                             VLV_FIFO(SPRITEF, sprite1_start));
1573
1574                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1575                              VLV_FIFO(SPRITEF_HI, 0xff));
1576                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1577                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1578
1579                 I915_WRITE_FW(DSPARB3, dsparb3);
1580                 I915_WRITE_FW(DSPARB2, dsparb2);
1581                 break;
1582         default:
1583                 break;
1584         }
1585
1586         POSTING_READ_FW(DSPARB);
1587
1588         spin_unlock(&dev_priv->uncore.lock);
1589 }
1590
1591 #undef VLV_FIFO
1592
1593 static int vlv_compute_intermediate_wm(struct drm_device *dev,
1594                                        struct intel_crtc *crtc,
1595                                        struct intel_crtc_state *crtc_state)
1596 {
1597         struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate;
1598         const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal;
1599         const struct vlv_wm_state *active = &crtc->wm.active.vlv;
1600         int level;
1601
1602         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
1603         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1604                 !crtc_state->disable_cxsr;
1605
1606         for (level = 0; level < intermediate->num_levels; level++) {
1607                 enum plane_id plane_id;
1608
1609                 for_each_plane_id_on_crtc(crtc, plane_id) {
1610                         intermediate->wm[level].plane[plane_id] =
1611                                 min(optimal->wm[level].plane[plane_id],
1612                                     active->wm[level].plane[plane_id]);
1613                 }
1614
1615                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
1616                                                     active->sr[level].plane);
1617                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
1618                                                      active->sr[level].cursor);
1619         }
1620
1621         vlv_invalidate_wms(crtc, intermediate, level);
1622
1623         /*
1624          * If our intermediate WM are identical to the final WM, then we can
1625          * omit the post-vblank programming; only update if it's different.
1626          */
1627         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1628                 crtc_state->wm.need_postvbl_update = true;
1629
1630         return 0;
1631 }
1632
1633 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
1634                          struct vlv_wm_values *wm)
1635 {
1636         struct intel_crtc *crtc;
1637         int num_active_crtcs = 0;
1638
1639         wm->level = dev_priv->wm.max_level;
1640         wm->cxsr = true;
1641
1642         for_each_intel_crtc(&dev_priv->drm, crtc) {
1643                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
1644
1645                 if (!crtc->active)
1646                         continue;
1647
1648                 if (!wm_state->cxsr)
1649                         wm->cxsr = false;
1650
1651                 num_active_crtcs++;
1652                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1653         }
1654
1655         if (num_active_crtcs != 1)
1656                 wm->cxsr = false;
1657
1658         if (num_active_crtcs > 1)
1659                 wm->level = VLV_WM_LEVEL_PM2;
1660
1661         for_each_intel_crtc(&dev_priv->drm, crtc) {
1662                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
1663                 enum pipe pipe = crtc->pipe;
1664
1665                 wm->pipe[pipe] = wm_state->wm[wm->level];
1666                 if (crtc->active && wm->cxsr)
1667                         wm->sr = wm_state->sr[wm->level];
1668
1669                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
1670                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
1671                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
1672                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
1673         }
1674 }
1675
/*
 * True when a value drops across @threshold: @old is at or above it
 * while @new is below it.
 */
static bool is_disabling(int old, int new, int threshold)
{
	if (old < threshold)
		return false;

	return new < threshold;
}
1680
/*
 * True when a value rises across @threshold: @old is below it while
 * @new is at or above it.
 */
static bool is_enabling(int old, int new, int threshold)
{
	if (old >= threshold)
		return false;

	return new >= threshold;
}
1685
1686 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
1687 {
1688         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
1689         struct vlv_wm_values new_wm = {};
1690
1691         vlv_merge_wm(dev_priv, &new_wm);
1692
1693         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1694                 return;
1695
1696         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
1697                 chv_set_memory_dvfs(dev_priv, false);
1698
1699         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
1700                 chv_set_memory_pm5(dev_priv, false);
1701
1702         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1703                 _intel_set_memory_cxsr(dev_priv, false);
1704
1705         vlv_write_wm_values(dev_priv, &new_wm);
1706
1707         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1708                 _intel_set_memory_cxsr(dev_priv, true);
1709
1710         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
1711                 chv_set_memory_pm5(dev_priv, true);
1712
1713         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
1714                 chv_set_memory_dvfs(dev_priv, true);
1715
1716         *old_wm = new_wm;
1717 }
1718
1719 static void vlv_initial_watermarks(struct intel_atomic_state *state,
1720                                    struct intel_crtc_state *crtc_state)
1721 {
1722         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1723         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1724
1725         mutex_lock(&dev_priv->wm.wm_mutex);
1726         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
1727         vlv_program_watermarks(dev_priv);
1728         mutex_unlock(&dev_priv->wm.wm_mutex);
1729 }
1730
1731 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
1732                                     struct intel_crtc_state *crtc_state)
1733 {
1734         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1735         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1736
1737         if (!crtc_state->wm.need_postvbl_update)
1738                 return;
1739
1740         mutex_lock(&dev_priv->wm.wm_mutex);
1741         intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
1742         vlv_program_watermarks(dev_priv);
1743         mutex_unlock(&dev_priv->wm.wm_mutex);
1744 }
1745
1746 #define single_plane_enabled(mask) is_power_of_2(mask)
1747
1748 static void g4x_update_wm(struct intel_crtc *crtc)
1749 {
1750         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1751         static const int sr_latency_ns = 12000;
1752         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1753         int plane_sr, cursor_sr;
1754         unsigned int enabled = 0;
1755         bool cxsr_enabled;
1756
1757         if (g4x_compute_wm0(dev_priv, PIPE_A,
1758                             &g4x_wm_info, pessimal_latency_ns,
1759                             &g4x_cursor_wm_info, pessimal_latency_ns,
1760                             &planea_wm, &cursora_wm))
1761                 enabled |= 1 << PIPE_A;
1762
1763         if (g4x_compute_wm0(dev_priv, PIPE_B,
1764                             &g4x_wm_info, pessimal_latency_ns,
1765                             &g4x_cursor_wm_info, pessimal_latency_ns,
1766                             &planeb_wm, &cursorb_wm))
1767                 enabled |= 1 << PIPE_B;
1768
1769         if (single_plane_enabled(enabled) &&
1770             g4x_compute_srwm(dev_priv, ffs(enabled) - 1,
1771                              sr_latency_ns,
1772                              &g4x_wm_info,
1773                              &g4x_cursor_wm_info,
1774                              &plane_sr, &cursor_sr)) {
1775                 cxsr_enabled = true;
1776         } else {
1777                 cxsr_enabled = false;
1778                 intel_set_memory_cxsr(dev_priv, false);
1779                 plane_sr = cursor_sr = 0;
1780         }
1781
1782         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1783                       "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1784                       planea_wm, cursora_wm,
1785                       planeb_wm, cursorb_wm,
1786                       plane_sr, cursor_sr);
1787
1788         I915_WRITE(DSPFW1,
1789                    FW_WM(plane_sr, SR) |
1790                    FW_WM(cursorb_wm, CURSORB) |
1791                    FW_WM(planeb_wm, PLANEB) |
1792                    FW_WM(planea_wm, PLANEA));
1793         I915_WRITE(DSPFW2,
1794                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1795                    FW_WM(cursora_wm, CURSORA));
1796         /* HPLL off in SR has some issues on G4x... disable it */
1797         I915_WRITE(DSPFW3,
1798                    (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1799                    FW_WM(cursor_sr, CURSOR_SR));
1800
1801         if (cxsr_enabled)
1802                 intel_set_memory_cxsr(dev_priv, true);
1803 }
1804
1805 static void i965_update_wm(struct intel_crtc *unused_crtc)
1806 {
1807         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
1808         struct intel_crtc *crtc;
1809         int srwm = 1;
1810         int cursor_sr = 16;
1811         bool cxsr_enabled;
1812
1813         /* Calc sr entries for one plane configs */
1814         crtc = single_enabled_crtc(dev_priv);
1815         if (crtc) {
1816                 /* self-refresh has much higher latency */
1817                 static const int sr_latency_ns = 12000;
1818                 const struct drm_display_mode *adjusted_mode =
1819                         &crtc->config->base.adjusted_mode;
1820                 const struct drm_framebuffer *fb =
1821                         crtc->base.primary->state->fb;
1822                 int clock = adjusted_mode->crtc_clock;
1823                 int htotal = adjusted_mode->crtc_htotal;
1824                 int hdisplay = crtc->config->pipe_src_w;
1825                 int cpp = fb->format->cpp[0];
1826                 int entries;
1827
1828                 entries = intel_wm_method2(clock, htotal,
1829                                            hdisplay, cpp, sr_latency_ns / 100);
1830                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1831                 srwm = I965_FIFO_SIZE - entries;
1832                 if (srwm < 0)
1833                         srwm = 1;
1834                 srwm &= 0x1ff;
1835                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1836                               entries, srwm);
1837
1838                 entries = intel_wm_method2(clock, htotal,
1839                                            crtc->base.cursor->state->crtc_w, 4,
1840                                            sr_latency_ns / 100);
1841                 entries = DIV_ROUND_UP(entries,
1842                                        i965_cursor_wm_info.cacheline_size) +
1843                         i965_cursor_wm_info.guard_size;
1844
1845                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
1846                 if (cursor_sr > i965_cursor_wm_info.max_wm)
1847                         cursor_sr = i965_cursor_wm_info.max_wm;
1848
1849                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1850                               "cursor %d\n", srwm, cursor_sr);
1851
1852                 cxsr_enabled = true;
1853         } else {
1854                 cxsr_enabled = false;
1855                 /* Turn off self refresh if both pipes are enabled */
1856                 intel_set_memory_cxsr(dev_priv, false);
1857         }
1858
1859         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1860                       srwm);
1861
1862         /* 965 has limitations... */
1863         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1864                    FW_WM(8, CURSORB) |
1865                    FW_WM(8, PLANEB) |
1866                    FW_WM(8, PLANEA));
1867         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1868                    FW_WM(8, PLANEC_OLD));
1869         /* update cursor SR watermark */
1870         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1871
1872         if (cxsr_enabled)
1873                 intel_set_memory_cxsr(dev_priv, true);
1874 }
1875
1876 #undef FW_WM
1877
1878 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
1879 {
1880         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
1881         const struct intel_watermark_params *wm_info;
1882         uint32_t fwater_lo;
1883         uint32_t fwater_hi;
1884         int cwm, srwm = 1;
1885         int fifo_size;
1886         int planea_wm, planeb_wm;
1887         struct intel_crtc *crtc, *enabled = NULL;
1888
1889         if (IS_I945GM(dev_priv))
1890                 wm_info = &i945_wm_info;
1891         else if (!IS_GEN2(dev_priv))
1892                 wm_info = &i915_wm_info;
1893         else
1894                 wm_info = &i830_a_wm_info;
1895
1896         fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0);
1897         crtc = intel_get_crtc_for_plane(dev_priv, 0);
1898         if (intel_crtc_active(crtc)) {
1899                 const struct drm_display_mode *adjusted_mode =
1900                         &crtc->config->base.adjusted_mode;
1901                 const struct drm_framebuffer *fb =
1902                         crtc->base.primary->state->fb;
1903                 int cpp;
1904
1905                 if (IS_GEN2(dev_priv))
1906                         cpp = 4;
1907                 else
1908                         cpp = fb->format->cpp[0];
1909
1910                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1911                                                wm_info, fifo_size, cpp,
1912                                                pessimal_latency_ns);
1913                 enabled = crtc;
1914         } else {
1915                 planea_wm = fifo_size - wm_info->guard_size;
1916                 if (planea_wm > (long)wm_info->max_wm)
1917                         planea_wm = wm_info->max_wm;
1918         }
1919
1920         if (IS_GEN2(dev_priv))
1921                 wm_info = &i830_bc_wm_info;
1922
1923         fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1);
1924         crtc = intel_get_crtc_for_plane(dev_priv, 1);
1925         if (intel_crtc_active(crtc)) {
1926                 const struct drm_display_mode *adjusted_mode =
1927                         &crtc->config->base.adjusted_mode;
1928                 const struct drm_framebuffer *fb =
1929                         crtc->base.primary->state->fb;
1930                 int cpp;
1931
1932                 if (IS_GEN2(dev_priv))
1933                         cpp = 4;
1934                 else
1935                         cpp = fb->format->cpp[0];
1936
1937                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1938                                                wm_info, fifo_size, cpp,
1939                                                pessimal_latency_ns);
1940                 if (enabled == NULL)
1941                         enabled = crtc;
1942                 else
1943                         enabled = NULL;
1944         } else {
1945                 planeb_wm = fifo_size - wm_info->guard_size;
1946                 if (planeb_wm > (long)wm_info->max_wm)
1947                         planeb_wm = wm_info->max_wm;
1948         }
1949
1950         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1951
1952         if (IS_I915GM(dev_priv) && enabled) {
1953                 struct drm_i915_gem_object *obj;
1954
1955                 obj = intel_fb_obj(enabled->base.primary->state->fb);
1956
1957                 /* self-refresh seems busted with untiled */
1958                 if (!i915_gem_object_is_tiled(obj))
1959                         enabled = NULL;
1960         }
1961
1962         /*
1963          * Overlay gets an aggressive default since video jitter is bad.
1964          */
1965         cwm = 2;
1966
1967         /* Play safe and disable self-refresh before adjusting watermarks. */
1968         intel_set_memory_cxsr(dev_priv, false);
1969
1970         /* Calc sr entries for one plane configs */
1971         if (HAS_FW_BLC(dev_priv) && enabled) {
1972                 /* self-refresh has much higher latency */
1973                 static const int sr_latency_ns = 6000;
1974                 const struct drm_display_mode *adjusted_mode =
1975                         &enabled->config->base.adjusted_mode;
1976                 const struct drm_framebuffer *fb =
1977                         enabled->base.primary->state->fb;
1978                 int clock = adjusted_mode->crtc_clock;
1979                 int htotal = adjusted_mode->crtc_htotal;
1980                 int hdisplay = enabled->config->pipe_src_w;
1981                 int cpp;
1982                 int entries;
1983
1984                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
1985                         cpp = 4;
1986                 else
1987                         cpp = fb->format->cpp[0];
1988
1989                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
1990                                            sr_latency_ns / 100);
1991                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1992                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1993                 srwm = wm_info->fifo_size - entries;
1994                 if (srwm < 0)
1995                         srwm = 1;
1996
1997                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
1998                         I915_WRITE(FW_BLC_SELF,
1999                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2000                 else
2001                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2002         }
2003
2004         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2005                       planea_wm, planeb_wm, cwm, srwm);
2006
2007         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2008         fwater_hi = (cwm & 0x1f);
2009
2010         /* Set request length to 8 cachelines per fetch */
2011         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2012         fwater_hi = fwater_hi | (1 << 8);
2013
2014         I915_WRITE(FW_BLC, fwater_lo);
2015         I915_WRITE(FW_BLC2, fwater_hi);
2016
2017         if (enabled)
2018                 intel_set_memory_cxsr(dev_priv, true);
2019 }
2020
2021 static void i845_update_wm(struct intel_crtc *unused_crtc)
2022 {
2023         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2024         struct intel_crtc *crtc;
2025         const struct drm_display_mode *adjusted_mode;
2026         uint32_t fwater_lo;
2027         int planea_wm;
2028
2029         crtc = single_enabled_crtc(dev_priv);
2030         if (crtc == NULL)
2031                 return;
2032
2033         adjusted_mode = &crtc->config->base.adjusted_mode;
2034         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2035                                        &i845_wm_info,
2036                                        dev_priv->display.get_fifo_size(dev_priv, 0),
2037                                        4, pessimal_latency_ns);
2038         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2039         fwater_lo |= (3<<8) | planea_wm;
2040
2041         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2042
2043         I915_WRITE(FW_BLC, fwater_lo);
2044 }
2045
/*
 * Result of intel_wm_method1() divided by 64 (rounded up), plus 2.
 * latency must be in 0.1us units.
 */
static unsigned int ilk_wm_method1(unsigned int pixel_rate,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int raw = intel_wm_method1(pixel_rate, cpp, latency);

	return DIV_ROUND_UP(raw, 64) + 2;
}
2058
/*
 * Result of intel_wm_method2() divided by 64 (rounded up), plus 2.
 * latency must be in 0.1us units.
 */
static unsigned int ilk_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int raw = intel_wm_method2(pixel_rate, htotal,
					    width, cpp, latency);

	return DIV_ROUND_UP(raw, 64) + 2;
}
2074
2075 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2076                            uint8_t cpp)
2077 {
2078         /*
2079          * Neither of these should be possible since this function shouldn't be
2080          * called if the CRTC is off or the plane is invisible.  But let's be
2081          * extra paranoid to avoid a potential divide-by-zero if we screw up
2082          * elsewhere in the driver.
2083          */
2084         if (WARN_ON(!cpp))
2085                 return 0;
2086         if (WARN_ON(!horiz_pixels))
2087                 return 0;
2088
2089         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2090 }
2091
/*
 * Upper limits for the watermark values of one level.
 * NOTE(review): spr/cur/fbc are presumably the sprite, cursor and FBC
 * limits, going by the field names - confirm against the users.
 */
struct ilk_wm_maximums {
	uint16_t pri, spr;	/* pri: primary plane limit */
	uint16_t cur, fbc;
};
2098
2099 /*
2100  * For both WM_PIPE and WM_LP.
2101  * mem_value must be in 0.1us units.
2102  */
2103 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2104                                    const struct intel_plane_state *pstate,
2105                                    uint32_t mem_value,
2106                                    bool is_lp)
2107 {
2108         uint32_t method1, method2;
2109         int cpp;
2110
2111         if (!intel_wm_plane_visible(cstate, pstate))
2112                 return 0;
2113
2114         cpp = pstate->base.fb->format->cpp[0];
2115
2116         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2117
2118         if (!is_lp)
2119                 return method1;
2120
2121         method2 = ilk_wm_method2(cstate->pixel_rate,
2122                                  cstate->base.adjusted_mode.crtc_htotal,
2123                                  drm_rect_width(&pstate->base.dst),
2124                                  cpp, mem_value);
2125
2126         return min(method1, method2);
2127 }
2128
2129 /*
2130  * For both WM_PIPE and WM_LP.
2131  * mem_value must be in 0.1us units.
2132  */
2133 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2134                                    const struct intel_plane_state *pstate,
2135                                    uint32_t mem_value)
2136 {
2137         uint32_t method1, method2;
2138         int cpp;
2139
2140         if (!intel_wm_plane_visible(cstate, pstate))
2141                 return 0;
2142
2143         cpp = pstate->base.fb->format->cpp[0];
2144
2145         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2146         method2 = ilk_wm_method2(cstate->pixel_rate,
2147                                  cstate->base.adjusted_mode.crtc_htotal,
2148                                  drm_rect_width(&pstate->base.dst),
2149                                  cpp, mem_value);
2150         return min(method1, method2);
2151 }
2152
2153 /*
2154  * For both WM_PIPE and WM_LP.
2155  * mem_value must be in 0.1us units.
2156  */
2157 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2158                                    const struct intel_plane_state *pstate,
2159                                    uint32_t mem_value)
2160 {
2161         int cpp;
2162
2163         if (!intel_wm_plane_visible(cstate, pstate))
2164                 return 0;
2165
2166         cpp = pstate->base.fb->format->cpp[0];
2167
2168         return ilk_wm_method2(cstate->pixel_rate,
2169                               cstate->base.adjusted_mode.crtc_htotal,
2170                               pstate->base.crtc_w, cpp, mem_value);
2171 }
2172
2173 /* Only for WM_LP. */
2174 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2175                                    const struct intel_plane_state *pstate,
2176                                    uint32_t pri_val)
2177 {
2178         int cpp;
2179
2180         if (!intel_wm_plane_visible(cstate, pstate))
2181                 return 0;
2182
2183         cpp = pstate->base.fb->format->cpp[0];
2184
2185         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2186 }
2187
/* Display FIFO size by generation: 3072 for gen8+, 768 for gen7, else 512. */
static unsigned int
ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
{
	const int gen = INTEL_GEN(dev_priv);

	if (gen < 7)
		return 512;

	return gen < 8 ? 768 : 3072;
}
2198
/*
 * Largest primary/sprite plane watermark for @level on this generation;
 * level 0 has a smaller limit than the other levels. @is_sprite only
 * matters before gen7.
 */
static unsigned int
ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
		     int level, bool is_sprite)
{
	const int gen = INTEL_GEN(dev_priv);
	const bool lp = level != 0;

	/* BDW primary/sprite plane watermarks */
	if (gen >= 8)
		return lp ? 2047 : 255;

	/* IVB/HSW primary/sprite plane watermarks */
	if (gen >= 7)
		return lp ? 1023 : 127;

	/* ILK/SNB sprite plane watermarks */
	if (is_sprite)
		return lp ? 255 : 63;

	/* ILK/SNB primary plane watermarks */
	return lp ? 511 : 127;
}
2216
/*
 * Largest cursor watermark for @level: 63/255 (level 0/other) on gen7+,
 * 31/63 before that.
 */
static unsigned int
ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
{
	const bool lp = level != 0;

	if (INTEL_GEN(dev_priv) < 7)
		return lp ? 63 : 31;

	return lp ? 255 : 63;
}
2225
/* Largest FBC watermark: 31 on gen8+, 15 otherwise. */
static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
{
	return INTEL_GEN(dev_priv) >= 8 ? 31 : 15;
}
2233
2234 /* Calculate the maximum primary/sprite plane watermark */
2235 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2236                                      int level,
2237                                      const struct intel_wm_config *config,
2238                                      enum intel_ddb_partitioning ddb_partitioning,
2239                                      bool is_sprite)
2240 {
2241         struct drm_i915_private *dev_priv = to_i915(dev);
2242         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2243
2244         /* if sprites aren't enabled, sprites get nothing */
2245         if (is_sprite && !config->sprites_enabled)
2246                 return 0;
2247
2248         /* HSW allows LP1+ watermarks even with multiple pipes */
2249         if (level == 0 || config->num_pipes_active > 1) {
2250                 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2251
2252                 /*
2253                  * For some reason the non self refresh
2254                  * FIFO size is only half of the self
2255                  * refresh FIFO size on ILK/SNB.
2256                  */
2257                 if (INTEL_GEN(dev_priv) <= 6)
2258                         fifo_size /= 2;
2259         }
2260
2261         if (config->sprites_enabled) {
2262                 /* level 0 is always calculated with 1:1 split */
2263                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2264                         if (is_sprite)
2265                                 fifo_size *= 5;
2266                         fifo_size /= 6;
2267                 } else {
2268                         fifo_size /= 2;
2269                 }
2270         }
2271
2272         /* clamp to max that the registers can hold */
2273         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2274 }
2275
2276 /* Calculate the maximum cursor plane watermark */
2277 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2278                                       int level,
2279                                       const struct intel_wm_config *config)
2280 {
2281         /* HSW LP1+ watermarks w/ multiple pipes */
2282         if (level > 0 && config->num_pipes_active > 1)
2283                 return 64;
2284
2285         /* otherwise just report max that registers can hold */
2286         return ilk_cursor_wm_reg_max(to_i915(dev), level);
2287 }
2288
2289 static void ilk_compute_wm_maximums(const struct drm_device *dev,
2290                                     int level,
2291                                     const struct intel_wm_config *config,
2292                                     enum intel_ddb_partitioning ddb_partitioning,
2293                                     struct ilk_wm_maximums *max)
2294 {
2295         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2296         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2297         max->cur = ilk_cursor_wm_max(dev, level, config);
2298         max->fbc = ilk_fbc_wm_reg_max(to_i915(dev));
2299 }
2300
2301 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2302                                         int level,
2303                                         struct ilk_wm_maximums *max)
2304 {
2305         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2306         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2307         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2308         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2309 }
2310
2311 static bool ilk_validate_wm_level(int level,
2312                                   const struct ilk_wm_maximums *max,
2313                                   struct intel_wm_level *result)
2314 {
2315         bool ret;
2316
2317         /* already determined to be invalid? */
2318         if (!result->enable)
2319                 return false;
2320
2321         result->enable = result->pri_val <= max->pri &&
2322                          result->spr_val <= max->spr &&
2323                          result->cur_val <= max->cur;
2324
2325         ret = result->enable;
2326
2327         /*
2328          * HACK until we can pre-compute everything,
2329          * and thus fail gracefully if LP0 watermarks
2330          * are exceeded...
2331          */
2332         if (level == 0 && !result->enable) {
2333                 if (result->pri_val > max->pri)
2334                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2335                                       level, result->pri_val, max->pri);
2336                 if (result->spr_val > max->spr)
2337                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2338                                       level, result->spr_val, max->spr);
2339                 if (result->cur_val > max->cur)
2340                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2341                                       level, result->cur_val, max->cur);
2342
2343                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2344                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2345                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2346                 result->enable = true;
2347         }
2348
2349         return ret;
2350 }
2351
2352 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2353                                  const struct intel_crtc *intel_crtc,
2354                                  int level,
2355                                  struct intel_crtc_state *cstate,
2356                                  struct intel_plane_state *pristate,
2357                                  struct intel_plane_state *sprstate,
2358                                  struct intel_plane_state *curstate,
2359                                  struct intel_wm_level *result)
2360 {
2361         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2362         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2363         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2364
2365         /* WM1+ latency values stored in 0.5us units */
2366         if (level > 0) {
2367                 pri_latency *= 5;
2368                 spr_latency *= 5;
2369                 cur_latency *= 5;
2370         }
2371
2372         if (pristate) {
2373                 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2374                                                      pri_latency, level);
2375                 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2376         }
2377
2378         if (sprstate)
2379                 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2380
2381         if (curstate)
2382                 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2383
2384         result->enable = true;
2385 }
2386
2387 static uint32_t
2388 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2389 {
2390         const struct intel_atomic_state *intel_state =
2391                 to_intel_atomic_state(cstate->base.state);
2392         const struct drm_display_mode *adjusted_mode =
2393                 &cstate->base.adjusted_mode;
2394         u32 linetime, ips_linetime;
2395
2396         if (!cstate->base.active)
2397                 return 0;
2398         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2399                 return 0;
2400         if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2401                 return 0;
2402
2403         /* The WM are computed with base on how long it takes to fill a single
2404          * row at the given clock rate, multiplied by 8.
2405          * */
2406         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2407                                      adjusted_mode->crtc_clock);
2408         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2409                                          intel_state->cdclk.logical.cdclk);
2410
2411         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2412                PIPE_WM_LINETIME_TIME(linetime);
2413 }
2414
2415 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2416                                   uint16_t wm[8])
2417 {
2418         if (IS_GEN9(dev_priv)) {
2419                 uint32_t val;
2420                 int ret, i;
2421                 int level, max_level = ilk_wm_max_level(dev_priv);
2422
2423                 /* read the first set of memory latencies[0:3] */
2424                 val = 0; /* data0 to be programmed to 0 for first set */
2425                 mutex_lock(&dev_priv->rps.hw_lock);
2426                 ret = sandybridge_pcode_read(dev_priv,
2427                                              GEN9_PCODE_READ_MEM_LATENCY,
2428                                              &val);
2429                 mutex_unlock(&dev_priv->rps.hw_lock);
2430
2431                 if (ret) {
2432                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2433                         return;
2434                 }
2435
2436                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2437                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2438                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2439                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2440                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2441                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2442                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2443
2444                 /* read the second set of memory latencies[4:7] */
2445                 val = 1; /* data0 to be programmed to 1 for second set */
2446                 mutex_lock(&dev_priv->rps.hw_lock);
2447                 ret = sandybridge_pcode_read(dev_priv,
2448                                              GEN9_PCODE_READ_MEM_LATENCY,
2449                                              &val);
2450                 mutex_unlock(&dev_priv->rps.hw_lock);
2451                 if (ret) {
2452                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2453                         return;
2454                 }
2455
2456                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2457                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2458                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2459                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2460                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2461                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2462                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2463
2464                 /*
2465                  * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2466                  * need to be disabled. We make sure to sanitize the values out
2467                  * of the punit to satisfy this requirement.
2468                  */
2469                 for (level = 1; level <= max_level; level++) {
2470                         if (wm[level] == 0) {
2471                                 for (i = level + 1; i <= max_level; i++)
2472                                         wm[i] = 0;
2473                                 break;
2474                         }
2475                 }
2476
2477                 /*
2478                  * WaWmMemoryReadLatency:skl,glk
2479                  *
2480                  * punit doesn't take into account the read latency so we need
2481                  * to add 2us to the various latency levels we retrieve from the
2482                  * punit when level 0 response data us 0us.
2483                  */
2484                 if (wm[0] == 0) {
2485                         wm[0] += 2;
2486                         for (level = 1; level <= max_level; level++) {
2487                                 if (wm[level] == 0)
2488                                         break;
2489                                 wm[level] += 2;
2490                         }
2491                 }
2492
2493         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2494                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2495
2496                 wm[0] = (sskpd >> 56) & 0xFF;
2497                 if (wm[0] == 0)
2498                         wm[0] = sskpd & 0xF;
2499                 wm[1] = (sskpd >> 4) & 0xFF;
2500                 wm[2] = (sskpd >> 12) & 0xFF;
2501                 wm[3] = (sskpd >> 20) & 0x1FF;
2502                 wm[4] = (sskpd >> 32) & 0x1FF;
2503         } else if (INTEL_GEN(dev_priv) >= 6) {
2504                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2505
2506                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2507                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2508                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2509                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2510         } else if (INTEL_GEN(dev_priv) >= 5) {
2511                 uint32_t mltr = I915_READ(MLTR_ILK);
2512
2513                 /* ILK primary LP0 latency is 700 ns */
2514                 wm[0] = 7;
2515                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2516                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2517         }
2518 }
2519
2520 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2521                                        uint16_t wm[5])
2522 {
2523         /* ILK sprite LP0 latency is 1300 ns */
2524         if (IS_GEN5(dev_priv))
2525                 wm[0] = 13;
2526 }
2527
2528 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2529                                        uint16_t wm[5])
2530 {
2531         /* ILK cursor LP0 latency is 1300 ns */
2532         if (IS_GEN5(dev_priv))
2533                 wm[0] = 13;
2534
2535         /* WaDoubleCursorLP3Latency:ivb */
2536         if (IS_IVYBRIDGE(dev_priv))
2537                 wm[3] *= 2;
2538 }
2539
/*
 * Highest watermark level index we expect on this platform, i.e. the
 * number of levels minus one.
 */
int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) >= 9)
		return 7;

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 4;

	if (INTEL_GEN(dev_priv) >= 6)
		return 3;

	return 2;
}
2552
2553 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2554                                    const char *name,
2555                                    const uint16_t wm[8])
2556 {
2557         int level, max_level = ilk_wm_max_level(dev_priv);
2558
2559         for (level = 0; level <= max_level; level++) {
2560                 unsigned int latency = wm[level];
2561
2562                 if (latency == 0) {
2563                         DRM_ERROR("%s WM%d latency not provided\n",
2564                                   name, level);
2565                         continue;
2566                 }
2567
2568                 /*
2569                  * - latencies are in us on gen9.
2570                  * - before then, WM1+ latency values are in 0.5us units
2571                  */
2572                 if (IS_GEN9(dev_priv))
2573                         latency *= 10;
2574                 else if (level > 0)
2575                         latency *= 5;
2576
2577                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2578                               name, level, wm[level],
2579                               latency / 10, latency % 10);
2580         }
2581 }
2582
2583 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2584                                     uint16_t wm[5], uint16_t min)
2585 {
2586         int level, max_level = ilk_wm_max_level(dev_priv);
2587
2588         if (wm[0] >= min)
2589                 return false;
2590
2591         wm[0] = max(wm[0], min);
2592         for (level = 1; level <= max_level; level++)
2593                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2594
2595         return true;
2596 }
2597
2598 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2599 {
2600         bool changed;
2601
2602         /*
2603          * The BIOS provided WM memory latency values are often
2604          * inadequate for high resolution displays. Adjust them.
2605          */
2606         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2607                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2608                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2609
2610         if (!changed)
2611                 return;
2612
2613         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2614         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
2615         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
2616         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
2617 }
2618
2619 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
2620 {
2621         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
2622
2623         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2624                sizeof(dev_priv->wm.pri_latency));
2625         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2626                sizeof(dev_priv->wm.pri_latency));
2627
2628         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
2629         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
2630
2631         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
2632         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
2633         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
2634
2635         if (IS_GEN6(dev_priv))
2636                 snb_wm_latency_quirk(dev_priv);
2637 }
2638
2639 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
2640 {
2641         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
2642         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
2643 }
2644
2645 static bool ilk_validate_pipe_wm(struct drm_device *dev,
2646                                  struct intel_pipe_wm *pipe_wm)
2647 {
2648         /* LP0 watermark maximums depend on this pipe alone */
2649         const struct intel_wm_config config = {
2650                 .num_pipes_active = 1,
2651                 .sprites_enabled = pipe_wm->sprites_enabled,
2652                 .sprites_scaled = pipe_wm->sprites_scaled,
2653         };
2654         struct ilk_wm_maximums max;
2655
2656         /* LP0 watermarks always use 1/2 DDB partitioning */
2657         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2658
2659         /* At least LP0 must be valid */
2660         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
2661                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
2662                 return false;
2663         }
2664
2665         return true;
2666 }
2667
2668 /* Compute new watermarks for the pipe */
2669 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
2670 {
2671         struct drm_atomic_state *state = cstate->base.state;
2672         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
2673         struct intel_pipe_wm *pipe_wm;
2674         struct drm_device *dev = state->dev;
2675         const struct drm_i915_private *dev_priv = to_i915(dev);
2676         struct intel_plane *intel_plane;
2677         struct intel_plane_state *pristate = NULL;
2678         struct intel_plane_state *sprstate = NULL;
2679         struct intel_plane_state *curstate = NULL;
2680         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
2681         struct ilk_wm_maximums max;
2682
2683         pipe_wm = &cstate->wm.ilk.optimal;
2684
2685         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
2686                 struct intel_plane_state *ps;
2687
2688                 ps = intel_atomic_get_existing_plane_state(state,
2689                                                            intel_plane);
2690                 if (!ps)
2691                         continue;
2692
2693                 if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY)
2694                         pristate = ps;
2695                 else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY)
2696                         sprstate = ps;
2697                 else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
2698                         curstate = ps;
2699         }
2700
2701         pipe_wm->pipe_enabled = cstate->base.active;
2702         if (sprstate) {
2703                 pipe_wm->sprites_enabled = sprstate->base.visible;
2704                 pipe_wm->sprites_scaled = sprstate->base.visible &&
2705                         (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
2706                          drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
2707         }
2708
2709         usable_level = max_level;
2710
2711         /* ILK/SNB: LP2+ watermarks only w/o sprites */
2712         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
2713                 usable_level = 1;
2714
2715         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2716         if (pipe_wm->sprites_scaled)
2717                 usable_level = 0;
2718
2719         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
2720                              pristate, sprstate, curstate, &pipe_wm->raw_wm[0]);
2721
2722         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
2723         pipe_wm->wm[0] = pipe_wm->raw_wm[0];
2724
2725         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2726                 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
2727
2728         if (!ilk_validate_pipe_wm(dev, pipe_wm))
2729                 return -EINVAL;
2730
2731         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
2732
2733         for (level = 1; level <= max_level; level++) {
2734                 struct intel_wm_level *wm = &pipe_wm->raw_wm[level];
2735
2736                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
2737                                      pristate, sprstate, curstate, wm);
2738
2739                 /*
2740                  * Disable any watermark level that exceeds the
2741                  * register maximums since such watermarks are
2742                  * always invalid.
2743                  */
2744                 if (level > usable_level)
2745                         continue;
2746
2747                 if (ilk_validate_wm_level(level, &max, wm))
2748                         pipe_wm->wm[level] = *wm;
2749                 else
2750                         usable_level = level;
2751         }
2752
2753         return 0;
2754 }
2755
2756 /*
2757  * Build a set of 'intermediate' watermark values that satisfy both the old
2758  * state and the new state.  These can be programmed to the hardware
2759  * immediately.
2760  */
2761 static int ilk_compute_intermediate_wm(struct drm_device *dev,
2762                                        struct intel_crtc *intel_crtc,
2763                                        struct intel_crtc_state *newstate)
2764 {
2765         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
2766         struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk;
2767         int level, max_level = ilk_wm_max_level(to_i915(dev));
2768
2769         /*
2770          * Start with the final, target watermarks, then combine with the
2771          * currently active watermarks to get values that are safe both before
2772          * and after the vblank.
2773          */
2774         *a = newstate->wm.ilk.optimal;
2775         a->pipe_enabled |= b->pipe_enabled;
2776         a->sprites_enabled |= b->sprites_enabled;
2777         a->sprites_scaled |= b->sprites_scaled;
2778
2779         for (level = 0; level <= max_level; level++) {
2780                 struct intel_wm_level *a_wm = &a->wm[level];
2781                 const struct intel_wm_level *b_wm = &b->wm[level];
2782
2783                 a_wm->enable &= b_wm->enable;
2784                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
2785                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
2786                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
2787                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
2788         }
2789
2790         /*
2791          * We need to make sure that these merged watermark values are
2792          * actually a valid configuration themselves.  If they're not,
2793          * there's no safe way to transition from the old state to
2794          * the new state, so we need to fail the atomic transaction.
2795          */
2796         if (!ilk_validate_pipe_wm(dev, a))
2797                 return -EINVAL;
2798
2799         /*
2800          * If our intermediate WM are identical to the final WM, then we can
2801          * omit the post-vblank programming; only update if it's different.
2802          */
2803         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
2804                 newstate->wm.need_postvbl_update = true;
2805
2806         return 0;
2807 }
2808
2809 /*
2810  * Merge the watermarks from all active pipes for a specific level.
2811  */
2812 static void ilk_merge_wm_level(struct drm_device *dev,
2813                                int level,
2814                                struct intel_wm_level *ret_wm)
2815 {
2816         const struct intel_crtc *intel_crtc;
2817
2818         ret_wm->enable = true;
2819
2820         for_each_intel_crtc(dev, intel_crtc) {
2821                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
2822                 const struct intel_wm_level *wm = &active->wm[level];
2823
2824                 if (!active->pipe_enabled)
2825                         continue;
2826
2827                 /*
2828                  * The watermark values may have been used in the past,
2829                  * so we must maintain them in the registers for some
2830                  * time even if the level is now disabled.
2831                  */
2832                 if (!wm->enable)
2833                         ret_wm->enable = false;
2834
2835                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2836                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2837                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2838                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2839         }
2840 }
2841
2842 /*
2843  * Merge all low power watermarks for all active pipes.
2844  */
2845 static void ilk_wm_merge(struct drm_device *dev,
2846                          const struct intel_wm_config *config,
2847                          const struct ilk_wm_maximums *max,
2848                          struct intel_pipe_wm *merged)
2849 {
2850         struct drm_i915_private *dev_priv = to_i915(dev);
2851         int level, max_level = ilk_wm_max_level(dev_priv);
2852         int last_enabled_level = max_level;
2853
2854         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2855         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
2856             config->num_pipes_active > 1)
2857                 last_enabled_level = 0;
2858
2859         /* ILK: FBC WM must be disabled always */
2860         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
2861
2862         /* merge each WM1+ level */
2863         for (level = 1; level <= max_level; level++) {
2864                 struct intel_wm_level *wm = &merged->wm[level];
2865
2866                 ilk_merge_wm_level(dev, level, wm);
2867
2868                 if (level > last_enabled_level)
2869                         wm->enable = false;
2870                 else if (!ilk_validate_wm_level(level, max, wm))
2871                         /* make sure all following levels get disabled */
2872                         last_enabled_level = level - 1;
2873
2874                 /*
2875                  * The spec says it is preferred to disable
2876                  * FBC WMs instead of disabling a WM level.
2877                  */
2878                 if (wm->fbc_val > max->fbc) {
2879                         if (wm->enable)
2880                                 merged->fbc_wm_enabled = false;
2881                         wm->fbc_val = 0;
2882                 }
2883         }
2884
2885         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2886         /*
2887          * FIXME this is racy. FBC might get enabled later.
2888          * What we should check here is whether FBC can be
2889          * enabled sometime later.
2890          */
2891         if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled &&
2892             intel_fbc_is_active(dev_priv)) {
2893                 for (level = 2; level <= max_level; level++) {
2894                         struct intel_wm_level *wm = &merged->wm[level];
2895
2896                         wm->enable = false;
2897                 }
2898         }
2899 }
2900
2901 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2902 {
2903         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2904         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2905 }
2906
2907 /* The value we need to program into the WM_LPx latency field */
2908 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2909 {
2910         struct drm_i915_private *dev_priv = to_i915(dev);
2911
2912         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2913                 return 2 * level;
2914         else
2915                 return dev_priv->wm.pri_latency[level];
2916 }
2917
2918 static void ilk_compute_wm_results(struct drm_device *dev,
2919                                    const struct intel_pipe_wm *merged,
2920                                    enum intel_ddb_partitioning partitioning,
2921                                    struct ilk_wm_values *results)
2922 {
2923         struct drm_i915_private *dev_priv = to_i915(dev);
2924         struct intel_crtc *intel_crtc;
2925         int level, wm_lp;
2926
2927         results->enable_fbc_wm = merged->fbc_wm_enabled;
2928         results->partitioning = partitioning;
2929
2930         /* LP1+ register values */
2931         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2932                 const struct intel_wm_level *r;
2933
2934                 level = ilk_wm_lp_to_level(wm_lp, merged);
2935
2936                 r = &merged->wm[level];
2937
2938                 /*
2939                  * Maintain the watermark values even if the level is
2940                  * disabled. Doing otherwise could cause underruns.
2941                  */
2942                 results->wm_lp[wm_lp - 1] =
2943                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2944                         (r->pri_val << WM1_LP_SR_SHIFT) |
2945                         r->cur_val;
2946
2947                 if (r->enable)
2948                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2949
2950                 if (INTEL_GEN(dev_priv) >= 8)
2951                         results->wm_lp[wm_lp - 1] |=
2952                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2953                 else
2954                         results->wm_lp[wm_lp - 1] |=
2955                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2956
2957                 /*
2958                  * Always set WM1S_LP_EN when spr_val != 0, even if the
2959                  * level is disabled. Doing otherwise could cause underruns.
2960                  */
2961                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
2962                         WARN_ON(wm_lp != 1);
2963                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2964                 } else
2965                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2966         }
2967
2968         /* LP0 register values */
2969         for_each_intel_crtc(dev, intel_crtc) {
2970                 enum pipe pipe = intel_crtc->pipe;
2971                 const struct intel_wm_level *r =
2972                         &intel_crtc->wm.active.ilk.wm[0];
2973
2974                 if (WARN_ON(!r->enable))
2975                         continue;
2976
2977                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
2978
2979                 results->wm_pipe[pipe] =
2980                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2981                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2982                         r->cur_val;
2983         }
2984 }
2985
2986 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
2987  * case both are at the same level. Prefer r1 in case they're the same. */
2988 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2989                                                   struct intel_pipe_wm *r1,
2990                                                   struct intel_pipe_wm *r2)
2991 {
2992         int level, max_level = ilk_wm_max_level(to_i915(dev));
2993         int level1 = 0, level2 = 0;
2994
2995         for (level = 1; level <= max_level; level++) {
2996                 if (r1->wm[level].enable)
2997                         level1 = level;
2998                 if (r2->wm[level].enable)
2999                         level2 = level;
3000         }
3001
3002         if (level1 == level2) {
3003                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3004                         return r2;
3005                 else
3006                         return r1;
3007         } else if (level1 > level2) {
3008                 return r1;
3009         } else {
3010                 return r2;
3011         }
3012 }
3013
3014 /* dirty bits used to track which watermarks need changes */
3015 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3016 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3017 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3018 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3019 #define WM_DIRTY_FBC (1 << 24)
3020 #define WM_DIRTY_DDB (1 << 25)
3021
3022 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3023                                          const struct ilk_wm_values *old,
3024                                          const struct ilk_wm_values *new)
3025 {
3026         unsigned int dirty = 0;
3027         enum pipe pipe;
3028         int wm_lp;
3029
3030         for_each_pipe(dev_priv, pipe) {
3031                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3032                         dirty |= WM_DIRTY_LINETIME(pipe);
3033                         /* Must disable LP1+ watermarks too */
3034                         dirty |= WM_DIRTY_LP_ALL;
3035                 }
3036
3037                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3038                         dirty |= WM_DIRTY_PIPE(pipe);
3039                         /* Must disable LP1+ watermarks too */
3040                         dirty |= WM_DIRTY_LP_ALL;
3041                 }
3042         }
3043
3044         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3045                 dirty |= WM_DIRTY_FBC;
3046                 /* Must disable LP1+ watermarks too */
3047                 dirty |= WM_DIRTY_LP_ALL;
3048         }
3049
3050         if (old->partitioning != new->partitioning) {
3051                 dirty |= WM_DIRTY_DDB;
3052                 /* Must disable LP1+ watermarks too */
3053                 dirty |= WM_DIRTY_LP_ALL;
3054         }
3055
3056         /* LP1+ watermarks already deemed dirty, no need to continue */
3057         if (dirty & WM_DIRTY_LP_ALL)
3058                 return dirty;
3059
3060         /* Find the lowest numbered LP1+ watermark in need of an update... */
3061         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3062                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3063                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3064                         break;
3065         }
3066
3067         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3068         for (; wm_lp <= 3; wm_lp++)
3069                 dirty |= WM_DIRTY_LP(wm_lp);
3070
3071         return dirty;
3072 }
3073
3074 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3075                                unsigned int dirty)
3076 {
3077         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3078         bool changed = false;
3079
3080         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3081                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3082                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3083                 changed = true;
3084         }
3085         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3086                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3087                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3088                 changed = true;
3089         }
3090         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3091                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3092                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3093                 changed = true;
3094         }
3095
3096         /*
3097          * Don't touch WM1S_LP_EN here.
3098          * Doing so could cause underruns.
3099          */
3100
3101         return changed;
3102 }
3103
3104 /*
3105  * The spec says we shouldn't write when we don't need, because every write
3106  * causes WMs to be re-evaluated, expending some power.
3107  */
3108 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3109                                 struct ilk_wm_values *results)
3110 {
3111         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3112         unsigned int dirty;
3113         uint32_t val;
3114
3115         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3116         if (!dirty)
3117                 return;
3118
3119         _ilk_disable_lp_wm(dev_priv, dirty);
3120
3121         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3122                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3123         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3124                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3125         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3126                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3127
3128         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3129                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3130         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3131                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3132         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3133                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3134
3135         if (dirty & WM_DIRTY_DDB) {
3136                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3137                         val = I915_READ(WM_MISC);
3138                         if (results->partitioning == INTEL_DDB_PART_1_2)
3139                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3140                         else
3141                                 val |= WM_MISC_DATA_PARTITION_5_6;
3142                         I915_WRITE(WM_MISC, val);
3143                 } else {
3144                         val = I915_READ(DISP_ARB_CTL2);
3145                         if (results->partitioning == INTEL_DDB_PART_1_2)
3146                                 val &= ~DISP_DATA_PARTITION_5_6;
3147                         else
3148                                 val |= DISP_DATA_PARTITION_5_6;
3149                         I915_WRITE(DISP_ARB_CTL2, val);
3150                 }
3151         }
3152
3153         if (dirty & WM_DIRTY_FBC) {
3154                 val = I915_READ(DISP_ARB_CTL);
3155                 if (results->enable_fbc_wm)
3156                         val &= ~DISP_FBC_WM_DIS;
3157                 else
3158                         val |= DISP_FBC_WM_DIS;
3159                 I915_WRITE(DISP_ARB_CTL, val);
3160         }
3161
3162         if (dirty & WM_DIRTY_LP(1) &&
3163             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3164                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3165
3166         if (INTEL_GEN(dev_priv) >= 7) {
3167                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3168                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3169                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3170                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3171         }
3172
3173         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3174                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3175         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3176                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3177         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3178                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3179
3180         dev_priv->wm.hw = *results;
3181 }
3182
3183 bool ilk_disable_lp_wm(struct drm_device *dev)
3184 {
3185         struct drm_i915_private *dev_priv = to_i915(dev);
3186
3187         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3188 }
3189
3190 #define SKL_SAGV_BLOCK_TIME     30 /* µs */
3191
3192 /*
3193  * FIXME: We still don't have the proper code detect if we need to apply the WA,
3194  * so assume we'll always need it in order to avoid underruns.
3195  */
3196 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3197 {
3198         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3199
3200         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
3201                 return true;
3202
3203         return false;
3204 }
3205
3206 static bool
3207 intel_has_sagv(struct drm_i915_private *dev_priv)
3208 {
3209         if (IS_KABYLAKE(dev_priv))
3210                 return true;
3211
3212         if (IS_SKYLAKE(dev_priv) &&
3213             dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
3214                 return true;
3215
3216         return false;
3217 }
3218
3219 /*
3220  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3221  * depending on power and performance requirements. The display engine access
3222  * to system memory is blocked during the adjustment time. Because of the
3223  * blocking time, having this enabled can cause full system hangs and/or pipe
3224  * underruns if we don't meet all of the following requirements:
3225  *
3226  *  - <= 1 pipe enabled
3227  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3228  *  - We're not using an interlaced display configuration
3229  */
3230 int
3231 intel_enable_sagv(struct drm_i915_private *dev_priv)
3232 {
3233         int ret;
3234
3235         if (!intel_has_sagv(dev_priv))
3236                 return 0;
3237
3238         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3239                 return 0;
3240
3241         DRM_DEBUG_KMS("Enabling the SAGV\n");
3242         mutex_lock(&dev_priv->rps.hw_lock);
3243
3244         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3245                                       GEN9_SAGV_ENABLE);
3246
3247         /* We don't need to wait for the SAGV when enabling */
3248         mutex_unlock(&dev_priv->rps.hw_lock);
3249
3250         /*
3251          * Some skl systems, pre-release machines in particular,
3252          * don't actually have an SAGV.
3253          */
3254         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3255                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3256                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3257                 return 0;
3258         } else if (ret < 0) {
3259                 DRM_ERROR("Failed to enable the SAGV\n");
3260                 return ret;
3261         }
3262
3263         dev_priv->sagv_status = I915_SAGV_ENABLED;
3264         return 0;
3265 }
3266
3267 int
3268 intel_disable_sagv(struct drm_i915_private *dev_priv)
3269 {
3270         int ret;
3271
3272         if (!intel_has_sagv(dev_priv))
3273                 return 0;
3274
3275         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3276                 return 0;
3277
3278         DRM_DEBUG_KMS("Disabling the SAGV\n");
3279         mutex_lock(&dev_priv->rps.hw_lock);
3280
3281         /* bspec says to keep retrying for at least 1 ms */
3282         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3283                                 GEN9_SAGV_DISABLE,
3284                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3285                                 1);
3286         mutex_unlock(&dev_priv->rps.hw_lock);
3287
3288         /*
3289          * Some skl systems, pre-release machines in particular,
3290          * don't actually have an SAGV.
3291          */
3292         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3293                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3294                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3295                 return 0;
3296         } else if (ret < 0) {
3297                 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3298                 return ret;
3299         }
3300
3301         dev_priv->sagv_status = I915_SAGV_DISABLED;
3302         return 0;
3303 }
3304
3305 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3306 {
3307         struct drm_device *dev = state->dev;
3308         struct drm_i915_private *dev_priv = to_i915(dev);
3309         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3310         struct intel_crtc *crtc;
3311         struct intel_plane *plane;
3312         struct intel_crtc_state *cstate;
3313         enum pipe pipe;
3314         int level, latency;
3315
3316         if (!intel_has_sagv(dev_priv))
3317                 return false;
3318
3319         /*
3320          * SKL workaround: bspec recommends we disable the SAGV when we have
3321          * more then one pipe enabled
3322          *
3323          * If there are no active CRTCs, no additional checks need be performed
3324          */
3325         if (hweight32(intel_state->active_crtcs) == 0)
3326                 return true;
3327         else if (hweight32(intel_state->active_crtcs) > 1)
3328                 return false;
3329
3330         /* Since we're now guaranteed to only have one active CRTC... */
3331         pipe = ffs(intel_state->active_crtcs) - 1;
3332         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3333         cstate = to_intel_crtc_state(crtc->base.state);
3334
3335         if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3336                 return false;
3337
3338         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3339                 struct skl_plane_wm *wm =
3340                         &cstate->wm.skl.optimal.planes[plane->id];
3341
3342                 /* Skip this plane if it's not enabled */
3343                 if (!wm->wm[0].plane_en)
3344                         continue;
3345
3346                 /* Find the highest enabled wm level for this plane */
3347                 for (level = ilk_wm_max_level(dev_priv);
3348                      !wm->wm[level].plane_en; --level)
3349                      { }
3350
3351                 latency = dev_priv->wm.skl_latency[level];
3352
3353                 if (skl_needs_memory_bw_wa(intel_state) &&
3354                     plane->base.state->fb->modifier ==
3355                     I915_FORMAT_MOD_X_TILED)
3356                         latency += 15;
3357
3358                 /*
3359                  * If any of the planes on this pipe don't enable wm levels
3360                  * that incur memory latencies higher then 30µs we can't enable
3361                  * the SAGV
3362                  */
3363                 if (latency < SKL_SAGV_BLOCK_TIME)
3364                         return false;
3365         }
3366
3367         return true;
3368 }
3369
3370 static void
3371 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
3372                                    const struct intel_crtc_state *cstate,
3373                                    struct skl_ddb_entry *alloc, /* out */
3374                                    int *num_active /* out */)
3375 {
3376         struct drm_atomic_state *state = cstate->base.state;
3377         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3378         struct drm_i915_private *dev_priv = to_i915(dev);
3379         struct drm_crtc *for_crtc = cstate->base.crtc;
3380         unsigned int pipe_size, ddb_size;
3381         int nth_active_pipe;
3382
3383         if (WARN_ON(!state) || !cstate->base.active) {
3384                 alloc->start = 0;
3385                 alloc->end = 0;
3386                 *num_active = hweight32(dev_priv->active_crtcs);
3387                 return;
3388         }
3389
3390         if (intel_state->active_pipe_changes)
3391                 *num_active = hweight32(intel_state->active_crtcs);
3392         else
3393                 *num_active = hweight32(dev_priv->active_crtcs);
3394
3395         ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3396         WARN_ON(ddb_size == 0);
3397
3398         ddb_size -= 4; /* 4 blocks for bypass path allocation */
3399
3400         /*
3401          * If the state doesn't change the active CRTC's, then there's
3402          * no need to recalculate; the existing pipe allocation limits
3403          * should remain unchanged.  Note that we're safe from racing
3404          * commits since any racing commit that changes the active CRTC
3405          * list would need to grab _all_ crtc locks, including the one
3406          * we currently hold.
3407          */
3408         if (!intel_state->active_pipe_changes) {
3409                 /*
3410                  * alloc may be cleared by clear_intel_crtc_state,
3411                  * copy from old state to be sure
3412                  */
3413                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3414                 return;
3415         }
3416
3417         nth_active_pipe = hweight32(intel_state->active_crtcs &
3418                                     (drm_crtc_mask(for_crtc) - 1));
3419         pipe_size = ddb_size / hweight32(intel_state->active_crtcs);
3420         alloc->start = nth_active_pipe * ddb_size / *num_active;
3421         alloc->end = alloc->start + pipe_size;
3422 }
3423
/*
 * Number of DDB blocks given to the cursor: 32 when exactly one pipe is
 * active, 8 for any other count.
 */
static unsigned int skl_cursor_allocation(int num_active)
{
	return (num_active == 1) ? 32 : 8;
}
3431
3432 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
3433 {
3434         entry->start = reg & 0x3ff;
3435         entry->end = (reg >> 16) & 0x3ff;
3436         if (entry->end)
3437                 entry->end += 1;
3438 }
3439
3440 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
3441                           struct skl_ddb_allocation *ddb /* out */)
3442 {
3443         struct intel_crtc *crtc;
3444
3445         memset(ddb, 0, sizeof(*ddb));
3446
3447         for_each_intel_crtc(&dev_priv->drm, crtc) {
3448                 enum intel_display_power_domain power_domain;
3449                 enum plane_id plane_id;
3450                 enum pipe pipe = crtc->pipe;
3451
3452                 power_domain = POWER_DOMAIN_PIPE(pipe);
3453                 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
3454                         continue;
3455
3456                 for_each_plane_id_on_crtc(crtc, plane_id) {
3457                         u32 val;
3458
3459                         if (plane_id != PLANE_CURSOR)
3460                                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3461                         else
3462                                 val = I915_READ(CUR_BUF_CFG(pipe));
3463
3464                         skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val);
3465                 }
3466
3467                 intel_display_power_put(dev_priv, power_domain);
3468         }
3469 }
3470
3471 /*
3472  * Determines the downscale amount of a plane for the purposes of watermark calculations.
3473  * The bspec defines downscale amount as:
3474  *
3475  * """
3476  * Horizontal down scale amount = maximum[1, Horizontal source size /
3477  *                                           Horizontal destination size]
3478  * Vertical down scale amount = maximum[1, Vertical source size /
3479  *                                         Vertical destination size]
3480  * Total down scale amount = Horizontal down scale amount *
3481  *                           Vertical down scale amount
3482  * """
3483  *
3484  * Return value is provided in 16.16 fixed point form to retain fractional part.
3485  * Caller should take care of dividing & rounding off the value.
3486  */
3487 static uint32_t
3488 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
3489                            const struct intel_plane_state *pstate)
3490 {
3491         struct intel_plane *plane = to_intel_plane(pstate->base.plane);
3492         uint32_t downscale_h, downscale_w;
3493         uint32_t src_w, src_h, dst_w, dst_h;
3494
3495         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
3496                 return DRM_PLANE_HELPER_NO_SCALING;
3497
3498         /* n.b., src is 16.16 fixed point, dst is whole integer */
3499         if (plane->id == PLANE_CURSOR) {
3500                 src_w = pstate->base.src_w;
3501                 src_h = pstate->base.src_h;
3502                 dst_w = pstate->base.crtc_w;
3503                 dst_h = pstate->base.crtc_h;
3504         } else {
3505                 src_w = drm_rect_width(&pstate->base.src);
3506                 src_h = drm_rect_height(&pstate->base.src);
3507                 dst_w = drm_rect_width(&pstate->base.dst);
3508                 dst_h = drm_rect_height(&pstate->base.dst);
3509         }
3510
3511         if (drm_rotation_90_or_270(pstate->base.rotation))
3512                 swap(dst_w, dst_h);
3513
3514         downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
3515         downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
3516
3517         /* Provide result in 16.16 fixed point */
3518         return (uint64_t)downscale_w * downscale_h >> 16;
3519 }
3520
3521 static unsigned int
3522 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
3523                              const struct drm_plane_state *pstate,
3524                              int y)
3525 {
3526         struct intel_plane *plane = to_intel_plane(pstate->plane);
3527         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
3528         uint32_t down_scale_amount, data_rate;
3529         uint32_t width = 0, height = 0;
3530         struct drm_framebuffer *fb;
3531         u32 format;
3532
3533         if (!intel_pstate->base.visible)
3534                 return 0;
3535
3536         fb = pstate->fb;
3537         format = fb->format->format;
3538
3539         if (plane->id == PLANE_CURSOR)
3540                 return 0;
3541         if (y && format != DRM_FORMAT_NV12)
3542                 return 0;
3543
3544         width = drm_rect_width(&intel_pstate->base.src) >> 16;
3545         height = drm_rect_height(&intel_pstate->base.src) >> 16;
3546
3547         if (drm_rotation_90_or_270(pstate->rotation))
3548                 swap(width, height);
3549
3550         /* for planar format */
3551         if (format == DRM_FORMAT_NV12) {
3552                 if (y)  /* y-plane data rate */
3553                         data_rate = width * height *
3554                                 fb->format->cpp[0];
3555                 else    /* uv-plane data rate */
3556                         data_rate = (width / 2) * (height / 2) *
3557                                 fb->format->cpp[1];
3558         } else {
3559                 /* for packed formats */
3560                 data_rate = width * height * fb->format->cpp[0];
3561         }
3562
3563         down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
3564
3565         return (uint64_t)data_rate * down_scale_amount >> 16;
3566 }
3567
3568 /*
3569  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
3570  * a 8192x4096@32bpp framebuffer:
3571  *   3 * 4096 * 8192  * 4 < 2^32
3572  */
3573 static unsigned int
3574 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
3575                                  unsigned *plane_data_rate,
3576                                  unsigned *plane_y_data_rate)
3577 {
3578         struct drm_crtc_state *cstate = &intel_cstate->base;
3579         struct drm_atomic_state *state = cstate->state;
3580         struct drm_plane *plane;
3581         const struct drm_plane_state *pstate;
3582         unsigned int total_data_rate = 0;
3583
3584         if (WARN_ON(!state))
3585                 return 0;
3586
3587         /* Calculate and cache data rate for each plane */
3588         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
3589                 enum plane_id plane_id = to_intel_plane(plane)->id;
3590                 unsigned int rate;
3591
3592                 /* packed/uv */
3593                 rate = skl_plane_relative_data_rate(intel_cstate,
3594                                                     pstate, 0);
3595                 plane_data_rate[plane_id] = rate;
3596
3597                 total_data_rate += rate;
3598
3599                 /* y-plane */
3600                 rate = skl_plane_relative_data_rate(intel_cstate,
3601                                                     pstate, 1);
3602                 plane_y_data_rate[plane_id] = rate;
3603
3604                 total_data_rate += rate;
3605         }
3606
3607         return total_data_rate;
3608 }
3609
3610 static uint16_t
3611 skl_ddb_min_alloc(const struct drm_plane_state *pstate,
3612                   const int y)
3613 {
3614         struct drm_framebuffer *fb = pstate->fb;
3615         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
3616         uint32_t src_w, src_h;
3617         uint32_t min_scanlines = 8;
3618         uint8_t plane_bpp;
3619
3620         if (WARN_ON(!fb))
3621                 return 0;
3622
3623         /* For packed formats, no y-plane, return 0 */
3624         if (y && fb->format->format != DRM_FORMAT_NV12)
3625                 return 0;
3626
3627         /* For Non Y-tile return 8-blocks */
3628         if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
3629             fb->modifier != I915_FORMAT_MOD_Yf_TILED)
3630                 return 8;
3631
3632         src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
3633         src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
3634
3635         if (drm_rotation_90_or_270(pstate->rotation))
3636                 swap(src_w, src_h);
3637
3638         /* Halve UV plane width and height for NV12 */
3639         if (fb->format->format == DRM_FORMAT_NV12 && !y) {
3640                 src_w /= 2;
3641                 src_h /= 2;
3642         }
3643
3644         if (fb->format->format == DRM_FORMAT_NV12 && !y)
3645                 plane_bpp = fb->format->cpp[1];
3646         else
3647                 plane_bpp = fb->format->cpp[0];
3648
3649         if (drm_rotation_90_or_270(pstate->rotation)) {
3650                 switch (plane_bpp) {
3651                 case 1:
3652                         min_scanlines = 32;
3653                         break;
3654                 case 2:
3655                         min_scanlines = 16;
3656                         break;
3657                 case 4:
3658                         min_scanlines = 8;
3659                         break;
3660                 case 8:
3661                         min_scanlines = 4;
3662                         break;
3663                 default:
3664                         WARN(1, "Unsupported pixel depth %u for rotation",
3665                              plane_bpp);
3666                         min_scanlines = 32;
3667                 }
3668         }
3669
3670         return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
3671 }
3672
3673 static void
3674 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
3675                  uint16_t *minimum, uint16_t *y_minimum)
3676 {
3677         const struct drm_plane_state *pstate;
3678         struct drm_plane *plane;
3679
3680         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
3681                 enum plane_id plane_id = to_intel_plane(plane)->id;
3682
3683                 if (plane_id == PLANE_CURSOR)
3684                         continue;
3685
3686                 if (!pstate->visible)
3687                         continue;
3688
3689                 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
3690                 y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
3691         }
3692
3693         minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
3694 }
3695
3696 static int
3697 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
3698                       struct skl_ddb_allocation *ddb /* out */)
3699 {
3700         struct drm_atomic_state *state = cstate->base.state;
3701         struct drm_crtc *crtc = cstate->base.crtc;
3702         struct drm_device *dev = crtc->dev;
3703         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3704         enum pipe pipe = intel_crtc->pipe;
3705         struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
3706         uint16_t alloc_size, start;
3707         uint16_t minimum[I915_MAX_PLANES] = {};
3708         uint16_t y_minimum[I915_MAX_PLANES] = {};
3709         unsigned int total_data_rate;
3710         enum plane_id plane_id;
3711         int num_active;
3712         unsigned plane_data_rate[I915_MAX_PLANES] = {};
3713         unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
3714
3715         /* Clear the partitioning for disabled planes. */
3716         memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
3717         memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
3718
3719         if (WARN_ON(!state))
3720                 return 0;
3721
3722         if (!cstate->base.active) {
3723                 alloc->start = alloc->end = 0;
3724                 return 0;
3725         }
3726
3727         skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
3728         alloc_size = skl_ddb_entry_size(alloc);
3729         if (alloc_size == 0) {
3730                 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
3731                 return 0;
3732         }
3733
3734         skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
3735
3736         /*
3737          * 1. Allocate the mininum required blocks for each active plane
3738          * and allocate the cursor, it doesn't require extra allocation
3739          * proportional to the data rate.
3740          */
3741
3742         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
3743                 alloc_size -= minimum[plane_id];
3744                 alloc_size -= y_minimum[plane_id];
3745         }
3746
3747         ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
3748         ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
3749
3750         /*
3751          * 2. Distribute the remaining space in proportion to the amount of
3752          * data each plane needs to fetch from memory.
3753          *
3754          * FIXME: we may not allocate every single block here.
3755          */
3756         total_data_rate = skl_get_total_relative_data_rate(cstate,
3757                                                            plane_data_rate,
3758                                                            plane_y_data_rate);
3759         if (total_data_rate == 0)
3760                 return 0;
3761
3762         start = alloc->start;
3763         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
3764                 unsigned int data_rate, y_data_rate;
3765                 uint16_t plane_blocks, y_plane_blocks = 0;
3766
3767                 if (plane_id == PLANE_CURSOR)
3768                         continue;
3769
3770                 data_rate = plane_data_rate[plane_id];
3771
3772                 /*
3773                  * allocation for (packed formats) or (uv-plane part of planar format):
3774                  * promote the expression to 64 bits to avoid overflowing, the
3775                  * result is < available as data_rate / total_data_rate < 1
3776                  */
3777                 plane_blocks = minimum[plane_id];
3778                 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
3779                                         total_data_rate);
3780
3781                 /* Leave disabled planes at (0,0) */
3782                 if (data_rate) {
3783                         ddb->plane[pipe][plane_id].start = start;
3784                         ddb->plane[pipe][plane_id].end = start + plane_blocks;
3785                 }
3786
3787                 start += plane_blocks;
3788
3789                 /*
3790                  * allocation for y_plane part of planar format:
3791                  */
3792                 y_data_rate = plane_y_data_rate[plane_id];
3793
3794                 y_plane_blocks = y_minimum[plane_id];
3795                 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
3796                                         total_data_rate);
3797
3798                 if (y_data_rate) {
3799                         ddb->y_plane[pipe][plane_id].start = start;
3800                         ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks;
3801                 }
3802
3803                 start += y_plane_blocks;
3804         }
3805
3806         return 0;
3807 }
3808
3809 /*
3810  * The max latency should be 257 (max the punit can code is 255 and we add 2us
3811  * for the read latency) and cpp should always be <= 8, so that
3812  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
3813  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
3814 */
3815 static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
3816                                          uint32_t latency)
3817 {
3818         uint32_t wm_intermediate_val;
3819         uint_fixed_16_16_t ret;
3820
3821         if (latency == 0)
3822                 return FP_16_16_MAX;
3823
3824         wm_intermediate_val = latency * pixel_rate * cpp;
3825         ret = fixed_16_16_div_round_up_u64(wm_intermediate_val, 1000 * 512);
3826         return ret;
3827 }
3828
3829 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
3830                         uint32_t pipe_htotal,
3831                         uint32_t latency,
3832                         uint_fixed_16_16_t plane_blocks_per_line)
3833 {
3834         uint32_t wm_intermediate_val;
3835         uint_fixed_16_16_t ret;
3836
3837         if (latency == 0)
3838                 return FP_16_16_MAX;
3839
3840         wm_intermediate_val = latency * pixel_rate;
3841         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
3842                                            pipe_htotal * 1000);
3843         ret = mul_u32_fixed_16_16(wm_intermediate_val, plane_blocks_per_line);
3844         return ret;
3845 }
3846
3847 static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
3848                                               struct intel_plane_state *pstate)
3849 {
3850         uint64_t adjusted_pixel_rate;
3851         uint64_t downscale_amount;
3852         uint64_t pixel_rate;
3853
3854         /* Shouldn't reach here on disabled planes... */
3855         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
3856                 return 0;
3857
3858         /*
3859          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
3860          * with additional adjustments for plane-specific scaling.
3861          */
3862         adjusted_pixel_rate = cstate->pixel_rate;
3863         downscale_amount = skl_plane_downscale_amount(cstate, pstate);
3864
3865         pixel_rate = adjusted_pixel_rate * downscale_amount >> 16;
3866         WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0));
3867
3868         return pixel_rate;
3869 }
3870
3871 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3872                                 struct intel_crtc_state *cstate,
3873                                 struct intel_plane_state *intel_pstate,
3874                                 uint16_t ddb_allocation,
3875                                 int level,
3876                                 uint16_t *out_blocks, /* out */
3877                                 uint8_t *out_lines, /* out */
3878                                 bool *enabled /* out */)
3879 {
3880         struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
3881         struct drm_plane_state *pstate = &intel_pstate->base;
3882         struct drm_framebuffer *fb = pstate->fb;
3883         uint32_t latency = dev_priv->wm.skl_latency[level];
3884         uint_fixed_16_16_t method1, method2;
3885         uint_fixed_16_16_t plane_blocks_per_line;
3886         uint_fixed_16_16_t selected_result;
3887         uint32_t interm_pbpl;
3888         uint32_t plane_bytes_per_line;
3889         uint32_t res_blocks, res_lines;
3890         uint8_t cpp;
3891         uint32_t width = 0, height = 0;
3892         uint32_t plane_pixel_rate;
3893         uint_fixed_16_16_t y_tile_minimum;
3894         uint32_t y_min_scanlines;
3895         struct intel_atomic_state *state =
3896                 to_intel_atomic_state(cstate->base.state);
3897         bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
3898         bool y_tiled, x_tiled;
3899
3900         if (latency == 0 ||
3901             !intel_wm_plane_visible(cstate, intel_pstate)) {
3902                 *enabled = false;
3903                 return 0;
3904         }
3905
3906         y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
3907                   fb->modifier == I915_FORMAT_MOD_Yf_TILED;
3908         x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
3909
3910         /* Display WA #1141: kbl. */
3911         if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
3912                 latency += 4;
3913
3914         if (apply_memory_bw_wa && x_tiled)
3915                 latency += 15;
3916
3917         if (plane->id == PLANE_CURSOR) {
3918                 width = intel_pstate->base.crtc_w;
3919                 height = intel_pstate->base.crtc_h;
3920         } else {
3921                 width = drm_rect_width(&intel_pstate->base.src) >> 16;
3922                 height = drm_rect_height(&intel_pstate->base.src) >> 16;
3923         }
3924
3925         if (drm_rotation_90_or_270(pstate->rotation))
3926                 swap(width, height);
3927
3928         cpp = fb->format->cpp[0];
3929         plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
3930
3931         if (drm_rotation_90_or_270(pstate->rotation)) {
3932                 int cpp = (fb->format->format == DRM_FORMAT_NV12) ?
3933                         fb->format->cpp[1] :
3934                         fb->format->cpp[0];
3935
3936                 switch (cpp) {
3937                 case 1:
3938                         y_min_scanlines = 16;
3939                         break;
3940                 case 2:
3941                         y_min_scanlines = 8;
3942                         break;
3943                 case 4:
3944                         y_min_scanlines = 4;
3945                         break;
3946                 default:
3947                         MISSING_CASE(cpp);
3948                         return -EINVAL;
3949                 }
3950         } else {
3951                 y_min_scanlines = 4;
3952         }
3953
3954         if (apply_memory_bw_wa)
3955                 y_min_scanlines *= 2;
3956
3957         plane_bytes_per_line = width * cpp;
3958         if (y_tiled) {
3959                 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
3960                                            y_min_scanlines, 512);
3961                 plane_blocks_per_line =
3962                       fixed_16_16_div_round_up(interm_pbpl, y_min_scanlines);
3963         } else if (x_tiled) {
3964                 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
3965                 plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl);
3966         } else {
3967                 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
3968                 plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl);
3969         }
3970
3971         method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
3972         method2 = skl_wm_method2(plane_pixel_rate,
3973                                  cstate->base.adjusted_mode.crtc_htotal,
3974                                  latency,
3975                                  plane_blocks_per_line);
3976
3977         y_tile_minimum = mul_u32_fixed_16_16(y_min_scanlines,
3978                                              plane_blocks_per_line);
3979
3980         if (y_tiled) {
3981                 selected_result = max_fixed_16_16(method2, y_tile_minimum);
3982         } else {
3983                 if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
3984                     (plane_bytes_per_line / 512 < 1))
3985                         selected_result = method2;
3986                 else if ((ddb_allocation /
3987                         fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1)
3988                         selected_result = min_fixed_16_16(method1, method2);
3989                 else
3990                         selected_result = method1;
3991         }
3992
3993         res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1;
3994         res_lines = DIV_ROUND_UP(selected_result.val,
3995                                  plane_blocks_per_line.val);
3996
3997         if (level >= 1 && level <= 7) {
3998                 if (y_tiled) {
3999                         res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum);
4000                         res_lines += y_min_scanlines;
4001                 } else {
4002                         res_blocks++;
4003                 }
4004         }
4005
4006         if (res_blocks >= ddb_allocation || res_lines > 31) {
4007                 *enabled = false;
4008
4009                 /*
4010                  * If there are no valid level 0 watermarks, then we can't
4011                  * support this display configuration.
4012                  */
4013                 if (level) {
4014                         return 0;
4015                 } else {
4016                         struct drm_plane *plane = pstate->plane;
4017
4018                         DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
4019                         DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
4020                                       plane->base.id, plane->name,
4021                                       res_blocks, ddb_allocation, res_lines);
4022                         return -EINVAL;
4023                 }
4024         }
4025
4026         *out_blocks = res_blocks;
4027         *out_lines = res_lines;
4028         *enabled = true;
4029
4030         return 0;
4031 }
4032
4033 static int
4034 skl_compute_wm_level(const struct drm_i915_private *dev_priv,
4035                      struct skl_ddb_allocation *ddb,
4036                      struct intel_crtc_state *cstate,
4037                      struct intel_plane *intel_plane,
4038                      int level,
4039                      struct skl_wm_level *result)
4040 {
4041         struct drm_atomic_state *state = cstate->base.state;
4042         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4043         struct drm_plane *plane = &intel_plane->base;
4044         struct intel_plane_state *intel_pstate = NULL;
4045         uint16_t ddb_blocks;
4046         enum pipe pipe = intel_crtc->pipe;
4047         int ret;
4048
4049         if (state)
4050                 intel_pstate =
4051                         intel_atomic_get_existing_plane_state(state,
4052                                                               intel_plane);
4053
4054         /*
4055          * Note: If we start supporting multiple pending atomic commits against
4056          * the same planes/CRTC's in the future, plane->state will no longer be
4057          * the correct pre-state to use for the calculations here and we'll
4058          * need to change where we get the 'unchanged' plane data from.
4059          *
4060          * For now this is fine because we only allow one queued commit against
4061          * a CRTC.  Even if the plane isn't modified by this transaction and we
4062          * don't have a plane lock, we still have the CRTC's lock, so we know
4063          * that no other transactions are racing with us to update it.
4064          */
4065         if (!intel_pstate)
4066                 intel_pstate = to_intel_plane_state(plane->state);
4067
4068         WARN_ON(!intel_pstate->base.fb);
4069
4070         ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
4071
4072         ret = skl_compute_plane_wm(dev_priv,
4073                                    cstate,
4074                                    intel_pstate,
4075                                    ddb_blocks,
4076                                    level,
4077                                    &result->plane_res_b,
4078                                    &result->plane_res_l,
4079                                    &result->plane_en);
4080         if (ret)
4081                 return ret;
4082
4083         return 0;
4084 }
4085
4086 static uint32_t
4087 skl_compute_linetime_wm(struct intel_crtc_state *cstate)
4088 {
4089         struct drm_atomic_state *state = cstate->base.state;
4090         struct drm_i915_private *dev_priv = to_i915(state->dev);
4091         uint32_t pixel_rate;
4092         uint32_t linetime_wm;
4093
4094         if (!cstate->base.active)
4095                 return 0;
4096
4097         pixel_rate = cstate->pixel_rate;
4098
4099         if (WARN_ON(pixel_rate == 0))
4100                 return 0;
4101
4102         linetime_wm = DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal *
4103                                    1000, pixel_rate);
4104
4105         /* Display WA #1135: bxt. */
4106         if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
4107                 linetime_wm = DIV_ROUND_UP(linetime_wm, 2);
4108
4109         return linetime_wm;
4110 }
4111
4112 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
4113                                       struct skl_wm_level *trans_wm /* out */)
4114 {
4115         if (!cstate->base.active)
4116                 return;
4117
4118         /* Until we know more, just disable transition WMs */
4119         trans_wm->plane_en = false;
4120 }
4121
4122 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4123                              struct skl_ddb_allocation *ddb,
4124                              struct skl_pipe_wm *pipe_wm)
4125 {
4126         struct drm_device *dev = cstate->base.crtc->dev;
4127         const struct drm_i915_private *dev_priv = to_i915(dev);
4128         struct intel_plane *intel_plane;
4129         struct skl_plane_wm *wm;
4130         int level, max_level = ilk_wm_max_level(dev_priv);
4131         int ret;
4132
4133         /*
4134          * We'll only calculate watermarks for planes that are actually
4135          * enabled, so make sure all other planes are set as disabled.
4136          */
4137         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4138
4139         for_each_intel_plane_mask(&dev_priv->drm,
4140                                   intel_plane,
4141                                   cstate->base.plane_mask) {
4142                 wm = &pipe_wm->planes[intel_plane->id];
4143
4144                 for (level = 0; level <= max_level; level++) {
4145                         ret = skl_compute_wm_level(dev_priv, ddb, cstate,
4146                                                    intel_plane, level,
4147                                                    &wm->wm[level]);
4148                         if (ret)
4149                                 return ret;
4150                 }
4151                 skl_compute_transition_wm(cstate, &wm->trans_wm);
4152         }
4153         pipe_wm->linetime = skl_compute_linetime_wm(cstate);
4154
4155         return 0;
4156 }
4157
4158 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
4159                                 i915_reg_t reg,
4160                                 const struct skl_ddb_entry *entry)
4161 {
4162         if (entry->end)
4163                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
4164         else
4165                 I915_WRITE(reg, 0);
4166 }
4167
4168 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
4169                                i915_reg_t reg,
4170                                const struct skl_wm_level *level)
4171 {
4172         uint32_t val = 0;
4173
4174         if (level->plane_en) {
4175                 val |= PLANE_WM_EN;
4176                 val |= level->plane_res_b;
4177                 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
4178         }
4179
4180         I915_WRITE(reg, val);
4181 }
4182
4183 static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
4184                                const struct skl_plane_wm *wm,
4185                                const struct skl_ddb_allocation *ddb,
4186                                enum plane_id plane_id)
4187 {
4188         struct drm_crtc *crtc = &intel_crtc->base;
4189         struct drm_device *dev = crtc->dev;
4190         struct drm_i915_private *dev_priv = to_i915(dev);
4191         int level, max_level = ilk_wm_max_level(dev_priv);
4192         enum pipe pipe = intel_crtc->pipe;
4193
4194         for (level = 0; level <= max_level; level++) {
4195                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
4196                                    &wm->wm[level]);
4197         }
4198         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
4199                            &wm->trans_wm);
4200
4201         skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
4202                             &ddb->plane[pipe][plane_id]);
4203         skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id),
4204                             &ddb->y_plane[pipe][plane_id]);
4205 }
4206
4207 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
4208                                 const struct skl_plane_wm *wm,
4209                                 const struct skl_ddb_allocation *ddb)
4210 {
4211         struct drm_crtc *crtc = &intel_crtc->base;
4212         struct drm_device *dev = crtc->dev;
4213         struct drm_i915_private *dev_priv = to_i915(dev);
4214         int level, max_level = ilk_wm_max_level(dev_priv);
4215         enum pipe pipe = intel_crtc->pipe;
4216
4217         for (level = 0; level <= max_level; level++) {
4218                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
4219                                    &wm->wm[level]);
4220         }
4221         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
4222
4223         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
4224                             &ddb->plane[pipe][PLANE_CURSOR]);
4225 }
4226
4227 bool skl_wm_level_equals(const struct skl_wm_level *l1,
4228                          const struct skl_wm_level *l2)
4229 {
4230         if (l1->plane_en != l2->plane_en)
4231                 return false;
4232
4233         /* If both planes aren't enabled, the rest shouldn't matter */
4234         if (!l1->plane_en)
4235                 return true;
4236
4237         return (l1->plane_res_l == l2->plane_res_l &&
4238                 l1->plane_res_b == l2->plane_res_b);
4239 }
4240
4241 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
4242                                            const struct skl_ddb_entry *b)
4243 {
4244         return a->start < b->end && b->start < a->end;
4245 }
4246
4247 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries,
4248                                  const struct skl_ddb_entry *ddb,
4249                                  int ignore)
4250 {
4251         int i;
4252
4253         for (i = 0; i < I915_MAX_PIPES; i++)
4254                 if (i != ignore && entries[i] &&
4255                     skl_ddb_entries_overlap(ddb, entries[i]))
4256                         return true;
4257
4258         return false;
4259 }
4260
4261 static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
4262                               const struct skl_pipe_wm *old_pipe_wm,
4263                               struct skl_pipe_wm *pipe_wm, /* out */
4264                               struct skl_ddb_allocation *ddb, /* out */
4265                               bool *changed /* out */)
4266 {
4267         struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
4268         int ret;
4269
4270         ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
4271         if (ret)
4272                 return ret;
4273
4274         if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
4275                 *changed = false;
4276         else
4277                 *changed = true;
4278
4279         return 0;
4280 }
4281
4282 static uint32_t
4283 pipes_modified(struct drm_atomic_state *state)
4284 {
4285         struct drm_crtc *crtc;
4286         struct drm_crtc_state *cstate;
4287         uint32_t i, ret = 0;
4288
4289         for_each_new_crtc_in_state(state, crtc, cstate, i)
4290                 ret |= drm_crtc_mask(crtc);
4291
4292         return ret;
4293 }
4294
4295 static int
4296 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
4297 {
4298         struct drm_atomic_state *state = cstate->base.state;
4299         struct drm_device *dev = state->dev;
4300         struct drm_crtc *crtc = cstate->base.crtc;
4301         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4302         struct drm_i915_private *dev_priv = to_i915(dev);
4303         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4304         struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
4305         struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
4306         struct drm_plane_state *plane_state;
4307         struct drm_plane *plane;
4308         enum pipe pipe = intel_crtc->pipe;
4309
4310         WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
4311
4312         drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
4313                 enum plane_id plane_id = to_intel_plane(plane)->id;
4314
4315                 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
4316                                         &new_ddb->plane[pipe][plane_id]) &&
4317                     skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
4318                                         &new_ddb->y_plane[pipe][plane_id]))
4319                         continue;
4320
4321                 plane_state = drm_atomic_get_plane_state(state, plane);
4322                 if (IS_ERR(plane_state))
4323                         return PTR_ERR(plane_state);
4324         }
4325
4326         return 0;
4327 }
4328
4329 static int
4330 skl_compute_ddb(struct drm_atomic_state *state)
4331 {
4332         struct drm_device *dev = state->dev;
4333         struct drm_i915_private *dev_priv = to_i915(dev);
4334         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4335         struct intel_crtc *intel_crtc;
4336         struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
4337         uint32_t realloc_pipes = pipes_modified(state);
4338         int ret;
4339
4340         /*
4341          * If this is our first atomic update following hardware readout,
4342          * we can't trust the DDB that the BIOS programmed for us.  Let's
4343          * pretend that all pipes switched active status so that we'll
4344          * ensure a full DDB recompute.
4345          */
4346         if (dev_priv->wm.distrust_bios_wm) {
4347                 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
4348                                        state->acquire_ctx);
4349                 if (ret)
4350                         return ret;
4351
4352                 intel_state->active_pipe_changes = ~0;
4353
4354                 /*
4355                  * We usually only initialize intel_state->active_crtcs if we
4356                  * we're doing a modeset; make sure this field is always
4357                  * initialized during the sanitization process that happens
4358                  * on the first commit too.
4359                  */
4360                 if (!intel_state->modeset)
4361                         intel_state->active_crtcs = dev_priv->active_crtcs;
4362         }
4363
4364         /*
4365          * If the modeset changes which CRTC's are active, we need to
4366          * recompute the DDB allocation for *all* active pipes, even
4367          * those that weren't otherwise being modified in any way by this
4368          * atomic commit.  Due to the shrinking of the per-pipe allocations
4369          * when new active CRTC's are added, it's possible for a pipe that
4370          * we were already using and aren't changing at all here to suddenly
4371          * become invalid if its DDB needs exceeds its new allocation.
4372          *
4373          * Note that if we wind up doing a full DDB recompute, we can't let
4374          * any other display updates race with this transaction, so we need
4375          * to grab the lock on *all* CRTC's.
4376          */
4377         if (intel_state->active_pipe_changes) {
4378                 realloc_pipes = ~0;
4379                 intel_state->wm_results.dirty_pipes = ~0;
4380         }
4381
4382         /*
4383          * We're not recomputing for the pipes not included in the commit, so
4384          * make sure we start with the current state.
4385          */
4386         memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
4387
4388         for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
4389                 struct intel_crtc_state *cstate;
4390
4391                 cstate = intel_atomic_get_crtc_state(state, intel_crtc);
4392                 if (IS_ERR(cstate))
4393                         return PTR_ERR(cstate);
4394
4395                 ret = skl_allocate_pipe_ddb(cstate, ddb);
4396                 if (ret)
4397                         return ret;
4398
4399                 ret = skl_ddb_add_affected_planes(cstate);
4400                 if (ret)
4401                         return ret;
4402         }
4403
4404         return 0;
4405 }
4406
4407 static void
4408 skl_copy_wm_for_pipe(struct skl_wm_values *dst,
4409                      struct skl_wm_values *src,
4410                      enum pipe pipe)
4411 {
4412         memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
4413                sizeof(dst->ddb.y_plane[pipe]));
4414         memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
4415                sizeof(dst->ddb.plane[pipe]));
4416 }
4417
4418 static void
4419 skl_print_wm_changes(const struct drm_atomic_state *state)
4420 {
4421         const struct drm_device *dev = state->dev;
4422         const struct drm_i915_private *dev_priv = to_i915(dev);
4423         const struct intel_atomic_state *intel_state =
4424                 to_intel_atomic_state(state);
4425         const struct drm_crtc *crtc;
4426         const struct drm_crtc_state *cstate;
4427         const struct intel_plane *intel_plane;
4428         const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
4429         const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
4430         int i;
4431
4432         for_each_new_crtc_in_state(state, crtc, cstate, i) {
4433                 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4434                 enum pipe pipe = intel_crtc->pipe;
4435
4436                 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
4437                         enum plane_id plane_id = intel_plane->id;
4438                         const struct skl_ddb_entry *old, *new;
4439
4440                         old = &old_ddb->plane[pipe][plane_id];
4441                         new = &new_ddb->plane[pipe][plane_id];
4442
4443                         if (skl_ddb_entry_equal(old, new))
4444                                 continue;
4445
4446                         DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
4447                                          intel_plane->base.base.id,
4448                                          intel_plane->base.name,
4449                                          old->start, old->end,
4450                                          new->start, new->end);
4451                 }
4452         }
4453 }
4454
4455 static int
4456 skl_compute_wm(struct drm_atomic_state *state)
4457 {
4458         struct drm_crtc *crtc;
4459         struct drm_crtc_state *cstate;
4460         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4461         struct skl_wm_values *results = &intel_state->wm_results;
4462         struct skl_pipe_wm *pipe_wm;
4463         bool changed = false;
4464         int ret, i;
4465
4466         /*
4467          * If this transaction isn't actually touching any CRTC's, don't
4468          * bother with watermark calculation.  Note that if we pass this
4469          * test, we're guaranteed to hold at least one CRTC state mutex,
4470          * which means we can safely use values like dev_priv->active_crtcs
4471          * since any racing commits that want to update them would need to
4472          * hold _all_ CRTC state mutexes.
4473          */
4474         for_each_new_crtc_in_state(state, crtc, cstate, i)
4475                 changed = true;
4476         if (!changed)
4477                 return 0;
4478
4479         /* Clear all dirty flags */
4480         results->dirty_pipes = 0;
4481
4482         ret = skl_compute_ddb(state);
4483         if (ret)
4484                 return ret;
4485
4486         /*
4487          * Calculate WM's for all pipes that are part of this transaction.
4488          * Note that the DDB allocation above may have added more CRTC's that
4489          * weren't otherwise being modified (and set bits in dirty_pipes) if
4490          * pipe allocations had to change.
4491          *
4492          * FIXME:  Now that we're doing this in the atomic check phase, we
4493          * should allow skl_update_pipe_wm() to return failure in cases where
4494          * no suitable watermark values can be found.
4495          */
4496         for_each_new_crtc_in_state(state, crtc, cstate, i) {
4497                 struct intel_crtc_state *intel_cstate =
4498                         to_intel_crtc_state(cstate);
4499                 const struct skl_pipe_wm *old_pipe_wm =
4500                         &to_intel_crtc_state(crtc->state)->wm.skl.optimal;
4501
4502                 pipe_wm = &intel_cstate->wm.skl.optimal;
4503                 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
4504                                          &results->ddb, &changed);
4505                 if (ret)
4506                         return ret;
4507
4508                 if (changed)
4509                         results->dirty_pipes |= drm_crtc_mask(crtc);
4510
4511                 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
4512                         /* This pipe's WM's did not change */
4513                         continue;
4514
4515                 intel_cstate->update_wm_pre = true;
4516         }
4517
4518         skl_print_wm_changes(state);
4519
4520         return 0;
4521 }
4522
4523 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
4524                                       struct intel_crtc_state *cstate)
4525 {
4526         struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
4527         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
4528         struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
4529         const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
4530         enum pipe pipe = crtc->pipe;
4531         enum plane_id plane_id;
4532
4533         if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
4534                 return;
4535
4536         I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
4537
4538         for_each_plane_id_on_crtc(crtc, plane_id) {
4539                 if (plane_id != PLANE_CURSOR)
4540                         skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
4541                                            ddb, plane_id);
4542                 else
4543                         skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
4544                                             ddb);
4545         }
4546 }
4547
4548 static void skl_initial_wm(struct intel_atomic_state *state,
4549                            struct intel_crtc_state *cstate)
4550 {
4551         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4552         struct drm_device *dev = intel_crtc->base.dev;
4553         struct drm_i915_private *dev_priv = to_i915(dev);
4554         struct skl_wm_values *results = &state->wm_results;
4555         struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
4556         enum pipe pipe = intel_crtc->pipe;
4557
4558         if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
4559                 return;
4560
4561         mutex_lock(&dev_priv->wm.wm_mutex);
4562
4563         if (cstate->base.active_changed)
4564                 skl_atomic_update_crtc_wm(state, cstate);
4565
4566         skl_copy_wm_for_pipe(hw_vals, results, pipe);
4567
4568         mutex_unlock(&dev_priv->wm.wm_mutex);
4569 }
4570
4571 static void ilk_compute_wm_config(struct drm_device *dev,
4572                                   struct intel_wm_config *config)
4573 {
4574         struct intel_crtc *crtc;
4575
4576         /* Compute the currently _active_ config */
4577         for_each_intel_crtc(dev, crtc) {
4578                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
4579
4580                 if (!wm->pipe_enabled)
4581                         continue;
4582
4583                 config->sprites_enabled |= wm->sprites_enabled;
4584                 config->sprites_scaled |= wm->sprites_scaled;
4585                 config->num_pipes_active++;
4586         }
4587 }
4588
4589 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
4590 {
4591         struct drm_device *dev = &dev_priv->drm;
4592         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
4593         struct ilk_wm_maximums max;
4594         struct intel_wm_config config = {};
4595         struct ilk_wm_values results = {};
4596         enum intel_ddb_partitioning partitioning;
4597
4598         ilk_compute_wm_config(dev, &config);
4599
4600         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
4601         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
4602
4603         /* 5/6 split only in single pipe config on IVB+ */
4604         if (INTEL_GEN(dev_priv) >= 7 &&
4605             config.num_pipes_active == 1 && config.sprites_enabled) {
4606                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
4607                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
4608
4609                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
4610         } else {
4611                 best_lp_wm = &lp_wm_1_2;
4612         }
4613
4614         partitioning = (best_lp_wm == &lp_wm_1_2) ?
4615                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
4616
4617         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
4618
4619         ilk_write_wm_values(dev_priv, &results);
4620 }
4621
4622 static void ilk_initial_watermarks(struct intel_atomic_state *state,
4623                                    struct intel_crtc_state *cstate)
4624 {
4625         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
4626         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4627
4628         mutex_lock(&dev_priv->wm.wm_mutex);
4629         intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
4630         ilk_program_watermarks(dev_priv);
4631         mutex_unlock(&dev_priv->wm.wm_mutex);
4632 }
4633
4634 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
4635                                     struct intel_crtc_state *cstate)
4636 {
4637         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
4638         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4639
4640         mutex_lock(&dev_priv->wm.wm_mutex);
4641         if (cstate->wm.need_postvbl_update) {
4642                 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
4643                 ilk_program_watermarks(dev_priv);
4644         }
4645         mutex_unlock(&dev_priv->wm.wm_mutex);
4646 }
4647
4648 static inline void skl_wm_level_from_reg_val(uint32_t val,
4649                                              struct skl_wm_level *level)
4650 {
4651         level->plane_en = val & PLANE_WM_EN;
4652         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
4653         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
4654                 PLANE_WM_LINES_MASK;
4655 }
4656
4657 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
4658                               struct skl_pipe_wm *out)
4659 {
4660         struct drm_i915_private *dev_priv = to_i915(crtc->dev);
4661         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4662         enum pipe pipe = intel_crtc->pipe;
4663         int level, max_level;
4664         enum plane_id plane_id;
4665         uint32_t val;
4666
4667         max_level = ilk_wm_max_level(dev_priv);
4668
4669         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4670                 struct skl_plane_wm *wm = &out->planes[plane_id];
4671
4672                 for (level = 0; level <= max_level; level++) {
4673                         if (plane_id != PLANE_CURSOR)
4674                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
4675                         else
4676                                 val = I915_READ(CUR_WM(pipe, level));
4677
4678                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
4679                 }
4680
4681                 if (plane_id != PLANE_CURSOR)
4682                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
4683                 else
4684                         val = I915_READ(CUR_WM_TRANS(pipe));
4685
4686                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
4687         }
4688
4689         if (!intel_crtc->active)
4690                 return;
4691
4692         out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
4693 }
4694
4695 void skl_wm_get_hw_state(struct drm_device *dev)
4696 {
4697         struct drm_i915_private *dev_priv = to_i915(dev);
4698         struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
4699         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
4700         struct drm_crtc *crtc;
4701         struct intel_crtc *intel_crtc;
4702         struct intel_crtc_state *cstate;
4703
4704         skl_ddb_get_hw_state(dev_priv, ddb);
4705         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
4706                 intel_crtc = to_intel_crtc(crtc);
4707                 cstate = to_intel_crtc_state(crtc->state);
4708
4709                 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
4710
4711                 if (intel_crtc->active)
4712                         hw->dirty_pipes |= drm_crtc_mask(crtc);
4713         }
4714
4715         if (dev_priv->active_crtcs) {
4716                 /* Fully recompute DDB on first atomic commit */
4717                 dev_priv->wm.distrust_bios_wm = true;
4718         } else {
4719                 /* Easy/common case; just sanitize DDB now if everything off */
4720                 memset(ddb, 0, sizeof(*ddb));
4721         }
4722 }
4723
4724 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
4725 {
4726         struct drm_device *dev = crtc->dev;
4727         struct drm_i915_private *dev_priv = to_i915(dev);
4728         struct ilk_wm_values *hw = &dev_priv->wm.hw;
4729         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4730         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
4731         struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
4732         enum pipe pipe = intel_crtc->pipe;
4733         static const i915_reg_t wm0_pipe_reg[] = {
4734                 [PIPE_A] = WM0_PIPEA_ILK,
4735                 [PIPE_B] = WM0_PIPEB_ILK,
4736                 [PIPE_C] = WM0_PIPEC_IVB,
4737         };
4738
4739         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
4740         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
4741                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
4742
4743         memset(active, 0, sizeof(*active));
4744
4745         active->pipe_enabled = intel_crtc->active;
4746
4747         if (active->pipe_enabled) {
4748                 u32 tmp = hw->wm_pipe[pipe];
4749
4750                 /*
4751                  * For active pipes LP0 watermark is marked as
4752                  * enabled, and LP1+ watermaks as disabled since
4753                  * we can't really reverse compute them in case
4754                  * multiple pipes are active.
4755                  */
4756                 active->wm[0].enable = true;
4757                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
4758                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
4759                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
4760                 active->linetime = hw->wm_linetime[pipe];
4761         } else {
4762                 int level, max_level = ilk_wm_max_level(dev_priv);
4763
4764                 /*
4765                  * For inactive pipes, all watermark levels
4766                  * should be marked as enabled but zeroed,
4767                  * which is what we'd compute them to.
4768                  */
4769                 for (level = 0; level <= max_level; level++)
4770                         active->wm[level].enable = true;
4771         }
4772
4773         intel_crtc->wm.active.ilk = *active;
4774 }
4775
4776 #define _FW_WM(value, plane) \
4777         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
4778 #define _FW_WM_VLV(value, plane) \
4779         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
4780
4781 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
4782                                struct vlv_wm_values *wm)
4783 {
4784         enum pipe pipe;
4785         uint32_t tmp;
4786
4787         for_each_pipe(dev_priv, pipe) {
4788                 tmp = I915_READ(VLV_DDL(pipe));
4789
4790                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
4791                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
4792                 wm->ddl[pipe].plane[PLANE_CURSOR] =
4793                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
4794                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
4795                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
4796                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
4797                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
4798         }
4799
4800         tmp = I915_READ(DSPFW1);
4801         wm->sr.plane = _FW_WM(tmp, SR);
4802         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
4803         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
4804         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
4805
4806         tmp = I915_READ(DSPFW2);
4807         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
4808         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
4809         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
4810
4811         tmp = I915_READ(DSPFW3);
4812         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
4813
4814         if (IS_CHERRYVIEW(dev_priv)) {
4815                 tmp = I915_READ(DSPFW7_CHV);
4816                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
4817                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
4818
4819                 tmp = I915_READ(DSPFW8_CHV);
4820                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
4821                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
4822
4823                 tmp = I915_READ(DSPFW9_CHV);
4824                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
4825                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
4826
4827                 tmp = I915_READ(DSPHOWM);
4828                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4829                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
4830                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
4831                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
4832                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4833                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4834                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
4835                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4836                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4837                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
4838         } else {
4839                 tmp = I915_READ(DSPFW7);
4840                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
4841                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
4842
4843                 tmp = I915_READ(DSPHOWM);
4844                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4845                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4846                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4847                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
4848                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4849                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4850                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
4851         }
4852 }
4853
4854 #undef _FW_WM
4855 #undef _FW_WM_VLV
4856
4857 void vlv_wm_get_hw_state(struct drm_device *dev)
4858 {
4859         struct drm_i915_private *dev_priv = to_i915(dev);
4860         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
4861         struct intel_crtc *crtc;
4862         u32 val;
4863
4864         vlv_read_wm_values(dev_priv, wm);
4865
4866         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4867         wm->level = VLV_WM_LEVEL_PM2;
4868
4869         if (IS_CHERRYVIEW(dev_priv)) {
4870                 mutex_lock(&dev_priv->rps.hw_lock);
4871
4872                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4873                 if (val & DSP_MAXFIFO_PM5_ENABLE)
4874                         wm->level = VLV_WM_LEVEL_PM5;
4875
4876                 /*
4877                  * If DDR DVFS is disabled in the BIOS, Punit
4878                  * will never ack the request. So if that happens
4879                  * assume we don't have to enable/disable DDR DVFS
4880                  * dynamically. To test that just set the REQ_ACK
4881                  * bit to poke the Punit, but don't change the
4882                  * HIGH/LOW bits so that we don't actually change
4883                  * the current state.
4884                  */
4885                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4886                 val |= FORCE_DDR_FREQ_REQ_ACK;
4887                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4888
4889                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4890                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4891                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4892                                       "assuming DDR DVFS is disabled\n");
4893                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4894                 } else {
4895                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4896                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4897                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
4898                 }
4899
4900                 mutex_unlock(&dev_priv->rps.hw_lock);
4901         }
4902
4903         for_each_intel_crtc(dev, crtc) {
4904                 struct intel_crtc_state *crtc_state =
4905                         to_intel_crtc_state(crtc->base.state);
4906                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
4907                 const struct vlv_fifo_state *fifo_state =
4908                         &crtc_state->wm.vlv.fifo_state;
4909                 enum pipe pipe = crtc->pipe;
4910                 enum plane_id plane_id;
4911                 int level;
4912
4913                 vlv_get_fifo_size(crtc_state);
4914
4915                 active->num_levels = wm->level + 1;
4916                 active->cxsr = wm->cxsr;
4917
4918                 for (level = 0; level < active->num_levels; level++) {
4919                         struct g4x_pipe_wm *raw =
4920                                 &crtc_state->wm.vlv.raw[level];
4921
4922                         active->sr[level].plane = wm->sr.plane;
4923                         active->sr[level].cursor = wm->sr.cursor;
4924
4925                         for_each_plane_id_on_crtc(crtc, plane_id) {
4926                                 active->wm[level].plane[plane_id] =
4927                                         wm->pipe[pipe].plane[plane_id];
4928
4929                                 raw->plane[plane_id] =
4930                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
4931                                                             fifo_state->plane[plane_id]);
4932                         }
4933                 }
4934
4935                 for_each_plane_id_on_crtc(crtc, plane_id)
4936                         vlv_raw_plane_wm_set(crtc_state, level,
4937                                              plane_id, USHRT_MAX);
4938                 vlv_invalidate_wms(crtc, active, level);
4939
4940                 crtc_state->wm.vlv.optimal = *active;
4941                 crtc_state->wm.vlv.intermediate = *active;
4942
4943                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4944                               pipe_name(pipe),
4945                               wm->pipe[pipe].plane[PLANE_PRIMARY],
4946                               wm->pipe[pipe].plane[PLANE_CURSOR],
4947                               wm->pipe[pipe].plane[PLANE_SPRITE0],
4948                               wm->pipe[pipe].plane[PLANE_SPRITE1]);
4949         }
4950
4951         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4952                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4953 }
4954
4955 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
4956 {
4957         struct intel_plane *plane;
4958         struct intel_crtc *crtc;
4959
4960         mutex_lock(&dev_priv->wm.wm_mutex);
4961
4962         for_each_intel_plane(&dev_priv->drm, plane) {
4963                 struct intel_crtc *crtc =
4964                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
4965                 struct intel_crtc_state *crtc_state =
4966                         to_intel_crtc_state(crtc->base.state);
4967                 struct intel_plane_state *plane_state =
4968                         to_intel_plane_state(plane->base.state);
4969                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
4970                 const struct vlv_fifo_state *fifo_state =
4971                         &crtc_state->wm.vlv.fifo_state;
4972                 enum plane_id plane_id = plane->id;
4973                 int level;
4974
4975                 if (plane_state->base.visible)
4976                         continue;
4977
4978                 for (level = 0; level < wm_state->num_levels; level++) {
4979                         struct g4x_pipe_wm *raw =
4980                                 &crtc_state->wm.vlv.raw[level];
4981
4982                         raw->plane[plane_id] = 0;
4983
4984                         wm_state->wm[level].plane[plane_id] =
4985                                 vlv_invert_wm_value(raw->plane[plane_id],
4986                                                     fifo_state->plane[plane_id]);
4987                 }
4988         }
4989
4990         for_each_intel_crtc(&dev_priv->drm, crtc) {
4991                 struct intel_crtc_state *crtc_state =
4992                         to_intel_crtc_state(crtc->base.state);
4993
4994                 crtc_state->wm.vlv.intermediate =
4995                         crtc_state->wm.vlv.optimal;
4996                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
4997         }
4998
4999         vlv_program_watermarks(dev_priv);
5000
5001         mutex_unlock(&dev_priv->wm.wm_mutex);
5002 }
5003
5004 void ilk_wm_get_hw_state(struct drm_device *dev)
5005 {
5006         struct drm_i915_private *dev_priv = to_i915(dev);
5007         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5008         struct drm_crtc *crtc;
5009
5010         for_each_crtc(dev, crtc)
5011                 ilk_pipe_wm_get_hw_state(crtc);
5012
5013         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
5014         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
5015         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
5016
5017         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
5018         if (INTEL_GEN(dev_priv) >= 7) {
5019                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
5020                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
5021         }
5022
5023         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5024                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
5025                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
5026         else if (IS_IVYBRIDGE(dev_priv))
5027                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
5028                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
5029
5030         hw->enable_fbc_wm =
5031                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
5032 }
5033
5034 /**
5035  * intel_update_watermarks - update FIFO watermark values based on current modes
5036  *
5037  * Calculate watermark values for the various WM regs based on current mode
5038  * and plane configuration.
5039  *
5040  * There are several cases to deal with here:
5041  *   - normal (i.e. non-self-refresh)
5042  *   - self-refresh (SR) mode
5043  *   - lines are large relative to FIFO size (buffer can hold up to 2)
5044  *   - lines are small relative to FIFO size (buffer can hold more than 2
5045  *     lines), so need to account for TLB latency
5046  *
5047  *   The normal calculation is:
5048  *     watermark = dotclock * bytes per pixel * latency
5049  *   where latency is platform & configuration dependent (we assume pessimal
5050  *   values here).
5051  *
5052  *   The SR calculation is:
5053  *     watermark = (trunc(latency/line time)+1) * surface width *
5054  *       bytes per pixel
5055  *   where
5056  *     line time = htotal / dotclock
5057  *     surface width = hdisplay for normal plane and 64 for cursor
5058  *   and latency is assumed to be high, as above.
5059  *
5060  * The final value programmed to the register should always be rounded up,
5061  * and include an extra 2 entries to account for clock crossings.
5062  *
5063  * We don't use the sprite, so we can ignore that.  And on Crestline we have
5064  * to set the non-SR watermarks to 8.
5065  */
5066 void intel_update_watermarks(struct intel_crtc *crtc)
5067 {
5068         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5069
5070         if (dev_priv->display.update_wm)
5071                 dev_priv->display.update_wm(crtc);
5072 }
5073
/*
 * Lock protecting IPS related data structures.
 *
 * Declared here without a "static" qualifier.  In this file it is taken with
 * spin_lock_irq() around the DRPS enable and disable sequences, and
 * ironlake_set_drps() asserts through lockdep that its caller holds it.
 */
DEFINE_SPINLOCK(mchdev_lock);

/*
 * Global for the IPS driver to get at the current i915 device. Protected by
 * mchdev_lock.
 */
static struct drm_i915_private *i915_mch_dev;
5082
5083 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
5084 {
5085         u16 rgvswctl;
5086
5087         lockdep_assert_held(&mchdev_lock);
5088
5089         rgvswctl = I915_READ16(MEMSWCTL);
5090         if (rgvswctl & MEMCTL_CMD_STS) {
5091                 DRM_DEBUG("gpu busy, RCS change rejected\n");
5092                 return false; /* still busy with another command */
5093         }
5094
5095         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
5096                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
5097         I915_WRITE16(MEMSWCTL, rgvswctl);
5098         POSTING_READ16(MEMSWCTL);
5099
5100         rgvswctl |= MEMCTL_CMD_STS;
5101         I915_WRITE16(MEMSWCTL, rgvswctl);
5102
5103         return true;
5104 }
5105
5106 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
5107 {
5108         u32 rgvmodectl;
5109         u8 fmax, fmin, fstart, vstart;
5110
5111         spin_lock_irq(&mchdev_lock);
5112
5113         rgvmodectl = I915_READ(MEMMODECTL);
5114
5115         /* Enable temp reporting */
5116         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
5117         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
5118
5119         /* 100ms RC evaluation intervals */
5120         I915_WRITE(RCUPEI, 100000);
5121         I915_WRITE(RCDNEI, 100000);
5122
5123         /* Set max/min thresholds to 90ms and 80ms respectively */
5124         I915_WRITE(RCBMAXAVG, 90000);
5125         I915_WRITE(RCBMINAVG, 80000);
5126
5127         I915_WRITE(MEMIHYST, 1);
5128
5129         /* Set up min, max, and cur for interrupt handling */
5130         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
5131         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
5132         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
5133                 MEMMODE_FSTART_SHIFT;
5134
5135         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
5136                 PXVFREQ_PX_SHIFT;
5137
5138         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
5139         dev_priv->ips.fstart = fstart;
5140
5141         dev_priv->ips.max_delay = fstart;
5142         dev_priv->ips.min_delay = fmin;
5143         dev_priv->ips.cur_delay = fstart;
5144
5145         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
5146                          fmax, fmin, fstart);
5147
5148         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
5149
5150         /*
5151          * Interrupts will be enabled in ironlake_irq_postinstall
5152          */
5153
5154         I915_WRITE(VIDSTART, vstart);
5155         POSTING_READ(VIDSTART);
5156
5157         rgvmodectl |= MEMMODE_SWMODE_EN;
5158         I915_WRITE(MEMMODECTL, rgvmodectl);
5159
5160         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
5161                 DRM_ERROR("stuck trying to change perf mode\n");
5162         mdelay(1);
5163
5164         ironlake_set_drps(dev_priv, fstart);
5165
5166         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
5167                 I915_READ(DDREC) + I915_READ(CSIEC);
5168         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
5169         dev_priv->ips.last_count2 = I915_READ(GFXEC);
5170         dev_priv->ips.last_time2 = ktime_get_raw_ns();
5171
5172         spin_unlock_irq(&mchdev_lock);
5173 }
5174
5175 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
5176 {
5177         u16 rgvswctl;
5178
5179         spin_lock_irq(&mchdev_lock);
5180
5181         rgvswctl = I915_READ16(MEMSWCTL);
5182
5183         /* Ack interrupts, disable EFC interrupt */
5184         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
5185         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
5186         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
5187         I915_WRITE(DEIIR, DE_PCU_EVENT);
5188         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
5189
5190         /* Go back to the starting frequency */
5191         ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
5192         mdelay(1);
5193         rgvswctl |= MEMCTL_CMD_STS;
5194         I915_WRITE(MEMSWCTL, rgvswctl);
5195         mdelay(1);
5196
5197         spin_unlock_irq(&mchdev_lock);
5198 }
5199
5200 /* There's a funny hw issue where the hw returns all 0 when reading from
5201  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
5202  * ourselves, instead of doing a rmw cycle (which might result in us clearing
5203  * all limits and the gpu stuck at whatever frequency it is at atm).
5204  */
5205 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
5206 {
5207         u32 limits;
5208
5209         /* Only set the down limit when we've reached the lowest level to avoid
5210          * getting more interrupts, otherwise leave this clear. This prevents a
5211          * race in the hw when coming out of rc6: There's a tiny window where
5212          * the hw runs at the minimal clock before selecting the desired
5213          * frequency, if the down threshold expires in that window we will not
5214          * receive a down interrupt. */
5215         if (IS_GEN9(dev_priv)) {
5216                 limits = (dev_priv->rps.max_freq_softlimit) << 23;
5217                 if (val <= dev_priv->rps.min_freq_softlimit)
5218                         limits |= (dev_priv->rps.min_freq_softlimit) << 14;
5219         } else {
5220                 limits = dev_priv->rps.max_freq_softlimit << 24;
5221                 if (val <= dev_priv->rps.min_freq_softlimit)
5222                         limits |= dev_priv->rps.min_freq_softlimit << 16;
5223         }
5224
5225         return limits;
5226 }
5227
5228 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
5229 {
5230         int new_power;
5231         u32 threshold_up = 0, threshold_down = 0; /* in % */
5232         u32 ei_up = 0, ei_down = 0;
5233
5234         new_power = dev_priv->rps.power;
5235         switch (dev_priv->rps.power) {
5236         case LOW_POWER:
5237                 if (val > dev_priv->rps.efficient_freq + 1 &&
5238                     val > dev_priv->rps.cur_freq)
5239                         new_power = BETWEEN;
5240                 break;
5241
5242         case BETWEEN:
5243                 if (val <= dev_priv->rps.efficient_freq &&
5244                     val < dev_priv->rps.cur_freq)
5245                         new_power = LOW_POWER;
5246                 else if (val >= dev_priv->rps.rp0_freq &&
5247                          val > dev_priv->rps.cur_freq)
5248                         new_power = HIGH_POWER;
5249                 break;
5250
5251         case HIGH_POWER:
5252                 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 &&
5253                     val < dev_priv->rps.cur_freq)
5254                         new_power = BETWEEN;
5255                 break;
5256         }
5257         /* Max/min bins are special */
5258         if (val <= dev_priv->rps.min_freq_softlimit)
5259                 new_power = LOW_POWER;
5260         if (val >= dev_priv->rps.max_freq_softlimit)
5261                 new_power = HIGH_POWER;
5262         if (new_power == dev_priv->rps.power)
5263                 return;
5264
5265         /* Note the units here are not exactly 1us, but 1280ns. */
5266         switch (new_power) {
5267         case LOW_POWER:
5268                 /* Upclock if more than 95% busy over 16ms */
5269                 ei_up = 16000;
5270                 threshold_up = 95;
5271
5272                 /* Downclock if less than 85% busy over 32ms */
5273                 ei_down = 32000;
5274                 threshold_down = 85;
5275                 break;
5276
5277         case BETWEEN:
5278                 /* Upclock if more than 90% busy over 13ms */
5279                 ei_up = 13000;
5280                 threshold_up = 90;
5281
5282                 /* Downclock if less than 75% busy over 32ms */
5283                 ei_down = 32000;
5284                 threshold_down = 75;
5285                 break;
5286
5287         case HIGH_POWER:
5288                 /* Upclock if more than 85% busy over 10ms */
5289                 ei_up = 10000;
5290                 threshold_up = 85;
5291
5292                 /* Downclock if less than 60% busy over 32ms */
5293                 ei_down = 32000;
5294                 threshold_down = 60;
5295                 break;
5296         }
5297
5298         /* When byt can survive without system hang with dynamic
5299          * sw freq adjustments, this restriction can be lifted.
5300          */
5301         if (IS_VALLEYVIEW(dev_priv))
5302                 goto skip_hw_write;
5303
5304         I915_WRITE(GEN6_RP_UP_EI,
5305                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
5306         I915_WRITE(GEN6_RP_UP_THRESHOLD,
5307                    GT_INTERVAL_FROM_US(dev_priv,
5308                                        ei_up * threshold_up / 100));
5309
5310         I915_WRITE(GEN6_RP_DOWN_EI,
5311                    GT_INTERVAL_FROM_US(dev_priv, ei_down));
5312         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
5313                    GT_INTERVAL_FROM_US(dev_priv,
5314                                        ei_down * threshold_down / 100));
5315
5316         I915_WRITE(GEN6_RP_CONTROL,
5317                    GEN6_RP_MEDIA_TURBO |
5318                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5319                    GEN6_RP_MEDIA_IS_GFX |
5320                    GEN6_RP_ENABLE |
5321                    GEN6_RP_UP_BUSY_AVG |
5322                    GEN6_RP_DOWN_IDLE_AVG);
5323
5324 skip_hw_write:
5325         dev_priv->rps.power = new_power;
5326         dev_priv->rps.up_threshold = threshold_up;
5327         dev_priv->rps.down_threshold = threshold_down;
5328         dev_priv->rps.last_adj = 0;
5329 }
5330
5331 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
5332 {
5333         u32 mask = 0;
5334
5335         /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
5336         if (val > dev_priv->rps.min_freq_softlimit)
5337                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
5338         if (val < dev_priv->rps.max_freq_softlimit)
5339                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
5340
5341         mask &= dev_priv->pm_rps_events;
5342
5343         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
5344 }
5345
5346 /* gen6_set_rps is called to update the frequency request, but should also be
5347  * called when the range (min_delay and max_delay) is modified so that we can
5348  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
5349 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
5350 {
5351         /* min/max delay may still have been modified so be sure to
5352          * write the limits value.
5353          */
5354         if (val != dev_priv->rps.cur_freq) {
5355                 gen6_set_rps_thresholds(dev_priv, val);
5356
5357                 if (IS_GEN9(dev_priv))
5358                         I915_WRITE(GEN6_RPNSWREQ,
5359                                    GEN9_FREQUENCY(val));
5360                 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5361                         I915_WRITE(GEN6_RPNSWREQ,
5362                                    HSW_FREQUENCY(val));
5363                 else
5364                         I915_WRITE(GEN6_RPNSWREQ,
5365                                    GEN6_FREQUENCY(val) |
5366                                    GEN6_OFFSET(0) |
5367                                    GEN6_AGGRESSIVE_TURBO);
5368         }
5369
5370         /* Make sure we continue to get interrupts
5371          * until we hit the minimum or maximum frequencies.
5372          */
5373         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
5374         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
5375
5376         dev_priv->rps.cur_freq = val;
5377         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
5378
5379         return 0;
5380 }
5381
5382 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
5383 {
5384         int err;
5385
5386         if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
5387                       "Odd GPU freq value\n"))
5388                 val &= ~1;
5389
5390         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
5391
5392         if (val != dev_priv->rps.cur_freq) {
5393                 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
5394                 if (err)
5395                         return err;
5396
5397                 gen6_set_rps_thresholds(dev_priv, val);
5398         }
5399
5400         dev_priv->rps.cur_freq = val;
5401         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
5402
5403         return 0;
5404 }
5405
5406 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
5407  *
5408  * * If Gfx is Idle, then
5409  * 1. Forcewake Media well.
5410  * 2. Request idle freq.
5411  * 3. Release Forcewake of Media well.
5412 */
5413 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
5414 {
5415         u32 val = dev_priv->rps.idle_freq;
5416         int err;
5417
5418         if (dev_priv->rps.cur_freq <= val)
5419                 return;
5420
5421         /* The punit delays the write of the frequency and voltage until it
5422          * determines the GPU is awake. During normal usage we don't want to
5423          * waste power changing the frequency if the GPU is sleeping (rc6).
5424          * However, the GPU and driver is now idle and we do not want to delay
5425          * switching to minimum voltage (reducing power whilst idle) as we do
5426          * not expect to be woken in the near future and so must flush the
5427          * change by waking the device.
5428          *
5429          * We choose to take the media powerwell (either would do to trick the
5430          * punit into committing the voltage change) as that takes a lot less
5431          * power than the render powerwell.
5432          */
5433         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
5434         err = valleyview_set_rps(dev_priv, val);
5435         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
5436
5437         if (err)
5438                 DRM_ERROR("Failed to set RPS for idle\n");
5439 }
5440
5441 void gen6_rps_busy(struct drm_i915_private *dev_priv)
5442 {
5443         mutex_lock(&dev_priv->rps.hw_lock);
5444         if (dev_priv->rps.enabled) {
5445                 u8 freq;
5446
5447                 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
5448                         gen6_rps_reset_ei(dev_priv);
5449                 I915_WRITE(GEN6_PMINTRMSK,
5450                            gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
5451
5452                 gen6_enable_rps_interrupts(dev_priv);
5453
5454                 /* Use the user's desired frequency as a guide, but for better
5455                  * performance, jump directly to RPe as our starting frequency.
5456                  */
5457                 freq = max(dev_priv->rps.cur_freq,
5458                            dev_priv->rps.efficient_freq);
5459
5460                 if (intel_set_rps(dev_priv,
5461                                   clamp(freq,
5462                                         dev_priv->rps.min_freq_softlimit,
5463                                         dev_priv->rps.max_freq_softlimit)))
5464                         DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
5465         }
5466         mutex_unlock(&dev_priv->rps.hw_lock);
5467 }
5468
5469 void gen6_rps_idle(struct drm_i915_private *dev_priv)
5470 {
5471         /* Flush our bottom-half so that it does not race with us
5472          * setting the idle frequency and so that it is bounded by
5473          * our rpm wakeref. And then disable the interrupts to stop any
5474          * futher RPS reclocking whilst we are asleep.
5475          */
5476         gen6_disable_rps_interrupts(dev_priv);
5477
5478         mutex_lock(&dev_priv->rps.hw_lock);
5479         if (dev_priv->rps.enabled) {
5480                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
5481                         vlv_set_rps_idle(dev_priv);
5482                 else
5483                         gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
5484                 dev_priv->rps.last_adj = 0;
5485                 I915_WRITE(GEN6_PMINTRMSK,
5486                            gen6_sanitize_rps_pm_mask(dev_priv, ~0));
5487         }
5488         mutex_unlock(&dev_priv->rps.hw_lock);
5489
5490         spin_lock(&dev_priv->rps.client_lock);
5491         while (!list_empty(&dev_priv->rps.clients))
5492                 list_del_init(dev_priv->rps.clients.next);
5493         spin_unlock(&dev_priv->rps.client_lock);
5494 }
5495
5496 void gen6_rps_boost(struct drm_i915_private *dev_priv,
5497                     struct intel_rps_client *rps,
5498                     unsigned long submitted)
5499 {
5500         /* This is intentionally racy! We peek at the state here, then
5501          * validate inside the RPS worker.
5502          */
5503         if (!(dev_priv->gt.awake &&
5504               dev_priv->rps.enabled &&
5505               dev_priv->rps.cur_freq < dev_priv->rps.boost_freq))
5506                 return;
5507
5508         /* Force a RPS boost (and don't count it against the client) if
5509          * the GPU is severely congested.
5510          */
5511         if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
5512                 rps = NULL;
5513
5514         spin_lock(&dev_priv->rps.client_lock);
5515         if (rps == NULL || list_empty(&rps->link)) {
5516                 spin_lock_irq(&dev_priv->irq_lock);
5517                 if (dev_priv->rps.interrupts_enabled) {
5518                         dev_priv->rps.client_boost = true;
5519                         schedule_work(&dev_priv->rps.work);
5520                 }
5521                 spin_unlock_irq(&dev_priv->irq_lock);
5522
5523                 if (rps != NULL) {
5524                         list_add(&rps->link, &dev_priv->rps.clients);
5525                         rps->boosts++;
5526                 } else
5527                         dev_priv->rps.boosts++;
5528         }
5529         spin_unlock(&dev_priv->rps.client_lock);
5530 }
5531
5532 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
5533 {
5534         int err;
5535
5536         lockdep_assert_held(&dev_priv->rps.hw_lock);
5537         GEM_BUG_ON(val > dev_priv->rps.max_freq);
5538         GEM_BUG_ON(val < dev_priv->rps.min_freq);
5539
5540         if (!dev_priv->rps.enabled) {
5541                 dev_priv->rps.cur_freq = val;
5542                 return 0;
5543         }
5544
5545         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
5546                 err = valleyview_set_rps(dev_priv, val);
5547         else
5548                 err = gen6_set_rps(dev_priv, val);
5549
5550         return err;
5551 }
5552
5553 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
5554 {
5555         I915_WRITE(GEN6_RC_CONTROL, 0);
5556         I915_WRITE(GEN9_PG_ENABLE, 0);
5557 }
5558
5559 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
5560 {
5561         I915_WRITE(GEN6_RP_CONTROL, 0);
5562 }
5563
5564 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
5565 {
5566         I915_WRITE(GEN6_RC_CONTROL, 0);
5567         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
5568         I915_WRITE(GEN6_RP_CONTROL, 0);
5569 }
5570
5571 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
5572 {
5573         I915_WRITE(GEN6_RC_CONTROL, 0);
5574 }
5575
5576 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
5577 {
5578         /* we're doing forcewake before Disabling RC6,
5579          * This what the BIOS expects when going into suspend */
5580         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5581
5582         I915_WRITE(GEN6_RC_CONTROL, 0);
5583
5584         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5585 }
5586
5587 static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode)
5588 {
5589         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
5590                 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
5591                         mode = GEN6_RC_CTL_RC6_ENABLE;
5592                 else
5593                         mode = 0;
5594         }
5595         if (HAS_RC6p(dev_priv))
5596                 DRM_DEBUG_DRIVER("Enabling RC6 states: "
5597                                  "RC6 %s RC6p %s RC6pp %s\n",
5598                                  onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
5599                                  onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
5600                                  onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
5601
5602         else
5603                 DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n",
5604                                  onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
5605 }
5606
5607 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
5608 {
5609         struct i915_ggtt *ggtt = &dev_priv->ggtt;
5610         bool enable_rc6 = true;
5611         unsigned long rc6_ctx_base;
5612         u32 rc_ctl;
5613         int rc_sw_target;
5614
5615         rc_ctl = I915_READ(GEN6_RC_CONTROL);
5616         rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
5617                        RC_SW_TARGET_STATE_SHIFT;
5618         DRM_DEBUG_DRIVER("BIOS enabled RC states: "
5619                          "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
5620                          onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
5621                          onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
5622                          rc_sw_target);
5623
5624         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
5625                 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
5626                 enable_rc6 = false;
5627         }
5628
5629         /*
5630          * The exact context size is not known for BXT, so assume a page size
5631          * for this check.
5632          */
5633         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
5634         if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) &&
5635               (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base +
5636                                         ggtt->stolen_reserved_size))) {
5637                 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
5638                 enable_rc6 = false;
5639         }
5640
5641         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
5642               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
5643               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
5644               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
5645                 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
5646                 enable_rc6 = false;
5647         }
5648
5649         if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
5650             !I915_READ(GEN8_PUSHBUS_ENABLE) ||
5651             !I915_READ(GEN8_PUSHBUS_SHIFT)) {
5652                 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
5653                 enable_rc6 = false;
5654         }
5655
5656         if (!I915_READ(GEN6_GFXPAUSE)) {
5657                 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
5658                 enable_rc6 = false;
5659         }
5660
5661         if (!I915_READ(GEN8_MISC_CTRL0)) {
5662                 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
5663                 enable_rc6 = false;
5664         }
5665
5666         return enable_rc6;
5667 }
5668
5669 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6)
5670 {
5671         /* No RC6 before Ironlake and code is gone for ilk. */
5672         if (INTEL_INFO(dev_priv)->gen < 6)
5673                 return 0;
5674
5675         if (!enable_rc6)
5676                 return 0;
5677
5678         if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) {
5679                 DRM_INFO("RC6 disabled by BIOS\n");
5680                 return 0;
5681         }
5682
5683         /* Respect the kernel parameter if it is set */
5684         if (enable_rc6 >= 0) {
5685                 int mask;
5686
5687                 if (HAS_RC6p(dev_priv))
5688                         mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
5689                                INTEL_RC6pp_ENABLE;
5690                 else
5691                         mask = INTEL_RC6_ENABLE;
5692
5693                 if ((enable_rc6 & mask) != enable_rc6)
5694                         DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d "
5695                                          "(requested %d, valid %d)\n",
5696                                          enable_rc6 & mask, enable_rc6, mask);
5697
5698                 return enable_rc6 & mask;
5699         }
5700
5701         if (IS_IVYBRIDGE(dev_priv))
5702                 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
5703
5704         return INTEL_RC6_ENABLE;
5705 }
5706
5707 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
5708 {
5709         /* All of these values are in units of 50MHz */
5710
5711         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
5712         if (IS_GEN9_LP(dev_priv)) {
5713                 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
5714                 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
5715                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
5716                 dev_priv->rps.min_freq = (rp_state_cap >>  0) & 0xff;
5717         } else {
5718                 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
5719                 dev_priv->rps.rp0_freq = (rp_state_cap >>  0) & 0xff;
5720                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
5721                 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
5722         }
5723         /* hw_max = RP0 until we check for overclocking */
5724         dev_priv->rps.max_freq = dev_priv->rps.rp0_freq;
5725
5726         dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
5727         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
5728             IS_GEN9_BC(dev_priv)) {
5729                 u32 ddcc_status = 0;
5730
5731                 if (sandybridge_pcode_read(dev_priv,
5732                                            HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
5733                                            &ddcc_status) == 0)
5734                         dev_priv->rps.efficient_freq =
5735                                 clamp_t(u8,
5736                                         ((ddcc_status >> 8) & 0xff),
5737                                         dev_priv->rps.min_freq,
5738                                         dev_priv->rps.max_freq);
5739         }
5740
5741         if (IS_GEN9_BC(dev_priv)) {
5742                 /* Store the frequency values in 16.66 MHZ units, which is
5743                  * the natural hardware unit for SKL
5744                  */
5745                 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
5746                 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
5747                 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
5748                 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
5749                 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
5750         }
5751 }
5752
5753 static void reset_rps(struct drm_i915_private *dev_priv,
5754                       int (*set)(struct drm_i915_private *, u8))
5755 {
5756         u8 freq = dev_priv->rps.cur_freq;
5757
5758         /* force a reset */
5759         dev_priv->rps.power = -1;
5760         dev_priv->rps.cur_freq = -1;
5761
5762         if (set(dev_priv, freq))
5763                 DRM_ERROR("Failed to reset RPS to initial values\n");
5764 }
5765
5766 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
5767 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
5768 {
5769         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5770
5771         /* Program defaults and thresholds for RPS*/
5772         I915_WRITE(GEN6_RC_VIDEO_FREQ,
5773                 GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
5774
5775         /* 1 second timeout*/
5776         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
5777                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
5778
5779         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
5780
5781         /* Leaning on the below call to gen6_set_rps to program/setup the
5782          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
5783          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
5784         reset_rps(dev_priv, gen6_set_rps);
5785
5786         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5787 }
5788
5789 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
5790 {
5791         struct intel_engine_cs *engine;
5792         enum intel_engine_id id;
5793         uint32_t rc6_mask = 0;
5794
5795         /* 1a: Software RC state - RC0 */
5796         I915_WRITE(GEN6_RC_STATE, 0);
5797
5798         /* 1b: Get forcewake during program sequence. Although the driver
5799          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
5800         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5801
5802         /* 2a: Disable RC states. */
5803         I915_WRITE(GEN6_RC_CONTROL, 0);
5804
5805         /* 2b: Program RC6 thresholds.*/
5806
5807         /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
5808         if (IS_SKYLAKE(dev_priv))
5809                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
5810         else
5811                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
5812         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5813         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5814         for_each_engine(engine, dev_priv, id)
5815                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5816
5817         if (HAS_GUC(dev_priv))
5818                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
5819
5820         I915_WRITE(GEN6_RC_SLEEP, 0);
5821
5822         /* 2c: Program Coarse Power Gating Policies. */
5823         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
5824         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
5825
5826         /* 3a: Enable RC6 */
5827         if (intel_enable_rc6() & INTEL_RC6_ENABLE)
5828                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
5829         DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
5830         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
5831         I915_WRITE(GEN6_RC_CONTROL,
5832                    GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask);
5833
5834         /*
5835          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
5836          * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
5837          */
5838         if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
5839                 I915_WRITE(GEN9_PG_ENABLE, 0);
5840         else
5841                 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
5842                                 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
5843
5844         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5845 }
5846
5847 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
5848 {
5849         struct intel_engine_cs *engine;
5850         enum intel_engine_id id;
5851         uint32_t rc6_mask = 0;
5852
5853         /* 1a: Software RC state - RC0 */
5854         I915_WRITE(GEN6_RC_STATE, 0);
5855
5856         /* 1c & 1d: Get forcewake during program sequence. Although the driver
5857          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
5858         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5859
5860         /* 2a: Disable RC states. */
5861         I915_WRITE(GEN6_RC_CONTROL, 0);
5862
5863         /* 2b: Program RC6 thresholds.*/
5864         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5865         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5866         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5867         for_each_engine(engine, dev_priv, id)
5868                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5869         I915_WRITE(GEN6_RC_SLEEP, 0);
5870         if (IS_BROADWELL(dev_priv))
5871                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
5872         else
5873                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
5874
5875         /* 3: Enable RC6 */
5876         if (intel_enable_rc6() & INTEL_RC6_ENABLE)
5877                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
5878         intel_print_rc6_info(dev_priv, rc6_mask);
5879         if (IS_BROADWELL(dev_priv))
5880                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
5881                                 GEN7_RC_CTL_TO_MODE |
5882                                 rc6_mask);
5883         else
5884                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
5885                                 GEN6_RC_CTL_EI_MODE(1) |
5886                                 rc6_mask);
5887
5888         /* 4 Program defaults and thresholds for RPS*/
5889         I915_WRITE(GEN6_RPNSWREQ,
5890                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
5891         I915_WRITE(GEN6_RC_VIDEO_FREQ,
5892                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
5893         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
5894         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
5895
5896         /* Docs recommend 900MHz, and 300 MHz respectively */
5897         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
5898                    dev_priv->rps.max_freq_softlimit << 24 |
5899                    dev_priv->rps.min_freq_softlimit << 16);
5900
5901         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
5902         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
5903         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
5904         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
5905
5906         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5907
5908         /* 5: Enable RPS */
5909         I915_WRITE(GEN6_RP_CONTROL,
5910                    GEN6_RP_MEDIA_TURBO |
5911                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5912                    GEN6_RP_MEDIA_IS_GFX |
5913                    GEN6_RP_ENABLE |
5914                    GEN6_RP_UP_BUSY_AVG |
5915                    GEN6_RP_DOWN_IDLE_AVG);
5916
5917         /* 6: Ring frequency + overclocking (our driver does this later */
5918
5919         reset_rps(dev_priv, gen6_set_rps);
5920
5921         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5922 }
5923
5924 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
5925 {
5926         struct intel_engine_cs *engine;
5927         enum intel_engine_id id;
5928         u32 rc6vids, rc6_mask = 0;
5929         u32 gtfifodbg;
5930         int rc6_mode;
5931         int ret;
5932
5933         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5934
5935         /* Here begins a magic sequence of register writes to enable
5936          * auto-downclocking.
5937          *
5938          * Perhaps there might be some value in exposing these to
5939          * userspace...
5940          */
5941         I915_WRITE(GEN6_RC_STATE, 0);
5942
5943         /* Clear the DBG now so we don't confuse earlier errors */
5944         gtfifodbg = I915_READ(GTFIFODBG);
5945         if (gtfifodbg) {
5946                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
5947                 I915_WRITE(GTFIFODBG, gtfifodbg);
5948         }
5949
5950         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5951
5952         /* disable the counters and set deterministic thresholds */
5953         I915_WRITE(GEN6_RC_CONTROL, 0);
5954
5955         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
5956         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
5957         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
5958         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5959         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5960
5961         for_each_engine(engine, dev_priv, id)
5962                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5963
5964         I915_WRITE(GEN6_RC_SLEEP, 0);
5965         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
5966         if (IS_IVYBRIDGE(dev_priv))
5967                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
5968         else
5969                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
5970         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
5971         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
5972
5973         /* Check if we are enabling RC6 */
5974         rc6_mode = intel_enable_rc6();
5975         if (rc6_mode & INTEL_RC6_ENABLE)
5976                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
5977
5978         /* We don't use those on Haswell */
5979         if (!IS_HASWELL(dev_priv)) {
5980                 if (rc6_mode & INTEL_RC6p_ENABLE)
5981                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
5982
5983                 if (rc6_mode & INTEL_RC6pp_ENABLE)
5984                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
5985         }
5986
5987         intel_print_rc6_info(dev_priv, rc6_mask);
5988
5989         I915_WRITE(GEN6_RC_CONTROL,
5990                    rc6_mask |
5991                    GEN6_RC_CTL_EI_MODE(1) |
5992                    GEN6_RC_CTL_HW_ENABLE);
5993
5994         /* Power down if completely idle for over 50ms */
5995         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
5996         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5997
5998         reset_rps(dev_priv, gen6_set_rps);
5999
6000         rc6vids = 0;
6001         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
6002         if (IS_GEN6(dev_priv) && ret) {
6003                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
6004         } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
6005                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
6006                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
6007                 rc6vids &= 0xffff00;
6008                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
6009                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
6010                 if (ret)
6011                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
6012         }
6013
6014         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6015 }
6016
/*
 * Program the PCU minimum-frequency table.
 *
 * For every GPU frequency from max_gpu_freq down to min_gpu_freq one
 * GEN6_PCODE_WRITE_MIN_FREQ_TABLE mailbox write is issued, packing an IA
 * reference frequency, a ring frequency and the GPU frequency into a single
 * value. How the IA/ring fields are derived depends on the platform branch
 * taken inside the loop.
 *
 * The function WARNs if rps.hw_lock is not held on entry.
 */
static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
{
        int min_freq = 15;
        unsigned int gpu_freq;
        unsigned int max_ia_freq, min_ring_freq;
        unsigned int max_gpu_freq, min_gpu_freq;
        int scaling_factor = 180;
        struct cpufreq_policy *policy;

        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

        /* Use CPU 0's cpufreq policy to learn the maximum IA frequency */
        policy = cpufreq_cpu_get(0);
        if (policy) {
                max_ia_freq = policy->cpuinfo.max_freq;
                cpufreq_cpu_put(policy);
        } else {
                /*
                 * Default to measured freq if none found, PCU will ensure we
                 * don't go over
                 */
                max_ia_freq = tsc_khz;
        }

        /* Convert from kHz to MHz */
        max_ia_freq /= 1000;

        /* Only the low four bits of DCLK are used */
        min_ring_freq = I915_READ(DCLK) & 0xf;
        /* convert DDR frequency from units of 266.6MHz to bandwidth */
        min_ring_freq = mult_frac(min_ring_freq, 8, 3);

        if (IS_GEN9_BC(dev_priv)) {
                /* Convert GT frequency to 50 MHz units */
                min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
                max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
        } else {
                min_gpu_freq = dev_priv->rps.min_freq;
                max_gpu_freq = dev_priv->rps.max_freq;
        }

        /*
         * For each potential GPU frequency, load a ring frequency we'd like
         * to use for memory access.  We do this by specifying the IA frequency
         * the PCU should use as a reference to determine the ring frequency.
         *
         * NOTE(review): gpu_freq is unsigned, so this loop only terminates
         * when min_gpu_freq is non-zero - confirm that rps.min_freq can never
         * be 0 by the time this runs.
         */
        for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
                /* Distance below the maximum GPU frequency, in table steps */
                int diff = max_gpu_freq - gpu_freq;
                unsigned int ia_freq = 0, ring_freq = 0;

                if (IS_GEN9_BC(dev_priv)) {
                        /*
                         * ring_freq = 2 * GT. ring_freq is in 100MHz units
                         * No floor required for ring frequency on SKL.
                         */
                        ring_freq = gpu_freq;
                } else if (INTEL_INFO(dev_priv)->gen >= 8) {
                        /* max(2 * GT, DDR). NB: GT is 50MHz units */
                        ring_freq = max(min_ring_freq, gpu_freq);
                } else if (IS_HASWELL(dev_priv)) {
                        /* 1.25 * GT, floored at the DDR-derived minimum */
                        ring_freq = mult_frac(gpu_freq, 5, 4);
                        ring_freq = max(min_ring_freq, ring_freq);
                        /* leave ia_freq as the default, chosen by cpufreq */
                } else {
                        /* On older processors, there is no separate ring
                         * clock domain, so in order to boost the bandwidth
                         * of the ring, we need to upclock the CPU (ia_freq).
                         *
                         * For GPU frequencies less than 750MHz,
                         * just use the lowest ring freq.
                         */
                        if (gpu_freq < min_freq)
                                ia_freq = 800;
                        else
                                ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
                        /* The mailbox field takes the IA frequency in 100MHz steps */
                        ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
                }

                /* The return value of the mailbox write is not checked here */
                sandybridge_pcode_write(dev_priv,
                                        GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
                                        ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
                                        ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
                                        gpu_freq);
        }
}
6100
6101 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
6102 {
6103         u32 val, rp0;
6104
6105         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
6106
6107         switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
6108         case 8:
6109                 /* (2 * 4) config */
6110                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
6111                 break;
6112         case 12:
6113                 /* (2 * 6) config */
6114                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
6115                 break;
6116         case 16:
6117                 /* (2 * 8) config */
6118         default:
6119                 /* Setting (2 * 8) Min RP0 for any other combination */
6120                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
6121                 break;
6122         }
6123
6124         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
6125
6126         return rp0;
6127 }
6128
6129 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
6130 {
6131         u32 val, rpe;
6132
6133         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
6134         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
6135
6136         return rpe;
6137 }
6138
6139 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
6140 {
6141         u32 val, rp1;
6142
6143         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
6144         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
6145
6146         return rp1;
6147 }
6148
6149 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
6150 {
6151         u32 val, rpn;
6152
6153         val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
6154         rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
6155                        FB_GFX_FREQ_FUSE_MASK);
6156
6157         return rpn;
6158 }
6159
6160 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
6161 {
6162         u32 val, rp1;
6163
6164         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
6165
6166         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
6167
6168         return rp1;
6169 }
6170
6171 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
6172 {
6173         u32 val, rp0;
6174
6175         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
6176
6177         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
6178         /* Clamp to max */
6179         rp0 = min_t(u32, rp0, 0xea);
6180
6181         return rp0;
6182 }
6183
6184 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
6185 {
6186         u32 val, rpe;
6187
6188         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
6189         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
6190         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
6191         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
6192
6193         return rpe;
6194 }
6195
6196 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
6197 {
6198         u32 val;
6199
6200         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
6201         /*
6202          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
6203          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
6204          * a BYT-M B0 the above register contains 0xbf. Moreover when setting
6205          * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
6206          * to make sure it matches what Punit accepts.
6207          */
6208         return max_t(u32, val, 0xc0);
6209 }
6210
6211 /* Check that the pctx buffer wasn't move under us. */
6212 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
6213 {
6214         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
6215
6216         WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
6217                              dev_priv->vlv_pctx->stolen->start);
6218 }
6219
6220
/*
 * Check that the pcbr address is not empty.
 *
 * Reads VLV_PCBR, drops the low 12 bits and WARNs when the address field
 * (everything above VLV_PCBR_ADDR_SHIFT) is zero. Nothing is written.
 */
static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
{
        unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;

        WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
}
6228
6229 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
6230 {
6231         struct i915_ggtt *ggtt = &dev_priv->ggtt;
6232         unsigned long pctx_paddr, paddr;
6233         u32 pcbr;
6234         int pctx_size = 32*1024;
6235
6236         pcbr = I915_READ(VLV_PCBR);
6237         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
6238                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
6239                 paddr = (dev_priv->mm.stolen_base +
6240                          (ggtt->stolen_size - pctx_size));
6241
6242                 pctx_paddr = (paddr & (~4095));
6243                 I915_WRITE(VLV_PCBR, pctx_paddr);
6244         }
6245
6246         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
6247 }
6248
6249 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
6250 {
6251         struct drm_i915_gem_object *pctx;
6252         unsigned long pctx_paddr;
6253         u32 pcbr;
6254         int pctx_size = 24*1024;
6255
6256         pcbr = I915_READ(VLV_PCBR);
6257         if (pcbr) {
6258                 /* BIOS set it up already, grab the pre-alloc'd space */
6259                 int pcbr_offset;
6260
6261                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
6262                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
6263                                                                       pcbr_offset,
6264                                                                       I915_GTT_OFFSET_NONE,
6265                                                                       pctx_size);
6266                 goto out;
6267         }
6268
6269         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
6270
6271         /*
6272          * From the Gunit register HAS:
6273          * The Gfx driver is expected to program this register and ensure
6274          * proper allocation within Gfx stolen memory.  For example, this
6275          * register should be programmed such than the PCBR range does not
6276          * overlap with other ranges, such as the frame buffer, protected
6277          * memory, or any other relevant ranges.
6278          */
6279         pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
6280         if (!pctx) {
6281                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
6282                 goto out;
6283         }
6284
6285         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
6286         I915_WRITE(VLV_PCBR, pctx_paddr);
6287
6288 out:
6289         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
6290         dev_priv->vlv_pctx = pctx;
6291 }
6292
6293 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
6294 {
6295         if (WARN_ON(!dev_priv->vlv_pctx))
6296                 return;
6297
6298         i915_gem_object_put(dev_priv->vlv_pctx);
6299         dev_priv->vlv_pctx = NULL;
6300 }
6301
6302 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
6303 {
6304         dev_priv->rps.gpll_ref_freq =
6305                 vlv_get_cck_clock(dev_priv, "GPLL ref",
6306                                   CCK_GPLL_CLOCK_CONTROL,
6307                                   dev_priv->czclk_freq);
6308
6309         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
6310                          dev_priv->rps.gpll_ref_freq);
6311 }
6312
6313 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
6314 {
6315         u32 val;
6316
6317         valleyview_setup_pctx(dev_priv);
6318
6319         vlv_init_gpll_ref_freq(dev_priv);
6320
6321         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
6322         switch ((val >> 6) & 3) {
6323         case 0:
6324         case 1:
6325                 dev_priv->mem_freq = 800;
6326                 break;
6327         case 2:
6328                 dev_priv->mem_freq = 1066;
6329                 break;
6330         case 3:
6331                 dev_priv->mem_freq = 1333;
6332                 break;
6333         }
6334         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
6335
6336         dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
6337         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
6338         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
6339                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
6340                          dev_priv->rps.max_freq);
6341
6342         dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
6343         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
6344                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
6345                          dev_priv->rps.efficient_freq);
6346
6347         dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
6348         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
6349                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
6350                          dev_priv->rps.rp1_freq);
6351
6352         dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
6353         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
6354                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
6355                          dev_priv->rps.min_freq);
6356 }
6357
6358 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
6359 {
6360         u32 val;
6361
6362         cherryview_setup_pctx(dev_priv);
6363
6364         vlv_init_gpll_ref_freq(dev_priv);
6365
6366         mutex_lock(&dev_priv->sb_lock);
6367         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
6368         mutex_unlock(&dev_priv->sb_lock);
6369
6370         switch ((val >> 2) & 0x7) {
6371         case 3:
6372                 dev_priv->mem_freq = 2000;
6373                 break;
6374         default:
6375                 dev_priv->mem_freq = 1600;
6376                 break;
6377         }
6378         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
6379
6380         dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
6381         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
6382         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
6383                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
6384                          dev_priv->rps.max_freq);
6385
6386         dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
6387         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
6388                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
6389                          dev_priv->rps.efficient_freq);
6390
6391         dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
6392         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
6393                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
6394                          dev_priv->rps.rp1_freq);
6395
6396         dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
6397         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
6398                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
6399                          dev_priv->rps.min_freq);
6400
6401         WARN_ONCE((dev_priv->rps.max_freq |
6402                    dev_priv->rps.efficient_freq |
6403                    dev_priv->rps.rp1_freq |
6404                    dev_priv->rps.min_freq) & 1,
6405                   "Odd GPU freq values\n");
6406 }
6407
/*
 * Valleyview powersave teardown: the only work done here is releasing the
 * power-context buffer via valleyview_cleanup_pctx().
 */
static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
{
        valleyview_cleanup_pctx(dev_priv);
}
6412
/*
 * Program RC6 and RPS on Cherryview.
 *
 * Sequence: clear stale GT FIFO errors, sanity-check PCBR, take forcewake,
 * disable RC states, program RC6 thresholds, enable RC6 (only if the module
 * option allows it and PCBR has an address), program and enable RPS, set the
 * Punit turbo bias, reset RPS via valleyview_set_rps and release forcewake.
 *
 * The function WARNs if rps.hw_lock is not held on entry.
 */
static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 gtfifodbg, val, rc6_mode = 0, pcbr;

        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

        /* Ignore the free-entry count fields; anything else left is an error */
        gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
                                             GT_FIFO_FREE_ENTRIES_CHV);
        if (gtfifodbg) {
                DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
                                 gtfifodbg);
                /* Write the error bits back to the register */
                I915_WRITE(GTFIFODBG, gtfifodbg);
        }

        cherryview_check_pctx(dev_priv);

        /* 1a & 1b: Get forcewake during program sequence. Although the driver
         * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

        /*  Disable RC states. */
        I915_WRITE(GEN6_RC_CONTROL, 0);

        /* 2a: Program RC6 thresholds.*/
        I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

        /* Same max-idle value for every engine */
        for_each_engine(engine, dev_priv, id)
                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
        I915_WRITE(GEN6_RC_SLEEP, 0);

        /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
        I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);

        /* allows RC6 residency counter to work */
        I915_WRITE(VLV_COUNTER_CONTROL,
                   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                                      VLV_MEDIA_RC6_COUNT_EN |
                                      VLV_RENDER_RC6_COUNT_EN));

        /* For now we assume BIOS is allocating and populating the PCBR  */
        pcbr = I915_READ(VLV_PCBR);

        /* 3: Enable RC6 */
        if ((intel_enable_rc6() & INTEL_RC6_ENABLE) &&
            (pcbr >> VLV_PCBR_ADDR_SHIFT))
                rc6_mode = GEN7_RC_CTL_TO_MODE;

        /* rc6_mode stays 0 (RC states disabled) when the check above fails */
        I915_WRITE(GEN6_RC_CONTROL, rc6_mode);

        /* 4 Program defaults and thresholds for RPS*/
        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
        I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
        I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
        I915_WRITE(GEN6_RP_UP_EI, 66000);
        I915_WRITE(GEN6_RP_DOWN_EI, 350000);

        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);

        /* 5: Enable RPS */
        I915_WRITE(GEN6_RP_CONTROL,
                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
                   GEN6_RP_MEDIA_IS_GFX |
                   GEN6_RP_ENABLE |
                   GEN6_RP_UP_BUSY_AVG |
                   GEN6_RP_DOWN_IDLE_AVG);

        /* Setting Fixed Bias */
        val = VLV_OVERRIDE_EN |
                  VLV_SOC_TDP_EN |
                  CHV_BIAS_CPU_50_SOC_50;
        vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);

        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);

        /* RPS code assumes GPLL is used */
        WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");

        DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
        DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

        reset_rps(dev_priv, valleyview_set_rps);

        /* Pairs with the forcewake_get taken before programming started */
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
6501
/*
 * Program RPS and RC6 on Valleyview.
 *
 * Sequence: sanity-check the power-context buffer, clear stale GT FIFO
 * errors, take forcewake, disable RC states, program and enable RPS,
 * program RC6 thresholds and residency counters, enable RC6 if the module
 * option allows it, set the Punit turbo bias, reset RPS via
 * valleyview_set_rps and release forcewake.
 *
 * The function WARNs if rps.hw_lock is not held on entry.
 */
static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 gtfifodbg, val, rc6_mode = 0;

        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

        valleyview_check_pctx(dev_priv);

        gtfifodbg = I915_READ(GTFIFODBG);
        if (gtfifodbg) {
                DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
                                 gtfifodbg);
                /* Write the error bits back to the register */
                I915_WRITE(GTFIFODBG, gtfifodbg);
        }

        /* If VLV, Forcewake all wells, else re-direct to regular path */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

        /*  Disable RC states. */
        I915_WRITE(GEN6_RC_CONTROL, 0);

        /* RPS timeouts, thresholds and evaluation intervals */
        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
        I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
        I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
        I915_WRITE(GEN6_RP_UP_EI, 66000);
        I915_WRITE(GEN6_RP_DOWN_EI, 350000);

        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);

        /* Enable RPS */
        I915_WRITE(GEN6_RP_CONTROL,
                   GEN6_RP_MEDIA_TURBO |
                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
                   GEN6_RP_MEDIA_IS_GFX |
                   GEN6_RP_ENABLE |
                   GEN6_RP_UP_BUSY_AVG |
                   GEN6_RP_DOWN_IDLE_CONT);

        /* RC6 wake rate limit (0x28 << 16), evaluation interval, hysteresis */
        I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);

        /* Same max-idle value for every engine */
        for_each_engine(engine, dev_priv, id)
                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);

        I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);

        /* allows RC6 residency counter to work */
        I915_WRITE(VLV_COUNTER_CONTROL,
                   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                                      VLV_MEDIA_RC0_COUNT_EN |
                                      VLV_RENDER_RC0_COUNT_EN |
                                      VLV_MEDIA_RC6_COUNT_EN |
                                      VLV_RENDER_RC6_COUNT_EN));

        /* rc6_mode stays 0 (RC states disabled) unless RC6 is requested */
        if (intel_enable_rc6() & INTEL_RC6_ENABLE)
                rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;

        intel_print_rc6_info(dev_priv, rc6_mode);

        I915_WRITE(GEN6_RC_CONTROL, rc6_mode);

        /* Setting Fixed Bias */
        val = VLV_OVERRIDE_EN |
                  VLV_SOC_TDP_EN |
                  VLV_BIAS_CPU_125_SOC_875;
        vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);

        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);

        /* RPS code assumes GPLL is used */
        WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");

        DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
        DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

        reset_rps(dev_priv, valleyview_set_rps);

        /* Pairs with the forcewake_get taken before programming started */
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
6583
/*
 * Decode a PXVFREQ-style register value into a frequency.
 *
 * The value carries three fields: a divider in bits 21:16, a post-divider
 * exponent in bits 13:12 and a pre-divider in bits 2:0. The result is
 * div * 133333 / (2^post * pre), or 0 when the pre-divider is zero.
 */
static unsigned long intel_pxfreq(u32 vidfreq)
{
        const int pre = vidfreq & 0x7;
        const int post = (vidfreq >> 12) & 0x3;
        const int div = (vidfreq >> 16) & 0x3f;

        /* A zero pre-divider would divide by zero below */
        if (pre == 0)
                return 0;

        return (div * 133333) / ((1 << post) * pre);
}
6598
/*
 * Lookup table keyed by the (i, t) pair; each row supplies the m and c
 * coefficients that go with that pair.
 */
static const struct cparams {
        u16 i;
        u16 t;
        u16 m;
        u16 c;
} cparams[] = {
        { .i = 1, .t = 1333, .m = 301, .c = 28664 },
        { .i = 1, .t = 1066, .m = 294, .c = 24460 },
        { .i = 1, .t = 800,  .m = 294, .c = 25192 },
        { .i = 0, .t = 1333, .m = 276, .c = 27605 },
        { .i = 0, .t = 1066, .m = 276, .c = 27605 },
        { .i = 0, .t = 800,  .m = 231, .c = 23784 },
};
6612
6613 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
6614 {
6615         u64 total_count, diff, ret;
6616         u32 count1, count2, count3, m = 0, c = 0;
6617         unsigned long now = jiffies_to_msecs(jiffies), diff1;
6618         int i;
6619
6620         lockdep_assert_held(&mchdev_lock);
6621
6622         diff1 = now - dev_priv->ips.last_time1;
6623
6624         /* Prevent division-by-zero if we are asking too fast.
6625          * Also, we don't get interesting results if we are polling
6626          * faster than once in 10ms, so just return the saved value
6627          * in such cases.
6628          */
6629         if (diff1 <= 10)
6630                 return dev_priv->ips.chipset_power;
6631
6632         count1 = I915_READ(DMIEC);
6633         count2 = I915_READ(DDREC);
6634         count3 = I915_READ(CSIEC);
6635
6636         total_count = count1 + count2 + count3;
6637
6638         /* FIXME: handle per-counter overflow */
6639         if (total_count < dev_priv->ips.last_count1) {
6640                 diff = ~0UL - dev_priv->ips.last_count1;
6641                 diff += total_count;
6642         } else {
6643                 diff = total_count - dev_priv->ips.last_count1;
6644         }
6645
6646         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
6647                 if (cparams[i].i == dev_priv->ips.c_m &&
6648                     cparams[i].t == dev_priv->ips.r_t) {
6649                         m = cparams[i].m;
6650                         c = cparams[i].c;
6651                         break;
6652                 }
6653         }
6654
6655         diff = div_u64(diff, diff1);
6656         ret = ((m * diff) + c);
6657         ret = div_u64(ret, 10);
6658
6659         dev_priv->ips.last_count1 = total_count;
6660         dev_priv->ips.last_time1 = now;
6661
6662         dev_priv->ips.chipset_power = ret;
6663
6664         return ret;
6665 }
6666
6667 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
6668 {
6669         unsigned long val;
6670
6671         if (INTEL_INFO(dev_priv)->gen != 5)
6672                 return 0;
6673
6674         spin_lock_irq(&mchdev_lock);
6675
6676         val = __i915_chipset_val(dev_priv);
6677
6678         spin_unlock_irq(&mchdev_lock);
6679
6680         return val;
6681 }
6682
6683 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
6684 {
6685         unsigned long m, x, b;
6686         u32 tsfs;
6687
6688         tsfs = I915_READ(TSFS);
6689
6690         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
6691         x = I915_READ8(TR1);
6692
6693         b = tsfs & TSFS_INTR_MASK;
6694
6695         return ((m * x) / 127) - b;
6696 }
6697
/*
 * Map a PXVID code to (code + 2) * 125.
 *
 * Code 0 maps to 0, and codes 8..30 are treated as code 31 before the
 * formula is applied.
 */
static int _pxvid_to_vd(u8 pxvid)
{
        int code = pxvid;

        if (code == 0)
                return 0;

        /* Codes in [8, 31) all collapse onto 31 */
        if (code >= 8 && code < 31)
                code = 31;

        return (code + 2) * 125;
}
6708
6709 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
6710 {
6711         const int vd = _pxvid_to_vd(pxvid);
6712         const int vm = vd - 1125;
6713
6714         if (INTEL_INFO(dev_priv)->is_mobile)
6715                 return vm > 0 ? vm : 0;
6716
6717         return vd;
6718 }
6719
6720 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
6721 {
6722         u64 now, diff, diffms;
6723         u32 count;
6724
6725         lockdep_assert_held(&mchdev_lock);
6726
6727         now = ktime_get_raw_ns();
6728         diffms = now - dev_priv->ips.last_time2;
6729         do_div(diffms, NSEC_PER_MSEC);
6730
6731         /* Don't divide by 0 */
6732         if (!diffms)
6733                 return;
6734
6735         count = I915_READ(GFXEC);
6736
6737         if (count < dev_priv->ips.last_count2) {
6738                 diff = ~0UL - dev_priv->ips.last_count2;
6739                 diff += count;
6740         } else {
6741                 diff = count - dev_priv->ips.last_count2;
6742         }
6743
6744         dev_priv->ips.last_count2 = count;
6745         dev_priv->ips.last_time2 = now;
6746
6747         /* More magic constants... */
6748         diff = diff * 1181;
6749         diff = div_u64(diff, diffms * 10);
6750         dev_priv->ips.gfx_power = diff;
6751 }
6752
6753 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
6754 {
6755         if (INTEL_INFO(dev_priv)->gen != 5)
6756                 return;
6757
6758         spin_lock_irq(&mchdev_lock);
6759
6760         __i915_update_gfx_val(dev_priv);
6761
6762         spin_unlock_irq(&mchdev_lock);
6763 }
6764
/*
 * Compute the graphics power estimate.
 *
 * Combines a voltage/temperature dependent term (state2), derived from the
 * PXVFREQ entry for the current RPS frequency and from i915_mch_val(), with
 * the counter-based ips.gfx_power value, which is refreshed as part of this
 * call.
 *
 * Must be called with mchdev_lock held.
 *
 * Returns ips.gfx_power + state2.
 */
static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
{
        unsigned long t, corr, state1, corr2, state2;
        u32 pxvid, ext_v;

        lockdep_assert_held(&mchdev_lock);

        /* The VID code sits in bits 30:24 of the PXVFREQ entry */
        pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
        pxvid = (pxvid >> 24) & 0x7f;
        ext_v = pvid_to_extvid(dev_priv, pxvid);

        state1 = ext_v;

        t = i915_mch_val(dev_priv);

        /* Revel in the empirically derived constants */

        /* Correction factor in 1/100000 units */
        if (t > 80)
                corr = ((t * 2349) + 135940);
        else if (t >= 50)
                corr = ((t * 964) + 29317);
        else /* < 50 */
                corr = ((t * 301) + 1004);

        /* Scale the correction by a linear function of state1 */
        corr = corr * ((150142 * state1) / 10000 - 78642);
        corr /= 100000;
        /* Apply the per-device ips.corr factor */
        corr2 = (corr * dev_priv->ips.corr);

        state2 = (corr2 * state1) / 10000;
        state2 /= 100; /* convert to mW */

        /* Refresh ips.gfx_power before it is read below */
        __i915_update_gfx_val(dev_priv);

        return dev_priv->ips.gfx_power + state2;
}
6801
6802 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
6803 {
6804         unsigned long val;
6805
6806         if (INTEL_INFO(dev_priv)->gen != 5)
6807                 return 0;
6808
6809         spin_lock_irq(&mchdev_lock);
6810
6811         val = __i915_gfx_val(dev_priv);
6812
6813         spin_unlock_irq(&mchdev_lock);
6814
6815         return val;
6816 }
6817
6818 /**
6819  * i915_read_mch_val - return value for IPS use
6820  *
6821  * Calculate and return a value for the IPS driver to use when deciding whether
6822  * we have thermal and power headroom to increase CPU or GPU power budget.
6823  */
6824 unsigned long i915_read_mch_val(void)
6825 {
6826         struct drm_i915_private *dev_priv;
6827         unsigned long chipset_val, graphics_val, ret = 0;
6828
6829         spin_lock_irq(&mchdev_lock);
6830         if (!i915_mch_dev)
6831                 goto out_unlock;
6832         dev_priv = i915_mch_dev;
6833
6834         chipset_val = __i915_chipset_val(dev_priv);
6835         graphics_val = __i915_gfx_val(dev_priv);
6836
6837         ret = chipset_val + graphics_val;
6838
6839 out_unlock:
6840         spin_unlock_irq(&mchdev_lock);
6841
6842         return ret;
6843 }
6844 EXPORT_SYMBOL_GPL(i915_read_mch_val);
6845
6846 /**
6847  * i915_gpu_raise - raise GPU frequency limit
6848  *
6849  * Raise the limit; IPS indicates we have thermal headroom.
6850  */
6851 bool i915_gpu_raise(void)
6852 {
6853         struct drm_i915_private *dev_priv;
6854         bool ret = true;
6855
6856         spin_lock_irq(&mchdev_lock);
6857         if (!i915_mch_dev) {
6858                 ret = false;
6859                 goto out_unlock;
6860         }
6861         dev_priv = i915_mch_dev;
6862
6863         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
6864                 dev_priv->ips.max_delay--;
6865
6866 out_unlock:
6867         spin_unlock_irq(&mchdev_lock);
6868
6869         return ret;
6870 }
6871 EXPORT_SYMBOL_GPL(i915_gpu_raise);
6872
6873 /**
6874  * i915_gpu_lower - lower GPU frequency limit
6875  *
6876  * IPS indicates we're close to a thermal limit, so throttle back the GPU
6877  * frequency maximum.
6878  */
6879 bool i915_gpu_lower(void)
6880 {
6881         struct drm_i915_private *dev_priv;
6882         bool ret = true;
6883
6884         spin_lock_irq(&mchdev_lock);
6885         if (!i915_mch_dev) {
6886                 ret = false;
6887                 goto out_unlock;
6888         }
6889         dev_priv = i915_mch_dev;
6890
6891         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
6892                 dev_priv->ips.max_delay++;
6893
6894 out_unlock:
6895         spin_unlock_irq(&mchdev_lock);
6896
6897         return ret;
6898 }
6899 EXPORT_SYMBOL_GPL(i915_gpu_lower);
6900
6901 /**
6902  * i915_gpu_busy - indicate GPU business to IPS
6903  *
6904  * Tell the IPS driver whether or not the GPU is busy.
6905  */
6906 bool i915_gpu_busy(void)
6907 {
6908         bool ret = false;
6909
6910         spin_lock_irq(&mchdev_lock);
6911         if (i915_mch_dev)
6912                 ret = i915_mch_dev->gt.awake;
6913         spin_unlock_irq(&mchdev_lock);
6914
6915         return ret;
6916 }
6917 EXPORT_SYMBOL_GPL(i915_gpu_busy);
6918
6919 /**
6920  * i915_gpu_turbo_disable - disable graphics turbo
6921  *
6922  * Disable graphics turbo by resetting the max frequency and setting the
6923  * current frequency to the default.
6924  */
6925 bool i915_gpu_turbo_disable(void)
6926 {
6927         struct drm_i915_private *dev_priv;
6928         bool ret = true;
6929
6930         spin_lock_irq(&mchdev_lock);
6931         if (!i915_mch_dev) {
6932                 ret = false;
6933                 goto out_unlock;
6934         }
6935         dev_priv = i915_mch_dev;
6936
6937         dev_priv->ips.max_delay = dev_priv->ips.fstart;
6938
6939         if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
6940                 ret = false;
6941
6942 out_unlock:
6943         spin_unlock_irq(&mchdev_lock);
6944
6945         return ret;
6946 }
6947 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
6948
6949 /**
6950  * Tells the intel_ips driver that the i915 driver is now loaded, if
6951  * IPS got loaded first.
6952  *
6953  * This awkward dance is so that neither module has to depend on the
6954  * other in order for IPS to do the appropriate communication of
6955  * GPU turbo limits to i915.
6956  */
6957 static void
6958 ips_ping_for_i915_load(void)
6959 {
6960         void (*link)(void);
6961
6962         link = symbol_get(ips_link_to_i915_driver);
6963         if (link) {
6964                 link();
6965                 symbol_put(ips_link_to_i915_driver);
6966         }
6967 }
6968
6969 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6970 {
6971         /* We only register the i915 ips part with intel-ips once everything is
6972          * set up, to avoid intel-ips sneaking in and reading bogus values. */
6973         spin_lock_irq(&mchdev_lock);
6974         i915_mch_dev = dev_priv;
6975         spin_unlock_irq(&mchdev_lock);
6976
6977         ips_ping_for_i915_load();
6978 }
6979
6980 void intel_gpu_ips_teardown(void)
6981 {
6982         spin_lock_irq(&mchdev_lock);
6983         i915_mch_dev = NULL;
6984         spin_unlock_irq(&mchdev_lock);
6985 }
6986
6987 static void intel_init_emon(struct drm_i915_private *dev_priv)
6988 {
6989         u32 lcfuse;
6990         u8 pxw[16];
6991         int i;
6992
6993         /* Disable to program */
6994         I915_WRITE(ECR, 0);
6995         POSTING_READ(ECR);
6996
6997         /* Program energy weights for various events */
6998         I915_WRITE(SDEW, 0x15040d00);
6999         I915_WRITE(CSIEW0, 0x007f0000);
7000         I915_WRITE(CSIEW1, 0x1e220004);
7001         I915_WRITE(CSIEW2, 0x04000004);
7002
7003         for (i = 0; i < 5; i++)
7004                 I915_WRITE(PEW(i), 0);
7005         for (i = 0; i < 3; i++)
7006                 I915_WRITE(DEW(i), 0);
7007
7008         /* Program P-state weights to account for frequency power adjustment */
7009         for (i = 0; i < 16; i++) {
7010                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
7011                 unsigned long freq = intel_pxfreq(pxvidfreq);
7012                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
7013                         PXVFREQ_PX_SHIFT;
7014                 unsigned long val;
7015
7016                 val = vid * vid;
7017                 val *= (freq / 1000);
7018                 val *= 255;
7019                 val /= (127*127*900);
7020                 if (val > 0xff)
7021                         DRM_ERROR("bad pxval: %ld\n", val);
7022                 pxw[i] = val;
7023         }
7024         /* Render standby states get 0 weight */
7025         pxw[14] = 0;
7026         pxw[15] = 0;
7027
7028         for (i = 0; i < 4; i++) {
7029                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
7030                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
7031                 I915_WRITE(PXW(i), val);
7032         }
7033
7034         /* Adjust magic regs to magic values (more experimental results) */
7035         I915_WRITE(OGW0, 0);
7036         I915_WRITE(OGW1, 0);
7037         I915_WRITE(EG0, 0x00007f00);
7038         I915_WRITE(EG1, 0x0000000e);
7039         I915_WRITE(EG2, 0x000e0000);
7040         I915_WRITE(EG3, 0x68000300);
7041         I915_WRITE(EG4, 0x42000000);
7042         I915_WRITE(EG5, 0x00140031);
7043         I915_WRITE(EG6, 0);
7044         I915_WRITE(EG7, 0);
7045
7046         for (i = 0; i < 8; i++)
7047                 I915_WRITE(PXWL(i), 0);
7048
7049         /* Enable PMON + select events */
7050         I915_WRITE(ECR, 0x80000019);
7051
7052         lcfuse = I915_READ(LCFUSE02);
7053
7054         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
7055 }
7056
7057 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
7058 {
7059         /*
7060          * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
7061          * requirement.
7062          */
7063         if (!i915.enable_rc6) {
7064                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
7065                 intel_runtime_pm_get(dev_priv);
7066         }
7067
7068         mutex_lock(&dev_priv->drm.struct_mutex);
7069         mutex_lock(&dev_priv->rps.hw_lock);
7070
7071         /* Initialize RPS limits (for userspace) */
7072         if (IS_CHERRYVIEW(dev_priv))
7073                 cherryview_init_gt_powersave(dev_priv);
7074         else if (IS_VALLEYVIEW(dev_priv))
7075                 valleyview_init_gt_powersave(dev_priv);
7076         else if (INTEL_GEN(dev_priv) >= 6)
7077                 gen6_init_rps_frequencies(dev_priv);
7078
7079         /* Derive initial user preferences/limits from the hardware limits */
7080         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
7081         dev_priv->rps.cur_freq = dev_priv->rps.idle_freq;
7082
7083         dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
7084         dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
7085
7086         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
7087                 dev_priv->rps.min_freq_softlimit =
7088                         max_t(int,
7089                               dev_priv->rps.efficient_freq,
7090                               intel_freq_opcode(dev_priv, 450));
7091
7092         /* After setting max-softlimit, find the overclock max freq */
7093         if (IS_GEN6(dev_priv) ||
7094             IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
7095                 u32 params = 0;
7096
7097                 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
7098                 if (params & BIT(31)) { /* OC supported */
7099                         DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
7100                                          (dev_priv->rps.max_freq & 0xff) * 50,
7101                                          (params & 0xff) * 50);
7102                         dev_priv->rps.max_freq = params & 0xff;
7103                 }
7104         }
7105
7106         /* Finally allow us to boost to max by default */
7107         dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
7108
7109         mutex_unlock(&dev_priv->rps.hw_lock);
7110         mutex_unlock(&dev_priv->drm.struct_mutex);
7111
7112         intel_autoenable_gt_powersave(dev_priv);
7113 }
7114
7115 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7116 {
7117         if (IS_VALLEYVIEW(dev_priv))
7118                 valleyview_cleanup_gt_powersave(dev_priv);
7119
7120         if (!i915.enable_rc6)
7121                 intel_runtime_pm_put(dev_priv);
7122 }
7123
7124 /**
7125  * intel_suspend_gt_powersave - suspend PM work and helper threads
7126  * @dev_priv: i915 device
7127  *
7128  * We don't want to disable RC6 or other features here, we just want
7129  * to make sure any work we've queued has finished and won't bother
7130  * us while we're suspended.
7131  */
7132 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
7133 {
7134         if (INTEL_GEN(dev_priv) < 6)
7135                 return;
7136
7137         if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
7138                 intel_runtime_pm_put(dev_priv);
7139
7140         /* gen6_rps_idle() will be called later to disable interrupts */
7141 }
7142
7143 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
7144 {
7145         dev_priv->rps.enabled = true; /* force disabling */
7146         intel_disable_gt_powersave(dev_priv);
7147
7148         gen6_reset_rps_interrupts(dev_priv);
7149 }
7150
7151 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
7152 {
7153         if (!READ_ONCE(dev_priv->rps.enabled))
7154                 return;
7155
7156         mutex_lock(&dev_priv->rps.hw_lock);
7157
7158         if (INTEL_GEN(dev_priv) >= 9) {
7159                 gen9_disable_rc6(dev_priv);
7160                 gen9_disable_rps(dev_priv);
7161         } else if (IS_CHERRYVIEW(dev_priv)) {
7162                 cherryview_disable_rps(dev_priv);
7163         } else if (IS_VALLEYVIEW(dev_priv)) {
7164                 valleyview_disable_rps(dev_priv);
7165         } else if (INTEL_GEN(dev_priv) >= 6) {
7166                 gen6_disable_rps(dev_priv);
7167         }  else if (IS_IRONLAKE_M(dev_priv)) {
7168                 ironlake_disable_drps(dev_priv);
7169         }
7170
7171         dev_priv->rps.enabled = false;
7172         mutex_unlock(&dev_priv->rps.hw_lock);
7173 }
7174
7175 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
7176 {
7177         /* We shouldn't be disabling as we submit, so this should be less
7178          * racy than it appears!
7179          */
7180         if (READ_ONCE(dev_priv->rps.enabled))
7181                 return;
7182
7183         /* Powersaving is controlled by the host when inside a VM */
7184         if (intel_vgpu_active(dev_priv))
7185                 return;
7186
7187         mutex_lock(&dev_priv->rps.hw_lock);
7188
7189         if (IS_CHERRYVIEW(dev_priv)) {
7190                 cherryview_enable_rps(dev_priv);
7191         } else if (IS_VALLEYVIEW(dev_priv)) {
7192                 valleyview_enable_rps(dev_priv);
7193         } else if (INTEL_GEN(dev_priv) >= 9) {
7194                 gen9_enable_rc6(dev_priv);
7195                 gen9_enable_rps(dev_priv);
7196                 if (IS_GEN9_BC(dev_priv))
7197                         gen6_update_ring_freq(dev_priv);
7198         } else if (IS_BROADWELL(dev_priv)) {
7199                 gen8_enable_rps(dev_priv);
7200                 gen6_update_ring_freq(dev_priv);
7201         } else if (INTEL_GEN(dev_priv) >= 6) {
7202                 gen6_enable_rps(dev_priv);
7203                 gen6_update_ring_freq(dev_priv);
7204         } else if (IS_IRONLAKE_M(dev_priv)) {
7205                 ironlake_enable_drps(dev_priv);
7206                 intel_init_emon(dev_priv);
7207         }
7208
7209         WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
7210         WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
7211
7212         WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
7213         WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
7214
7215         dev_priv->rps.enabled = true;
7216         mutex_unlock(&dev_priv->rps.hw_lock);
7217 }
7218
7219 static void __intel_autoenable_gt_powersave(struct work_struct *work)
7220 {
7221         struct drm_i915_private *dev_priv =
7222                 container_of(work, typeof(*dev_priv), rps.autoenable_work.work);
7223         struct intel_engine_cs *rcs;
7224         struct drm_i915_gem_request *req;
7225
7226         if (READ_ONCE(dev_priv->rps.enabled))
7227                 goto out;
7228
7229         rcs = dev_priv->engine[RCS];
7230         if (rcs->last_retired_context)
7231                 goto out;
7232
7233         if (!rcs->init_context)
7234                 goto out;
7235
7236         mutex_lock(&dev_priv->drm.struct_mutex);
7237
7238         req = i915_gem_request_alloc(rcs, dev_priv->kernel_context);
7239         if (IS_ERR(req))
7240                 goto unlock;
7241
7242         if (!i915.enable_execlists && i915_switch_context(req) == 0)
7243                 rcs->init_context(req);
7244
7245         /* Mark the device busy, calling intel_enable_gt_powersave() */
7246         i915_add_request(req);
7247
7248 unlock:
7249         mutex_unlock(&dev_priv->drm.struct_mutex);
7250 out:
7251         intel_runtime_pm_put(dev_priv);
7252 }
7253
7254 void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
7255 {
7256         if (READ_ONCE(dev_priv->rps.enabled))
7257                 return;
7258
7259         if (IS_IRONLAKE_M(dev_priv)) {
7260                 ironlake_enable_drps(dev_priv);
7261                 intel_init_emon(dev_priv);
7262         } else if (INTEL_INFO(dev_priv)->gen >= 6) {
7263                 /*
7264                  * PCU communication is slow and this doesn't need to be
7265                  * done at any specific time, so do this out of our fast path
7266                  * to make resume and init faster.
7267                  *
7268                  * We depend on the HW RC6 power context save/restore
7269                  * mechanism when entering D3 through runtime PM suspend. So
7270                  * disable RPM until RPS/RC6 is properly setup. We can only
7271                  * get here via the driver load/system resume/runtime resume
7272                  * paths, so the _noresume version is enough (and in case of
7273                  * runtime resume it's necessary).
7274                  */
7275                 if (queue_delayed_work(dev_priv->wq,
7276                                        &dev_priv->rps.autoenable_work,
7277                                        round_jiffies_up_relative(HZ)))
7278                         intel_runtime_pm_get_noresume(dev_priv);
7279         }
7280 }
7281
7282 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
7283 {
7284         /*
7285          * On Ibex Peak and Cougar Point, we need to disable clock
7286          * gating for the panel power sequencer or it will fail to
7287          * start up when no ports are active.
7288          */
7289         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
7290 }
7291
7292 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
7293 {
7294         enum pipe pipe;
7295
7296         for_each_pipe(dev_priv, pipe) {
7297                 I915_WRITE(DSPCNTR(pipe),
7298                            I915_READ(DSPCNTR(pipe)) |
7299                            DISPPLANE_TRICKLE_FEED_DISABLE);
7300
7301                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
7302                 POSTING_READ(DSPSURF(pipe));
7303         }
7304 }
7305
7306 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
7307 {
7308         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
7309         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
7310         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
7311
7312         /*
7313          * Don't touch WM1S_LP_EN here.
7314          * Doing so could cause underruns.
7315          */
7316 }
7317
7318 static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv)
7319 {
7320         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
7321
7322         /*
7323          * Required for FBC
7324          * WaFbcDisableDpfcClockGating:ilk
7325          */
7326         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
7327                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
7328                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
7329
7330         I915_WRITE(PCH_3DCGDIS0,
7331                    MARIUNIT_CLOCK_GATE_DISABLE |
7332                    SVSMUNIT_CLOCK_GATE_DISABLE);
7333         I915_WRITE(PCH_3DCGDIS1,
7334                    VFMUNIT_CLOCK_GATE_DISABLE);
7335
7336         /*
7337          * According to the spec the following bits should be set in
7338          * order to enable memory self-refresh
7339          * The bit 22/21 of 0x42004
7340          * The bit 5 of 0x42020
7341          * The bit 15 of 0x45000
7342          */
7343         I915_WRITE(ILK_DISPLAY_CHICKEN2,
7344                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
7345                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
7346         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
7347         I915_WRITE(DISP_ARB_CTL,
7348                    (I915_READ(DISP_ARB_CTL) |
7349                     DISP_FBC_WM_DIS));
7350
7351         ilk_init_lp_watermarks(dev_priv);
7352
7353         /*
7354          * Based on the document from hardware guys the following bits
7355          * should be set unconditionally in order to enable FBC.
7356          * The bit 22 of 0x42000
7357          * The bit 22 of 0x42004
7358          * The bit 7,8,9 of 0x42020.
7359          */
7360         if (IS_IRONLAKE_M(dev_priv)) {
7361                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
7362                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
7363                            I915_READ(ILK_DISPLAY_CHICKEN1) |
7364                            ILK_FBCQ_DIS);
7365                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
7366                            I915_READ(ILK_DISPLAY_CHICKEN2) |
7367                            ILK_DPARB_GATE);
7368         }
7369
7370         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
7371
7372         I915_WRITE(ILK_DISPLAY_CHICKEN2,
7373                    I915_READ(ILK_DISPLAY_CHICKEN2) |
7374                    ILK_ELPIN_409_SELECT);
7375         I915_WRITE(_3D_CHICKEN2,
7376                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
7377                    _3D_CHICKEN2_WM_READ_PIPELINED);
7378
7379         /* WaDisableRenderCachePipelinedFlush:ilk */
7380         I915_WRITE(CACHE_MODE_0,
7381                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
7382
7383         /* WaDisable_RenderCache_OperationalFlush:ilk */
7384         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7385
7386         g4x_disable_trickle_feed(dev_priv);
7387
7388         ibx_init_clock_gating(dev_priv);
7389 }
7390
7391 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
7392 {
7393         int pipe;
7394         uint32_t val;
7395
7396         /*
7397          * On Ibex Peak and Cougar Point, we need to disable clock
7398          * gating for the panel power sequencer or it will fail to
7399          * start up when no ports are active.
7400          */
7401         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
7402                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
7403                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
7404         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
7405                    DPLS_EDP_PPS_FIX_DIS);
7406         /* The below fixes the weird display corruption, a few pixels shifted
7407          * downward, on (only) LVDS of some HP laptops with IVY.
7408          */
7409         for_each_pipe(dev_priv, pipe) {
7410                 val = I915_READ(TRANS_CHICKEN2(pipe));
7411                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
7412                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
7413                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
7414                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
7415                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
7416                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
7417                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
7418                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
7419         }
7420         /* WADP0ClockGatingDisable */
7421         for_each_pipe(dev_priv, pipe) {
7422                 I915_WRITE(TRANS_CHICKEN1(pipe),
7423                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
7424         }
7425 }
7426
7427 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
7428 {
7429         uint32_t tmp;
7430
7431         tmp = I915_READ(MCH_SSKPD);
7432         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
7433                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
7434                               tmp);
7435 }
7436
7437 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
7438 {
7439         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
7440
7441         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
7442
7443         I915_WRITE(ILK_DISPLAY_CHICKEN2,
7444                    I915_READ(ILK_DISPLAY_CHICKEN2) |
7445                    ILK_ELPIN_409_SELECT);
7446
7447         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
7448         I915_WRITE(_3D_CHICKEN,
7449                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
7450
7451         /* WaDisable_RenderCache_OperationalFlush:snb */
7452         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7453
7454         /*
7455          * BSpec recoomends 8x4 when MSAA is used,
7456          * however in practice 16x4 seems fastest.
7457          *
7458          * Note that PS/WM thread counts depend on the WIZ hashing
7459          * disable bit, which we don't touch here, but it's good
7460          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7461          */
7462         I915_WRITE(GEN6_GT_MODE,
7463                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7464
7465         ilk_init_lp_watermarks(dev_priv);
7466
7467         I915_WRITE(CACHE_MODE_0,
7468                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
7469
7470         I915_WRITE(GEN6_UCGCTL1,
7471                    I915_READ(GEN6_UCGCTL1) |
7472                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
7473                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
7474
7475         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
7476          * gating disable must be set.  Failure to set it results in
7477          * flickering pixels due to Z write ordering failures after
7478          * some amount of runtime in the Mesa "fire" demo, and Unigine
7479          * Sanctuary and Tropics, and apparently anything else with
7480          * alpha test or pixel discard.
7481          *
7482          * According to the spec, bit 11 (RCCUNIT) must also be set,
7483          * but we didn't debug actual testcases to find it out.
7484          *
7485          * WaDisableRCCUnitClockGating:snb
7486          * WaDisableRCPBUnitClockGating:snb
7487          */
7488         I915_WRITE(GEN6_UCGCTL2,
7489                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
7490                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
7491
7492         /* WaStripsFansDisableFastClipPerformanceFix:snb */
7493         I915_WRITE(_3D_CHICKEN3,
7494                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
7495
7496         /*
7497          * Bspec says:
7498          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
7499          * 3DSTATE_SF number of SF output attributes is more than 16."
7500          */
7501         I915_WRITE(_3D_CHICKEN3,
7502                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
7503
7504         /*
7505          * According to the spec the following bits should be
7506          * set in order to enable memory self-refresh and fbc:
7507          * The bit21 and bit22 of 0x42000
7508          * The bit21 and bit22 of 0x42004
7509          * The bit5 and bit7 of 0x42020
7510          * The bit14 of 0x70180
7511          * The bit14 of 0x71180
7512          *
7513          * WaFbcAsynchFlipDisableFbcQueue:snb
7514          */
7515         I915_WRITE(ILK_DISPLAY_CHICKEN1,
7516                    I915_READ(ILK_DISPLAY_CHICKEN1) |
7517                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
7518         I915_WRITE(ILK_DISPLAY_CHICKEN2,
7519                    I915_READ(ILK_DISPLAY_CHICKEN2) |
7520                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
7521         I915_WRITE(ILK_DSPCLK_GATE_D,
7522                    I915_READ(ILK_DSPCLK_GATE_D) |
7523                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
7524                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
7525
7526         g4x_disable_trickle_feed(dev_priv);
7527
7528         cpt_init_clock_gating(dev_priv);
7529
7530         gen6_check_mch_setup(dev_priv);
7531 }
7532
7533 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
7534 {
7535         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
7536
7537         /*
7538          * WaVSThreadDispatchOverride:ivb,vlv
7539          *
7540          * This actually overrides the dispatch
7541          * mode for all thread types.
7542          */
7543         reg &= ~GEN7_FF_SCHED_MASK;
7544         reg |= GEN7_FF_TS_SCHED_HW;
7545         reg |= GEN7_FF_VS_SCHED_HW;
7546         reg |= GEN7_FF_DS_SCHED_HW;
7547
7548         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
7549 }
7550
7551 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
7552 {
7553         /*
7554          * TODO: this bit should only be enabled when really needed, then
7555          * disabled when not needed anymore in order to save power.
7556          */
7557         if (HAS_PCH_LPT_LP(dev_priv))
7558                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
7559                            I915_READ(SOUTH_DSPCLK_GATE_D) |
7560                            PCH_LP_PARTITION_LEVEL_DISABLE);
7561
7562         /* WADPOClockGatingDisable:hsw */
7563         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
7564                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
7565                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
7566 }
7567
7568 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
7569 {
7570         if (HAS_PCH_LPT_LP(dev_priv)) {
7571                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
7572
7573                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
7574                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
7575         }
7576 }
7577
7578 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
7579                                    int general_prio_credits,
7580                                    int high_prio_credits)
7581 {
7582         u32 misccpctl;
7583
7584         /* WaTempDisableDOPClkGating:bdw */
7585         misccpctl = I915_READ(GEN7_MISCCPCTL);
7586         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
7587
7588         I915_WRITE(GEN8_L3SQCREG1,
7589                    L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
7590                    L3_HIGH_PRIO_CREDITS(high_prio_credits));
7591
7592         /*
7593          * Wait at least 100 clocks before re-enabling clock gating.
7594          * See the definition of L3SQCREG1 in BSpec.
7595          */
7596         POSTING_READ(GEN8_L3SQCREG1);
7597         udelay(1);
7598         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
7599 }
7600
7601 static void kabylake_init_clock_gating(struct drm_i915_private *dev_priv)
7602 {
7603         gen9_init_clock_gating(dev_priv);
7604
7605         /* WaDisableSDEUnitClockGating:kbl */
7606         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
7607                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
7608                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
7609
7610         /* WaDisableGamClockGating:kbl */
7611         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
7612                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
7613                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
7614
7615         /* WaFbcNukeOnHostModify:kbl */
7616         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
7617                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
7618 }
7619
7620 static void skylake_init_clock_gating(struct drm_i915_private *dev_priv)
7621 {
7622         gen9_init_clock_gating(dev_priv);
7623
7624         /* WAC6entrylatency:skl */
7625         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
7626                    FBC_LLC_FULLY_OPEN);
7627
7628         /* WaFbcNukeOnHostModify:skl */
7629         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
7630                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
7631 }
7632
7633 static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv)
7634 {
7635         enum pipe pipe;
7636
7637         ilk_init_lp_watermarks(dev_priv);
7638
7639         /* WaSwitchSolVfFArbitrationPriority:bdw */
7640         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
7641
7642         /* WaPsrDPAMaskVBlankInSRD:bdw */
7643         I915_WRITE(CHICKEN_PAR1_1,
7644                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
7645
7646         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
7647         for_each_pipe(dev_priv, pipe) {
7648                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
7649                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
7650                            BDW_DPRS_MASK_VBLANK_SRD);
7651         }
7652
7653         /* WaVSRefCountFullforceMissDisable:bdw */
7654         /* WaDSRefCountFullforceMissDisable:bdw */
7655         I915_WRITE(GEN7_FF_THREAD_MODE,
7656                    I915_READ(GEN7_FF_THREAD_MODE) &
7657                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
7658
7659         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
7660                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
7661
7662         /* WaDisableSDEUnitClockGating:bdw */
7663         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
7664                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
7665
7666         /* WaProgramL3SqcReg1Default:bdw */
7667         gen8_set_l3sqc_credits(dev_priv, 30, 2);
7668
7669         /*
7670          * WaGttCachingOffByDefault:bdw
7671          * GTT cache may not work with big pages, so if those
7672          * are ever enabled GTT cache may need to be disabled.
7673          */
7674         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
7675
7676         /* WaKVMNotificationOnConfigChange:bdw */
7677         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
7678                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
7679
7680         lpt_init_clock_gating(dev_priv);
7681
7682         /* WaDisableDopClockGating:bdw
7683          *
7684          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
7685          * clock gating.
7686          */
7687         I915_WRITE(GEN6_UCGCTL1,
7688                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
7689 }
7690
7691 static void haswell_init_clock_gating(struct drm_i915_private *dev_priv)
7692 {
7693         ilk_init_lp_watermarks(dev_priv);
7694
7695         /* L3 caching of data atomics doesn't work -- disable it. */
7696         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
7697         I915_WRITE(HSW_ROW_CHICKEN3,
7698                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
7699
7700         /* This is required by WaCatErrorRejectionIssue:hsw */
7701         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7702                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7703                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7704
7705         /* WaVSRefCountFullforceMissDisable:hsw */
7706         I915_WRITE(GEN7_FF_THREAD_MODE,
7707                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
7708
7709         /* WaDisable_RenderCache_OperationalFlush:hsw */
7710         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7711
7712         /* enable HiZ Raw Stall Optimization */
7713         I915_WRITE(CACHE_MODE_0_GEN7,
7714                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
7715
7716         /* WaDisable4x2SubspanOptimization:hsw */
7717         I915_WRITE(CACHE_MODE_1,
7718                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7719
7720         /*
7721          * BSpec recommends 8x4 when MSAA is used,
7722          * however in practice 16x4 seems fastest.
7723          *
7724          * Note that PS/WM thread counts depend on the WIZ hashing
7725          * disable bit, which we don't touch here, but it's good
7726          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7727          */
7728         I915_WRITE(GEN7_GT_MODE,
7729                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7730
7731         /* WaSampleCChickenBitEnable:hsw */
7732         I915_WRITE(HALF_SLICE_CHICKEN3,
7733                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
7734
7735         /* WaSwitchSolVfFArbitrationPriority:hsw */
7736         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
7737
7738         /* WaRsPkgCStateDisplayPMReq:hsw */
7739         I915_WRITE(CHICKEN_PAR1_1,
7740                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
7741
7742         lpt_init_clock_gating(dev_priv);
7743 }
7744
7745 static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv)
7746 {
7747         uint32_t snpcr;
7748
7749         ilk_init_lp_watermarks(dev_priv);
7750
7751         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
7752
7753         /* WaDisableEarlyCull:ivb */
7754         I915_WRITE(_3D_CHICKEN3,
7755                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
7756
7757         /* WaDisableBackToBackFlipFix:ivb */
7758         I915_WRITE(IVB_CHICKEN3,
7759                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
7760                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
7761
7762         /* WaDisablePSDDualDispatchEnable:ivb */
7763         if (IS_IVB_GT1(dev_priv))
7764                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
7765                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
7766
7767         /* WaDisable_RenderCache_OperationalFlush:ivb */
7768         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7769
7770         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
7771         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
7772                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
7773
7774         /* WaApplyL3ControlAndL3ChickenMode:ivb */
7775         I915_WRITE(GEN7_L3CNTLREG1,
7776                         GEN7_WA_FOR_GEN7_L3_CONTROL);
7777         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
7778                    GEN7_WA_L3_CHICKEN_MODE);
7779         if (IS_IVB_GT1(dev_priv))
7780                 I915_WRITE(GEN7_ROW_CHICKEN2,
7781                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7782         else {
7783                 /* must write both registers */
7784                 I915_WRITE(GEN7_ROW_CHICKEN2,
7785                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7786                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
7787                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7788         }
7789
7790         /* WaForceL3Serialization:ivb */
7791         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
7792                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
7793
7794         /*
7795          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
7796          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
7797          */
7798         I915_WRITE(GEN6_UCGCTL2,
7799                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
7800
7801         /* This is required by WaCatErrorRejectionIssue:ivb */
7802         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7803                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7804                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7805
7806         g4x_disable_trickle_feed(dev_priv);
7807
7808         gen7_setup_fixed_func_scheduler(dev_priv);
7809
7810         if (0) { /* causes HiZ corruption on ivb:gt1 */
7811                 /* enable HiZ Raw Stall Optimization */
7812                 I915_WRITE(CACHE_MODE_0_GEN7,
7813                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
7814         }
7815
7816         /* WaDisable4x2SubspanOptimization:ivb */
7817         I915_WRITE(CACHE_MODE_1,
7818                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7819
7820         /*
7821          * BSpec recommends 8x4 when MSAA is used,
7822          * however in practice 16x4 seems fastest.
7823          *
7824          * Note that PS/WM thread counts depend on the WIZ hashing
7825          * disable bit, which we don't touch here, but it's good
7826          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7827          */
7828         I915_WRITE(GEN7_GT_MODE,
7829                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7830
7831         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
7832         snpcr &= ~GEN6_MBC_SNPCR_MASK;
7833         snpcr |= GEN6_MBC_SNPCR_MED;
7834         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
7835
7836         if (!HAS_PCH_NOP(dev_priv))
7837                 cpt_init_clock_gating(dev_priv);
7838
7839         gen6_check_mch_setup(dev_priv);
7840 }
7841
7842 static void valleyview_init_clock_gating(struct drm_i915_private *dev_priv)
7843 {
7844         /* WaDisableEarlyCull:vlv */
7845         I915_WRITE(_3D_CHICKEN3,
7846                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
7847
7848         /* WaDisableBackToBackFlipFix:vlv */
7849         I915_WRITE(IVB_CHICKEN3,
7850                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
7851                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
7852
7853         /* WaPsdDispatchEnable:vlv */
7854         /* WaDisablePSDDualDispatchEnable:vlv */
7855         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
7856                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
7857                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
7858
7859         /* WaDisable_RenderCache_OperationalFlush:vlv */
7860         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7861
7862         /* WaForceL3Serialization:vlv */
7863         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
7864                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
7865
7866         /* WaDisableDopClockGating:vlv */
7867         I915_WRITE(GEN7_ROW_CHICKEN2,
7868                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7869
7870         /* This is required by WaCatErrorRejectionIssue:vlv */
7871         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7872                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7873                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7874
7875         gen7_setup_fixed_func_scheduler(dev_priv);
7876
7877         /*
7878          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
7879          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
7880          */
7881         I915_WRITE(GEN6_UCGCTL2,
7882                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
7883
7884         /* WaDisableL3Bank2xClockGate:vlv
7885          * Disabling L3 clock gating- MMIO 940c[25] = 1
7886          * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
7887         I915_WRITE(GEN7_UCGCTL4,
7888                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
7889
7890         /*
7891          * BSpec says this must be set, even though
7892          * WaDisable4x2SubspanOptimization isn't listed for VLV.
7893          */
7894         I915_WRITE(CACHE_MODE_1,
7895                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7896
7897         /*
7898          * BSpec recommends 8x4 when MSAA is used,
7899          * however in practice 16x4 seems fastest.
7900          *
7901          * Note that PS/WM thread counts depend on the WIZ hashing
7902          * disable bit, which we don't touch here, but it's good
7903          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7904          */
7905         I915_WRITE(GEN7_GT_MODE,
7906                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7907
7908         /*
7909          * WaIncreaseL3CreditsForVLVB0:vlv
7910          * This is the hardware default actually.
7911          */
7912         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
7913
7914         /*
7915          * WaDisableVLVClockGating_VBIIssue:vlv
7916          * Disable clock gating on th GCFG unit to prevent a delay
7917          * in the reporting of vblank events.
7918          */
7919         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
7920 }
7921
7922 static void cherryview_init_clock_gating(struct drm_i915_private *dev_priv)
7923 {
7924         /* WaVSRefCountFullforceMissDisable:chv */
7925         /* WaDSRefCountFullforceMissDisable:chv */
7926         I915_WRITE(GEN7_FF_THREAD_MODE,
7927                    I915_READ(GEN7_FF_THREAD_MODE) &
7928                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
7929
7930         /* WaDisableSemaphoreAndSyncFlipWait:chv */
7931         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
7932                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
7933
7934         /* WaDisableCSUnitClockGating:chv */
7935         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
7936                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
7937
7938         /* WaDisableSDEUnitClockGating:chv */
7939         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
7940                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
7941
7942         /*
7943          * WaProgramL3SqcReg1Default:chv
7944          * See gfxspecs/Related Documents/Performance Guide/
7945          * LSQC Setting Recommendations.
7946          */
7947         gen8_set_l3sqc_credits(dev_priv, 38, 2);
7948
7949         /*
7950          * GTT cache may not work with big pages, so if those
7951          * are ever enabled GTT cache may need to be disabled.
7952          */
7953         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
7954 }
7955
7956 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
7957 {
7958         uint32_t dspclk_gate;
7959
7960         I915_WRITE(RENCLK_GATE_D1, 0);
7961         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
7962                    GS_UNIT_CLOCK_GATE_DISABLE |
7963                    CL_UNIT_CLOCK_GATE_DISABLE);
7964         I915_WRITE(RAMCLK_GATE_D, 0);
7965         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
7966                 OVRUNIT_CLOCK_GATE_DISABLE |
7967                 OVCUNIT_CLOCK_GATE_DISABLE;
7968         if (IS_GM45(dev_priv))
7969                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
7970         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
7971
7972         /* WaDisableRenderCachePipelinedFlush */
7973         I915_WRITE(CACHE_MODE_0,
7974                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
7975
7976         /* WaDisable_RenderCache_OperationalFlush:g4x */
7977         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7978
7979         g4x_disable_trickle_feed(dev_priv);
7980 }
7981
7982 static void crestline_init_clock_gating(struct drm_i915_private *dev_priv)
7983 {
7984         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
7985         I915_WRITE(RENCLK_GATE_D2, 0);
7986         I915_WRITE(DSPCLK_GATE_D, 0);
7987         I915_WRITE(RAMCLK_GATE_D, 0);
7988         I915_WRITE16(DEUC, 0);
7989         I915_WRITE(MI_ARB_STATE,
7990                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7991
7992         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7993         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7994 }
7995
7996 static void broadwater_init_clock_gating(struct drm_i915_private *dev_priv)
7997 {
7998         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7999                    I965_RCC_CLOCK_GATE_DISABLE |
8000                    I965_RCPB_CLOCK_GATE_DISABLE |
8001                    I965_ISC_CLOCK_GATE_DISABLE |
8002                    I965_FBC_CLOCK_GATE_DISABLE);
8003         I915_WRITE(RENCLK_GATE_D2, 0);
8004         I915_WRITE(MI_ARB_STATE,
8005                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
8006
8007         /* WaDisable_RenderCache_OperationalFlush:gen4 */
8008         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8009 }
8010
8011 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
8012 {
8013         u32 dstate = I915_READ(D_STATE);
8014
8015         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
8016                 DSTATE_DOT_CLOCK_GATING;
8017         I915_WRITE(D_STATE, dstate);
8018
8019         if (IS_PINEVIEW(dev_priv))
8020                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
8021
8022         /* IIR "flip pending" means done if this bit is set */
8023         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
8024
8025         /* interrupts should cause a wake up from C3 */
8026         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
8027
8028         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
8029         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
8030
8031         I915_WRITE(MI_ARB_STATE,
8032                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
8033 }
8034
8035 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
8036 {
8037         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
8038
8039         /* interrupts should cause a wake up from C3 */
8040         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
8041                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
8042
8043         I915_WRITE(MEM_MODE,
8044                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
8045 }
8046
8047 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
8048 {
8049         I915_WRITE(MEM_MODE,
8050                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
8051                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
8052 }
8053
/*
 * Run the clock gating hook stored in dev_priv->display.init_clock_gating.
 * The hook is selected by intel_init_clock_gating_hooks().
 */
void intel_init_clock_gating(struct drm_i915_private *dev_priv)
{
        dev_priv->display.init_clock_gating(dev_priv);
}
8058
/*
 * Suspend-time hardware hook: only devices with an LPT PCH have work to do
 * here, which is delegated to lpt_suspend_hw().
 */
void intel_suspend_hw(struct drm_i915_private *dev_priv)
{
        if (!HAS_PCH_LPT(dev_priv))
                return;

        lpt_suspend_hw(dev_priv);
}
8064
/*
 * Fallback clock gating hook: touches no registers and only emits a debug
 * message. intel_init_clock_gating_hooks() installs it when no platform
 * check matches.
 */
static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
{
        DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
}
8069
8070 /**
8071  * intel_init_clock_gating_hooks - setup the clock gating hooks
8072  * @dev_priv: device private
8073  *
8074  * Setup the hooks that configure which clocks of a given platform can be
8075  * gated and also apply various GT and display specific workarounds for these
8076  * platforms. Note that some GT specific workarounds are applied separately
8077  * when GPU contexts or batchbuffers start their execution.
8078  */
8079 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
8080 {
8081         if (IS_SKYLAKE(dev_priv))
8082                 dev_priv->display.init_clock_gating = skylake_init_clock_gating;
8083         else if (IS_KABYLAKE(dev_priv))
8084                 dev_priv->display.init_clock_gating = kabylake_init_clock_gating;
8085         else if (IS_BROXTON(dev_priv))
8086                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
8087         else if (IS_GEMINILAKE(dev_priv))
8088                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
8089         else if (IS_BROADWELL(dev_priv))
8090                 dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
8091         else if (IS_CHERRYVIEW(dev_priv))
8092                 dev_priv->display.init_clock_gating = cherryview_init_clock_gating;
8093         else if (IS_HASWELL(dev_priv))
8094                 dev_priv->display.init_clock_gating = haswell_init_clock_gating;
8095         else if (IS_IVYBRIDGE(dev_priv))
8096                 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
8097         else if (IS_VALLEYVIEW(dev_priv))
8098                 dev_priv->display.init_clock_gating = valleyview_init_clock_gating;
8099         else if (IS_GEN6(dev_priv))
8100                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
8101         else if (IS_GEN5(dev_priv))
8102                 dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
8103         else if (IS_G4X(dev_priv))
8104                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
8105         else if (IS_I965GM(dev_priv))
8106                 dev_priv->display.init_clock_gating = crestline_init_clock_gating;
8107         else if (IS_I965G(dev_priv))
8108                 dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
8109         else if (IS_GEN3(dev_priv))
8110                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
8111         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
8112                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
8113         else if (IS_GEN2(dev_priv))
8114                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
8115         else {
8116                 MISSING_CASE(INTEL_DEVID(dev_priv));
8117                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
8118         }
8119 }
8120
8121 /* Set up chip specific power management-related functions */
8122 void intel_init_pm(struct drm_i915_private *dev_priv)
8123 {
8124         intel_fbc_init(dev_priv);
8125
8126         /* For cxsr */
8127         if (IS_PINEVIEW(dev_priv))
8128                 i915_pineview_get_mem_freq(dev_priv);
8129         else if (IS_GEN5(dev_priv))
8130                 i915_ironlake_get_mem_freq(dev_priv);
8131
8132         /* For FIFO watermark updates */
8133         if (INTEL_GEN(dev_priv) >= 9) {
8134                 skl_setup_wm_latency(dev_priv);
8135                 dev_priv->display.initial_watermarks = skl_initial_wm;
8136                 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
8137                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
8138         } else if (HAS_PCH_SPLIT(dev_priv)) {
8139                 ilk_setup_wm_latency(dev_priv);
8140
8141                 if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] &&
8142                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
8143                     (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] &&
8144                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
8145                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
8146                         dev_priv->display.compute_intermediate_wm =
8147                                 ilk_compute_intermediate_wm;
8148                         dev_priv->display.initial_watermarks =
8149                                 ilk_initial_watermarks;
8150                         dev_priv->display.optimize_watermarks =
8151                                 ilk_optimize_watermarks;
8152                 } else {
8153                         DRM_DEBUG_KMS("Failed to read display plane latency. "
8154                                       "Disable CxSR\n");
8155                 }
8156         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
8157                 vlv_setup_wm_latency(dev_priv);
8158                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
8159                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
8160                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
8161                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
8162                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
8163         } else if (IS_PINEVIEW(dev_priv)) {
8164                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
8165                                             dev_priv->is_ddr3,
8166                                             dev_priv->fsb_freq,
8167                                             dev_priv->mem_freq)) {
8168                         DRM_INFO("failed to find known CxSR latency "
8169                                  "(found ddr%s fsb freq %d, mem freq %d), "
8170                                  "disabling CxSR\n",
8171                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
8172                                  dev_priv->fsb_freq, dev_priv->mem_freq);
8173                         /* Disable CxSR and never update its watermark again */
8174                         intel_set_memory_cxsr(dev_priv, false);
8175                         dev_priv->display.update_wm = NULL;
8176                 } else
8177                         dev_priv->display.update_wm = pineview_update_wm;
8178         } else if (IS_G4X(dev_priv)) {
8179                 dev_priv->display.update_wm = g4x_update_wm;
8180         } else if (IS_GEN4(dev_priv)) {
8181                 dev_priv->display.update_wm = i965_update_wm;
8182         } else if (IS_GEN3(dev_priv)) {
8183                 dev_priv->display.update_wm = i9xx_update_wm;
8184                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
8185         } else if (IS_GEN2(dev_priv)) {
8186                 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
8187                         dev_priv->display.update_wm = i845_update_wm;
8188                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
8189                 } else {
8190                         dev_priv->display.update_wm = i9xx_update_wm;
8191                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
8192                 }
8193         } else {
8194                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
8195         }
8196 }
8197
8198 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
8199 {
8200         uint32_t flags =
8201                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
8202
8203         switch (flags) {
8204         case GEN6_PCODE_SUCCESS:
8205                 return 0;
8206         case GEN6_PCODE_UNIMPLEMENTED_CMD:
8207         case GEN6_PCODE_ILLEGAL_CMD:
8208                 return -ENXIO;
8209         case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
8210         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
8211                 return -EOVERFLOW;
8212         case GEN6_PCODE_TIMEOUT:
8213                 return -ETIMEDOUT;
8214         default:
8215                 MISSING_CASE(flags);
8216                 return 0;
8217         }
8218 }
8219
8220 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
8221 {
8222         uint32_t flags =
8223                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
8224
8225         switch (flags) {
8226         case GEN6_PCODE_SUCCESS:
8227                 return 0;
8228         case GEN6_PCODE_ILLEGAL_CMD:
8229                 return -ENXIO;
8230         case GEN7_PCODE_TIMEOUT:
8231                 return -ETIMEDOUT;
8232         case GEN7_PCODE_ILLEGAL_DATA:
8233                 return -EINVAL;
8234         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
8235                 return -EOVERFLOW;
8236         default:
8237                 MISSING_CASE(flags);
8238                 return 0;
8239         }
8240 }
8241
8242 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
8243 {
8244         int status;
8245
8246         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
8247
8248         /* GEN6_PCODE_* are outside of the forcewake domain, we can
8249          * use te fw I915_READ variants to reduce the amount of work
8250          * required when reading/writing.
8251          */
8252
8253         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
8254                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
8255                 return -EAGAIN;
8256         }
8257
8258         I915_WRITE_FW(GEN6_PCODE_DATA, *val);
8259         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
8260         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
8261
8262         if (__intel_wait_for_register_fw(dev_priv,
8263                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
8264                                          500, 0, NULL)) {
8265                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
8266                 return -ETIMEDOUT;
8267         }
8268
8269         *val = I915_READ_FW(GEN6_PCODE_DATA);
8270         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
8271
8272         if (INTEL_GEN(dev_priv) > 6)
8273                 status = gen7_check_mailbox_status(dev_priv);
8274         else
8275                 status = gen6_check_mailbox_status(dev_priv);
8276
8277         if (status) {
8278                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
8279                                  status);
8280                 return status;
8281         }
8282
8283         return 0;
8284 }
8285
8286 int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
8287                             u32 mbox, u32 val)
8288 {
8289         int status;
8290
8291         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
8292
8293         /* GEN6_PCODE_* are outside of the forcewake domain, we can
8294          * use te fw I915_READ variants to reduce the amount of work
8295          * required when reading/writing.
8296          */
8297
8298         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
8299                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
8300                 return -EAGAIN;
8301         }
8302
8303         I915_WRITE_FW(GEN6_PCODE_DATA, val);
8304         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
8305         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
8306
8307         if (__intel_wait_for_register_fw(dev_priv,
8308                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
8309                                          500, 0, NULL)) {
8310                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
8311                 return -ETIMEDOUT;
8312         }
8313
8314         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
8315
8316         if (INTEL_GEN(dev_priv) > 6)
8317                 status = gen7_check_mailbox_status(dev_priv);
8318         else
8319                 status = gen6_check_mailbox_status(dev_priv);
8320
8321         if (status) {
8322                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n",
8323                                  status);
8324                 return status;
8325         }
8326
8327         return 0;
8328 }
8329
8330 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
8331                                   u32 request, u32 reply_mask, u32 reply,
8332                                   u32 *status)
8333 {
8334         u32 val = request;
8335
8336         *status = sandybridge_pcode_read(dev_priv, mbox, &val);
8337
8338         return *status || ((val & reply_mask) == reply);
8339 }
8340
8341 /**
8342  * skl_pcode_request - send PCODE request until acknowledgment
8343  * @dev_priv: device private
8344  * @mbox: PCODE mailbox ID the request is targeted for
8345  * @request: request ID
8346  * @reply_mask: mask used to check for request acknowledgment
8347  * @reply: value used to check for request acknowledgment
8348  * @timeout_base_ms: timeout for polling with preemption enabled
8349  *
8350  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
8351  * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
8352  * The request is acknowledged once the PCODE reply dword equals @reply after
8353  * applying @reply_mask. Polling is first attempted with preemption enabled
8354  * for @timeout_base_ms and if this times out for another 50 ms with
8355  * preemption disabled.
8356  *
8357  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
8358  * other error as reported by PCODE.
8359  */
8360 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
8361                       u32 reply_mask, u32 reply, int timeout_base_ms)
8362 {
8363         u32 status;
8364         int ret;
8365
8366         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
8367
8368 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
8369                                    &status)
8370
8371         /*
8372          * Prime the PCODE by doing a request first. Normally it guarantees
8373          * that a subsequent request, at most @timeout_base_ms later, succeeds.
8374          * _wait_for() doesn't guarantee when its passed condition is evaluated
8375          * first, so send the first request explicitly.
8376          */
8377         if (COND) {
8378                 ret = 0;
8379                 goto out;
8380         }
8381         ret = _wait_for(COND, timeout_base_ms * 1000, 10);
8382         if (!ret)
8383                 goto out;
8384
8385         /*
8386          * The above can time out if the number of requests was low (2 in the
8387          * worst case) _and_ PCODE was busy for some reason even after a
8388          * (queued) request and @timeout_base_ms delay. As a workaround retry
8389          * the poll with preemption disabled to maximize the number of
8390          * requests. Increase the timeout from @timeout_base_ms to 50ms to
8391          * account for interrupts that could reduce the number of these
8392          * requests, and for any quirks of the PCODE firmware that delays
8393          * the request completion.
8394          */
8395         DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
8396         WARN_ON_ONCE(timeout_base_ms > 3);
8397         preempt_disable();
8398         ret = wait_for_atomic(COND, 50);
8399         preempt_enable();
8400
8401 out:
8402         return ret ? ret : status;
8403 #undef COND
8404 }
8405
8406 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
8407 {
8408         /*
8409          * N = val - 0xb7
8410          * Slow = Fast = GPLL ref * N
8411          */
8412         return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000);
8413 }
8414
8415 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
8416 {
8417         return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7;
8418 }
8419
8420 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
8421 {
8422         /*
8423          * N = val / 2
8424          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
8425          */
8426         return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000);
8427 }
8428
8429 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
8430 {
8431         /* CHV needs even values */
8432         return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2;
8433 }
8434
8435 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
8436 {
8437         if (IS_GEN9(dev_priv))
8438                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
8439                                          GEN9_FREQ_SCALER);
8440         else if (IS_CHERRYVIEW(dev_priv))
8441                 return chv_gpu_freq(dev_priv, val);
8442         else if (IS_VALLEYVIEW(dev_priv))
8443                 return byt_gpu_freq(dev_priv, val);
8444         else
8445                 return val * GT_FREQUENCY_MULTIPLIER;
8446 }
8447
8448 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
8449 {
8450         if (IS_GEN9(dev_priv))
8451                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
8452                                          GT_FREQUENCY_MULTIPLIER);
8453         else if (IS_CHERRYVIEW(dev_priv))
8454                 return chv_freq_opcode(dev_priv, val);
8455         else if (IS_VALLEYVIEW(dev_priv))
8456                 return byt_freq_opcode(dev_priv, val);
8457         else
8458                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
8459 }
8460
8461 struct request_boost {
8462         struct work_struct work;
8463         struct drm_i915_gem_request *req;
8464 };
8465
8466 static void __intel_rps_boost_work(struct work_struct *work)
8467 {
8468         struct request_boost *boost = container_of(work, struct request_boost, work);
8469         struct drm_i915_gem_request *req = boost->req;
8470
8471         if (!i915_gem_request_completed(req))
8472                 gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
8473
8474         i915_gem_request_put(req);
8475         kfree(boost);
8476 }
8477
8478 void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
8479 {
8480         struct request_boost *boost;
8481
8482         if (req == NULL || INTEL_GEN(req->i915) < 6)
8483                 return;
8484
8485         if (i915_gem_request_completed(req))
8486                 return;
8487
8488         boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
8489         if (boost == NULL)
8490                 return;
8491
8492         boost->req = i915_gem_request_get(req);
8493
8494         INIT_WORK(&boost->work, __intel_rps_boost_work);
8495         queue_work(req->i915->wq, &boost->work);
8496 }
8497
8498 void intel_pm_setup(struct drm_i915_private *dev_priv)
8499 {
8500         mutex_init(&dev_priv->rps.hw_lock);
8501         spin_lock_init(&dev_priv->rps.client_lock);
8502
8503         INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
8504                           __intel_autoenable_gt_powersave);
8505         INIT_LIST_HEAD(&dev_priv->rps.clients);
8506
8507         dev_priv->pm.suspended = false;
8508         atomic_set(&dev_priv->pm.wakeref_count, 0);
8509 }
8510
8511 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
8512                              const i915_reg_t reg)
8513 {
8514         u32 lower, upper, tmp;
8515         int loop = 2;
8516
8517         /* The register accessed do not need forcewake. We borrow
8518          * uncore lock to prevent concurrent access to range reg.
8519          */
8520         spin_lock_irq(&dev_priv->uncore.lock);
8521
8522         /* vlv and chv residency counters are 40 bits in width.
8523          * With a control bit, we can choose between upper or lower
8524          * 32bit window into this counter.
8525          *
8526          * Although we always use the counter in high-range mode elsewhere,
8527          * userspace may attempt to read the value before rc6 is initialised,
8528          * before we have set the default VLV_COUNTER_CONTROL value. So always
8529          * set the high bit to be safe.
8530          */
8531         I915_WRITE_FW(VLV_COUNTER_CONTROL,
8532                       _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
8533         upper = I915_READ_FW(reg);
8534         do {
8535                 tmp = upper;
8536
8537                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
8538                               _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
8539                 lower = I915_READ_FW(reg);
8540
8541                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
8542                               _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
8543                 upper = I915_READ_FW(reg);
8544         } while (upper != tmp && --loop);
8545
8546         /* Everywhere else we always use VLV_COUNTER_CONTROL with the
8547          * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
8548          * now.
8549          */
8550
8551         spin_unlock_irq(&dev_priv->uncore.lock);
8552
8553         return lower | (u64)upper << 8;
8554 }
8555
8556 u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
8557                            const i915_reg_t reg)
8558 {
8559         u64 time_hw, units, div;
8560
8561         if (!intel_enable_rc6())
8562                 return 0;
8563
8564         intel_runtime_pm_get(dev_priv);
8565
8566         /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
8567         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
8568                 units = 1000;
8569                 div = dev_priv->czclk_freq;
8570
8571                 time_hw = vlv_residency_raw(dev_priv, reg);
8572         } else if (IS_GEN9_LP(dev_priv)) {
8573                 units = 1000;
8574                 div = 1200;             /* 833.33ns */
8575
8576                 time_hw = I915_READ(reg);
8577         } else {
8578                 units = 128000; /* 1.28us */
8579                 div = 100000;
8580
8581                 time_hw = I915_READ(reg);
8582         }
8583
8584         intel_runtime_pm_put(dev_priv);
8585         return DIV_ROUND_UP_ULL(time_hw * units, div);
8586 }