Merge branch 'drm-next-3.12' of git://people.freedesktop.org/~agd5f/linux into drm-next
author     Dave Airlie <airlied@redhat.com>
           Sun, 1 Sep 2013 23:31:40 +0000 (09:31 +1000)
committer  Dave Airlie <airlied@redhat.com>
           Sun, 1 Sep 2013 23:31:40 +0000 (09:31 +1000)
Alex writes:
This is the radeon drm-next pull request.  Big changes include:
- support for dpm on CIK parts
- support for ASPM on CIK parts
- support for Berlin GPUs
- major ring handling cleanup
- remove the old 3D blit code for bo moves in favor of CP DMA or sDMA
- lots of bug fixes

[airlied: fix up a bunch of conflicts from drm_order removal]

* 'drm-next-3.12' of git://people.freedesktop.org/~agd5f/linux: (898 commits)
  drm/radeon/dpm: make sure dc performance level limits are valid (CI)
  drm/radeon/dpm: make sure dc performance level limits are valid (BTC-SI) (v2)
  drm/radeon: gcc fixes for extended dpm tables
  drm/radeon: gcc fixes for kb/kv dpm
  drm/radeon: gcc fixes for ci dpm
  drm/radeon: gcc fixes for si dpm
  drm/radeon: gcc fixes for ni dpm
  drm/radeon: gcc fixes for trinity dpm
  drm/radeon: gcc fixes for sumo dpm
  drm/radeonn: gcc fixes for rv7xx/eg/btc dpm
  drm/radeon: gcc fixes for rv6xx dpm
  drm/radeon: gcc fixes for radeon_atombios.c
  drm/radeon: enable UVD interrupts on CIK
  drm/radeon: fix init ordering for r600+
  drm/radeon/dpm: only need to reprogram uvd if uvd pg is enabled
  drm/radeon: check the return value of uvd_v1_0_start in uvd_v1_0_init
  drm/radeon: split out radeon_uvd_resume from uvd_v4_2_resume
  radeon kms: fix uninitialised hotplug work usage in r100_irq_process()
  drm/radeon/audio: set up the sads on DCE3.2 asics
  drm/radeon: fix handling of variable sized arrays for router objects
  ...

Conflicts:
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_gem_dmabuf.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r600.c

33 files changed:
drivers/gpu/drm/ast/ast_ttm.c
drivers/gpu/drm/cirrus/cirrus_ttm.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/gma500/psb_intel_sdvo.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_panel.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/mgag200/mgag200_ttm.c
drivers/gpu/drm/nouveau/core/subdev/mc/base.c
drivers/gpu/drm/nouveau/dispnv04/crtc.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/radeon/atombios_dp.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/ni_dma.c
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_dma.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/uvd_v1_0.c
include/drm/drm_edid.h
include/drm/drm_pciids.h

index 20fcf4ee3af0ac2d72970a623318adfb1b6e3958,6e8887fe6c1b44fb83d09df82d10d50483924de5..32aecb34dbced78308a393833e177fd46a1e5c9b
@@@ -148,9 -148,7 +148,9 @@@ ast_bo_evict_flags(struct ttm_buffer_ob
  
  static int ast_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
  {
 -      return 0;
 +      struct ast_bo *astbo = ast_bo(bo);
 +
 +      return drm_vma_node_verify_access(&astbo->gem.vma_node, filp);
  }
  
  static int ast_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
@@@ -323,7 -321,9 +323,8 @@@ int ast_bo_create(struct drm_device *de
                return ret;
        }
  
 -      astbo->gem.driver_private = NULL;
        astbo->bo.bdev = &ast->ttm.bdev;
+       astbo->bo.bdev->dev_mapping = dev->dev_mapping;
  
        ast_ttm_placement(astbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
  
index ae2385cc71cb6e24da13885ed7dea84fe79b7bc6,69fd8f1ac8df192729c89f4dd981a5bb5b7611db..75becdeac07d710e1322a64096df65850bf31fd4
@@@ -148,9 -148,7 +148,9 @@@ cirrus_bo_evict_flags(struct ttm_buffer
  
  static int cirrus_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
  {
 -      return 0;
 +      struct cirrus_bo *cirrusbo = cirrus_bo(bo);
 +
 +      return drm_vma_node_verify_access(&cirrusbo->gem.vma_node, filp);
  }
  
  static int cirrus_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
@@@ -328,7 -326,9 +328,8 @@@ int cirrus_bo_create(struct drm_device 
                return ret;
        }
  
 -      cirrusbo->gem.driver_private = NULL;
        cirrusbo->bo.bdev = &cirrus->ttm.bdev;
+       cirrusbo->bo.bdev->dev_mapping = dev->dev_mapping;
  
        cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
  
index a207cc3f2c57630cfd8b019d3ae3fd769fbb029c,58b4882feedfb51b5cb6521c52c7c75559d43d67..1688ff500513142d6d5072efb8061f1ae231bb3d
@@@ -125,6 -125,9 +125,9 @@@ static struct edid_quirk 
  
        /* ViewSonic VA2026w */
        { "VSC", 5020, EDID_QUIRK_FORCE_REDUCED_BLANKING },
+       /* Medion MD 30217 PG */
+       { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 },
  };
  
  /*
@@@ -931,36 -934,6 +934,36 @@@ static const struct drm_display_mode ed
         .vrefresh = 100, },
  };
  
 +/*
 + * HDMI 1.4 4k modes.
 + */
 +static const struct drm_display_mode edid_4k_modes[] = {
 +      /* 1 - 3840x2160@30Hz */
 +      { DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
 +                 3840, 4016, 4104, 4400, 0,
 +                 2160, 2168, 2178, 2250, 0,
 +                 DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
 +        .vrefresh = 30, },
 +      /* 2 - 3840x2160@25Hz */
 +      { DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
 +                 3840, 4896, 4984, 5280, 0,
 +                 2160, 2168, 2178, 2250, 0,
 +                 DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
 +        .vrefresh = 25, },
 +      /* 3 - 3840x2160@24Hz */
 +      { DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
 +                 3840, 5116, 5204, 5500, 0,
 +                 2160, 2168, 2178, 2250, 0,
 +                 DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
 +        .vrefresh = 24, },
 +      /* 4 - 4096x2160@24Hz (SMPTE) */
 +      { DRM_MODE("4096x2160", DRM_MODE_TYPE_DRIVER, 297000,
 +                 4096, 5116, 5204, 5500, 0,
 +                 2160, 2168, 2178, 2250, 0,
 +                 DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
 +        .vrefresh = 24, },
 +};
 +
  /*** DDC fetch and block validation ***/
  
  static const u8 edid_header[] = {
@@@ -2317,6 -2290,7 +2320,6 @@@ add_detailed_modes(struct drm_connecto
        return closure.modes;
  }
  
 -#define HDMI_IDENTIFIER 0x000C03
  #define AUDIO_BLOCK   0x01
  #define VIDEO_BLOCK     0x02
  #define VENDOR_BLOCK    0x03
  #define EDID_CEA_YCRCB422     (1 << 4)
  #define EDID_CEA_VCDB_QS      (1 << 6)
  
 -/**
 +/*
   * Search EDID for CEA extension block.
   */
 -u8 *drm_find_cea_extension(struct edid *edid)
 +static u8 *drm_find_cea_extension(struct edid *edid)
  {
        u8 *edid_ext = NULL;
        int i;
  
        return edid_ext;
  }
 -EXPORT_SYMBOL(drm_find_cea_extension);
  
  /*
   * Calculate the alternate clock for the CEA mode
@@@ -2408,54 -2383,6 +2411,54 @@@ u8 drm_match_cea_mode(const struct drm_
  }
  EXPORT_SYMBOL(drm_match_cea_mode);
  
 +/*
 + * Calculate the alternate clock for HDMI modes (those from the HDMI vendor
 + * specific block).
 + *
 + * It's almost like cea_mode_alternate_clock(), we just need to add an
 + * exception for the VIC 4 mode (4096x2160@24Hz): no alternate clock for this
 + * one.
 + */
 +static unsigned int
 +hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode)
 +{
 +      if (hdmi_mode->vdisplay == 4096 && hdmi_mode->hdisplay == 2160)
 +              return hdmi_mode->clock;
 +
 +      return cea_mode_alternate_clock(hdmi_mode);
 +}
 +
 +/*
 + * drm_match_hdmi_mode - look for a HDMI mode matching given mode
 + * @to_match: display mode
 + *
 + * An HDMI mode is one defined in the HDMI vendor specific block.
 + *
 + * Returns the HDMI Video ID (VIC) of the mode or 0 if it isn't one.
 + */
 +static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match)
 +{
 +      u8 mode;
 +
 +      if (!to_match->clock)
 +              return 0;
 +
 +      for (mode = 0; mode < ARRAY_SIZE(edid_4k_modes); mode++) {
 +              const struct drm_display_mode *hdmi_mode = &edid_4k_modes[mode];
 +              unsigned int clock1, clock2;
 +
 +              /* Make sure to also match alternate clocks */
 +              clock1 = hdmi_mode->clock;
 +              clock2 = hdmi_mode_alternate_clock(hdmi_mode);
 +
 +              if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) ||
 +                   KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) &&
 +                  drm_mode_equal_no_clocks(to_match, hdmi_mode))
 +                      return mode + 1;
 +      }
 +      return 0;
 +}
 +
  static int
  add_alternate_cea_modes(struct drm_connector *connector, struct edid *edid)
  {
         * with the alternate clock for certain CEA modes.
         */
        list_for_each_entry(mode, &connector->probed_modes, head) {
 -              const struct drm_display_mode *cea_mode;
 +              const struct drm_display_mode *cea_mode = NULL;
                struct drm_display_mode *newmode;
 -              u8 cea_mode_idx = drm_match_cea_mode(mode) - 1;
 +              u8 mode_idx = drm_match_cea_mode(mode) - 1;
                unsigned int clock1, clock2;
  
 -              if (cea_mode_idx >= ARRAY_SIZE(edid_cea_modes))
 -                      continue;
 +              if (mode_idx < ARRAY_SIZE(edid_cea_modes)) {
 +                      cea_mode = &edid_cea_modes[mode_idx];
 +                      clock2 = cea_mode_alternate_clock(cea_mode);
 +              } else {
 +                      mode_idx = drm_match_hdmi_mode(mode) - 1;
 +                      if (mode_idx < ARRAY_SIZE(edid_4k_modes)) {
 +                              cea_mode = &edid_4k_modes[mode_idx];
 +                              clock2 = hdmi_mode_alternate_clock(cea_mode);
 +                      }
 +              }
  
 -              cea_mode = &edid_cea_modes[cea_mode_idx];
 +              if (!cea_mode)
 +                      continue;
  
                clock1 = cea_mode->clock;
 -              clock2 = cea_mode_alternate_clock(cea_mode);
  
                if (clock1 == clock2)
                        continue;
  }
  
  static int
 -do_cea_modes (struct drm_connector *connector, u8 *db, u8 len)
 +do_cea_modes(struct drm_connector *connector, const u8 *db, u8 len)
  {
        struct drm_device *dev = connector->dev;
 -      u8 * mode, cea_mode;
 +      const u8 *mode;
 +      u8 cea_mode;
        int modes = 0;
  
        for (mode = db; mode < db + len; mode++) {
        return modes;
  }
  
 +/*
 + * do_hdmi_vsdb_modes - Parse the HDMI Vendor Specific data block
 + * @connector: connector corresponding to the HDMI sink
 + * @db: start of the CEA vendor specific block
 + * @len: length of the CEA block payload, ie. one can access up to db[len]
 + *
 + * Parses the HDMI VSDB looking for modes to add to @connector.
 + */
 +static int
 +do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len)
 +{
 +      struct drm_device *dev = connector->dev;
 +      int modes = 0, offset = 0, i;
 +      u8 vic_len;
 +
 +      if (len < 8)
 +              goto out;
 +
 +      /* no HDMI_Video_Present */
 +      if (!(db[8] & (1 << 5)))
 +              goto out;
 +
 +      /* Latency_Fields_Present */
 +      if (db[8] & (1 << 7))
 +              offset += 2;
 +
 +      /* I_Latency_Fields_Present */
 +      if (db[8] & (1 << 6))
 +              offset += 2;
 +
 +      /* the declared length is not long enough for the 2 first bytes
 +       * of additional video format capabilities */
 +      offset += 2;
 +      if (len < (8 + offset))
 +              goto out;
 +
 +      vic_len = db[8 + offset] >> 5;
 +
 +      for (i = 0; i < vic_len && len >= (9 + offset + i); i++) {
 +              struct drm_display_mode *newmode;
 +              u8 vic;
 +
 +              vic = db[9 + offset + i];
 +
 +              vic--; /* VICs start at 1 */
 +              if (vic >= ARRAY_SIZE(edid_4k_modes)) {
 +                      DRM_ERROR("Unknown HDMI VIC: %d\n", vic);
 +                      continue;
 +              }
 +
 +              newmode = drm_mode_duplicate(dev, &edid_4k_modes[vic]);
 +              if (!newmode)
 +                      continue;
 +
 +              drm_mode_probed_add(connector, newmode);
 +              modes++;
 +      }
 +
 +out:
 +      return modes;
 +}
 +
  static int
  cea_db_payload_len(const u8 *db)
  {
@@@ -2643,30 -2499,14 +2646,30 @@@ cea_db_offsets(const u8 *cea, int *star
        return 0;
  }
  
 +static bool cea_db_is_hdmi_vsdb(const u8 *db)
 +{
 +      int hdmi_id;
 +
 +      if (cea_db_tag(db) != VENDOR_BLOCK)
 +              return false;
 +
 +      if (cea_db_payload_len(db) < 5)
 +              return false;
 +
 +      hdmi_id = db[1] | (db[2] << 8) | (db[3] << 16);
 +
 +      return hdmi_id == HDMI_IEEE_OUI;
 +}
 +
  #define for_each_cea_db(cea, i, start, end) \
        for ((i) = (start); (i) < (end) && (i) + cea_db_payload_len(&(cea)[(i)]) < (end); (i) += cea_db_payload_len(&(cea)[(i)]) + 1)
  
  static int
  add_cea_modes(struct drm_connector *connector, struct edid *edid)
  {
 -      u8 * cea = drm_find_cea_extension(edid);
 -      u8 * db, dbl;
 +      const u8 *cea = drm_find_cea_extension(edid);
 +      const u8 *db;
 +      u8 dbl;
        int modes = 0;
  
        if (cea && cea_revision(cea) >= 3) {
                        dbl = cea_db_payload_len(db);
  
                        if (cea_db_tag(db) == VIDEO_BLOCK)
 -                              modes += do_cea_modes (connector, db+1, dbl);
 +                              modes += do_cea_modes(connector, db + 1, dbl);
 +                      else if (cea_db_is_hdmi_vsdb(db))
 +                              modes += do_hdmi_vsdb_modes(connector, db, dbl);
                }
        }
  
@@@ -2735,6 -2573,21 +2738,6 @@@ monitor_name(struct detailed_timing *t
                *(u8 **)data = t->data.other_data.data.str.str;
  }
  
 -static bool cea_db_is_hdmi_vsdb(const u8 *db)
 -{
 -      int hdmi_id;
 -
 -      if (cea_db_tag(db) != VENDOR_BLOCK)
 -              return false;
 -
 -      if (cea_db_payload_len(db) < 5)
 -              return false;
 -
 -      hdmi_id = db[1] | (db[2] << 8) | (db[3] << 16);
 -
 -      return hdmi_id == HDMI_IDENTIFIER;
 -}
 -
  /**
   * drm_edid_to_eld - build ELD from EDID
   * @connector: connector corresponding to the HDMI/DP sink
@@@ -2881,6 -2734,58 +2884,58 @@@ int drm_edid_to_sad(struct edid *edid, 
  }
  EXPORT_SYMBOL(drm_edid_to_sad);
  
+ /**
+  * drm_edid_to_speaker_allocation - extracts Speaker Allocation Data Blocks from EDID
+  * @edid: EDID to parse
+  * @sadb: pointer to the speaker block
+  *
+  * Looks for CEA EDID block and extracts the Speaker Allocation Data Block from it.
+  * Note: returned pointer needs to be kfreed
+  *
+  * Return number of found Speaker Allocation Blocks or negative number on error.
+  */
+ int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb)
+ {
+       int count = 0;
+       int i, start, end, dbl;
+       const u8 *cea;
+       cea = drm_find_cea_extension(edid);
+       if (!cea) {
+               DRM_DEBUG_KMS("SAD: no CEA Extension found\n");
+               return -ENOENT;
+       }
+       if (cea_revision(cea) < 3) {
+               DRM_DEBUG_KMS("SAD: wrong CEA revision\n");
+               return -ENOTSUPP;
+       }
+       if (cea_db_offsets(cea, &start, &end)) {
+               DRM_DEBUG_KMS("SAD: invalid data block offsets\n");
+               return -EPROTO;
+       }
+       for_each_cea_db(cea, i, start, end) {
+               const u8 *db = &cea[i];
+               if (cea_db_tag(db) == SPEAKER_BLOCK) {
+                       dbl = cea_db_payload_len(db);
+                       /* Speaker Allocation Data Block */
+                       if (dbl == 3) {
+                               *sadb = kmalloc(dbl, GFP_KERNEL);
+                               memcpy(*sadb, &db[1], dbl);
+                               count = dbl;
+                               break;
+                       }
+               }
+       }
+       return count;
+ }
+ EXPORT_SYMBOL(drm_edid_to_speaker_allocation);
  /**
   * drm_av_sync_delay - HDMI/DP sink audio-video sync delay in millisecond
   * @connector: connector associated with the HDMI/DP sink
@@@ -3252,10 -3157,9 +3307,10 @@@ drm_hdmi_avi_infoframe_from_display_mod
        if (err < 0)
                return err;
  
 +      if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 +              frame->pixel_repeat = 1;
 +
        frame->video_code = drm_match_cea_mode(mode);
 -      if (!frame->video_code)
 -              return 0;
  
        frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
        frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
        return 0;
  }
  EXPORT_SYMBOL(drm_hdmi_avi_infoframe_from_display_mode);
 +
 +/**
 + * drm_hdmi_vendor_infoframe_from_display_mode() - fill an HDMI infoframe with
 + * data from a DRM display mode
 + * @frame: HDMI vendor infoframe
 + * @mode: DRM display mode
 + *
 + * Note that there's is a need to send HDMI vendor infoframes only when using a
 + * 4k or stereoscopic 3D mode. So when giving any other mode as input this
 + * function will return -EINVAL, error that can be safely ignored.
 + *
 + * Returns 0 on success or a negative error code on failure.
 + */
 +int
 +drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
 +                                          const struct drm_display_mode *mode)
 +{
 +      int err;
 +      u8 vic;
 +
 +      if (!frame || !mode)
 +              return -EINVAL;
 +
 +      vic = drm_match_hdmi_mode(mode);
 +      if (!vic)
 +              return -EINVAL;
 +
 +      err = hdmi_vendor_infoframe_init(frame);
 +      if (err < 0)
 +              return err;
 +
 +      frame->vic = vic;
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL(drm_hdmi_vendor_infoframe_from_display_mode);
index 77841a1136177cab233776da6a92c9cb9069534f,3bc8414533c9bfa235024bef3458bbcf3e119605..6f01cdf5e1250a7dfbefe7f00e8b213ba1c63833
@@@ -65,7 -65,7 +65,7 @@@ static const char *tv_format_names[] = 
  #define TV_FORMAT_NUM  (sizeof(tv_format_names) / sizeof(*tv_format_names))
  
  struct psb_intel_sdvo {
 -      struct psb_intel_encoder base;
 +      struct gma_encoder base;
  
        struct i2c_adapter *i2c;
        u8 slave_addr;
  };
  
  struct psb_intel_sdvo_connector {
 -      struct psb_intel_connector base;
 +      struct gma_connector base;
  
        /* Mark the type of connector */
        uint16_t output_flag;
@@@ -200,13 -200,13 +200,13 @@@ static struct psb_intel_sdvo *to_psb_in
  
  static struct psb_intel_sdvo *intel_attached_sdvo(struct drm_connector *connector)
  {
 -      return container_of(psb_intel_attached_encoder(connector),
 +      return container_of(gma_attached_encoder(connector),
                            struct psb_intel_sdvo, base);
  }
  
  static struct psb_intel_sdvo_connector *to_psb_intel_sdvo_connector(struct drm_connector *connector)
  {
 -      return container_of(to_psb_intel_connector(connector), struct psb_intel_sdvo_connector, base);
 +      return container_of(to_gma_connector(connector), struct psb_intel_sdvo_connector, base);
  }
  
  static bool
@@@ -500,7 -500,8 +500,8 @@@ static bool psb_intel_sdvo_read_respons
                                  &status))
                goto log_fail;
  
-       while (status == SDVO_CMD_STATUS_PENDING && retry--) {
+       while ((status == SDVO_CMD_STATUS_PENDING ||
+               status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && retry--) {
                udelay(15);
                if (!psb_intel_sdvo_read_byte(psb_intel_sdvo,
                                          SDVO_I2C_CMD_STATUS,
@@@ -987,7 -988,7 +988,7 @@@ static void psb_intel_sdvo_mode_set(str
  {
        struct drm_device *dev = encoder->dev;
        struct drm_crtc *crtc = encoder->crtc;
 -      struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
 +      struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
        struct psb_intel_sdvo *psb_intel_sdvo = to_psb_intel_sdvo(encoder);
        u32 sdvox;
        struct psb_intel_sdvo_in_out_map in_out;
        }
        sdvox |= (9 << 19) | SDVO_BORDER_ENABLE;
  
 -      if (psb_intel_crtc->pipe == 1)
 +      if (gma_crtc->pipe == 1)
                sdvox |= SDVO_PIPE_B_SELECT;
        if (psb_intel_sdvo->has_hdmi_audio)
                sdvox |= SDVO_AUDIO_ENABLE;
@@@ -1121,7 -1122,7 +1122,7 @@@ static void psb_intel_sdvo_dpms(struct 
                if ((temp & SDVO_ENABLE) == 0)
                        psb_intel_sdvo_write_sdvox(psb_intel_sdvo, temp | SDVO_ENABLE);
                for (i = 0; i < 2; i++)
 -                      psb_intel_wait_for_vblank(dev);
 +                      gma_wait_for_vblank(dev);
  
                status = psb_intel_sdvo_get_trained_inputs(psb_intel_sdvo, &input1, &input2);
                /* Warn if the device reported failure to sync.
@@@ -1836,8 -1837,10 +1837,8 @@@ done
  static void psb_intel_sdvo_save(struct drm_connector *connector)
  {
        struct drm_device *dev = connector->dev;
 -      struct psb_intel_encoder *psb_intel_encoder =
 -                                      psb_intel_attached_encoder(connector);
 -      struct psb_intel_sdvo *sdvo =
 -                              to_psb_intel_sdvo(&psb_intel_encoder->base);
 +      struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 +      struct psb_intel_sdvo *sdvo = to_psb_intel_sdvo(&gma_encoder->base);
  
        sdvo->saveSDVO = REG_READ(sdvo->sdvo_reg);
  }
  static void psb_intel_sdvo_restore(struct drm_connector *connector)
  {
        struct drm_device *dev = connector->dev;
 -      struct drm_encoder *encoder =
 -                              &psb_intel_attached_encoder(connector)->base;
 +      struct drm_encoder *encoder = &gma_attached_encoder(connector)->base;
        struct psb_intel_sdvo *sdvo = to_psb_intel_sdvo(encoder);
        struct drm_crtc *crtc = encoder->crtc;
  
  static const struct drm_encoder_helper_funcs psb_intel_sdvo_helper_funcs = {
        .dpms = psb_intel_sdvo_dpms,
        .mode_fixup = psb_intel_sdvo_mode_fixup,
 -      .prepare = psb_intel_encoder_prepare,
 +      .prepare = gma_encoder_prepare,
        .mode_set = psb_intel_sdvo_mode_set,
 -      .commit = psb_intel_encoder_commit,
 +      .commit = gma_encoder_commit,
  };
  
  static const struct drm_connector_funcs psb_intel_sdvo_connector_funcs = {
  static const struct drm_connector_helper_funcs psb_intel_sdvo_connector_helper_funcs = {
        .get_modes = psb_intel_sdvo_get_modes,
        .mode_valid = psb_intel_sdvo_mode_valid,
 -      .best_encoder = psb_intel_best_encoder,
 +      .best_encoder = gma_best_encoder,
  };
  
  static void psb_intel_sdvo_enc_destroy(struct drm_encoder *encoder)
                                 psb_intel_sdvo->sdvo_lvds_fixed_mode);
  
        i2c_del_adapter(&psb_intel_sdvo->ddc);
 -      psb_intel_encoder_destroy(encoder);
 +      gma_encoder_destroy(encoder);
  }
  
  static const struct drm_encoder_funcs psb_intel_sdvo_enc_funcs = {
@@@ -2052,7 -2056,7 +2053,7 @@@ psb_intel_sdvo_connector_init(struct ps
        connector->base.base.doublescan_allowed = 0;
        connector->base.base.display_info.subpixel_order = SubPixelHorizontalRGB;
  
 -      psb_intel_connector_attach_encoder(&connector->base, &encoder->base);
 +      gma_connector_attach_encoder(&connector->base, &encoder->base);
        drm_sysfs_connector_add(&connector->base.base);
  }
  
@@@ -2072,7 -2076,7 +2073,7 @@@ psb_intel_sdvo_dvi_init(struct psb_inte
  {
        struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
        struct drm_connector *connector;
 -      struct psb_intel_connector *intel_connector;
 +      struct gma_connector *intel_connector;
        struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
  
        psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@@ -2112,7 -2116,7 +2113,7 @@@ psb_intel_sdvo_tv_init(struct psb_intel
  {
        struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
        struct drm_connector *connector;
 -      struct psb_intel_connector *intel_connector;
 +      struct gma_connector *intel_connector;
        struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
  
        psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@@ -2151,7 -2155,7 +2152,7 @@@ psb_intel_sdvo_analog_init(struct psb_i
  {
        struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
        struct drm_connector *connector;
 -      struct psb_intel_connector *intel_connector;
 +      struct gma_connector *intel_connector;
        struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
  
        psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@@ -2185,7 -2189,7 +2186,7 @@@ psb_intel_sdvo_lvds_init(struct psb_int
  {
        struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
        struct drm_connector *connector;
 -      struct psb_intel_connector *intel_connector;
 +      struct gma_connector *intel_connector;
        struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
  
        psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@@ -2537,7 -2541,7 +2538,7 @@@ psb_intel_sdvo_init_ddc_proxy(struct ps
  bool psb_intel_sdvo_init(struct drm_device *dev, int sdvo_reg)
  {
        struct drm_psb_private *dev_priv = dev->dev_private;
 -      struct psb_intel_encoder *psb_intel_encoder;
 +      struct gma_encoder *gma_encoder;
        struct psb_intel_sdvo *psb_intel_sdvo;
        int i;
  
        }
  
        /* encoder type will be decided later */
 -      psb_intel_encoder = &psb_intel_sdvo->base;
 -      psb_intel_encoder->type = INTEL_OUTPUT_SDVO;
 -      drm_encoder_init(dev, &psb_intel_encoder->base, &psb_intel_sdvo_enc_funcs, 0);
 +      gma_encoder = &psb_intel_sdvo->base;
 +      gma_encoder->type = INTEL_OUTPUT_SDVO;
 +      drm_encoder_init(dev, &gma_encoder->base, &psb_intel_sdvo_enc_funcs, 0);
  
        /* Read the regs to test if we can talk to the device */
        for (i = 0; i < 0x40; i++) {
        else
                dev_priv->hotplug_supported_mask |= SDVOC_HOTPLUG_INT_STATUS;
  
 -      drm_encoder_helper_add(&psb_intel_encoder->base, &psb_intel_sdvo_helper_funcs);
 +      drm_encoder_helper_add(&gma_encoder->base, &psb_intel_sdvo_helper_funcs);
  
        /* In default case sdvo lvds is false */
        if (!psb_intel_sdvo_get_capabilities(psb_intel_sdvo, &psb_intel_sdvo->caps))
        return true;
  
  err:
 -      drm_encoder_cleanup(&psb_intel_encoder->base);
 +      drm_encoder_cleanup(&gma_encoder->base);
        i2c_del_adapter(&psb_intel_sdvo->ddc);
        kfree(psb_intel_sdvo);
  
index f22c81d040c043c4a9a4a53b61bbc49138327f8b,1929bffc1c77f645a920c9b4037f0b5eb646a145..52a3785a3fdfa59fbab49f9098f8448418ff2155
@@@ -144,7 -144,6 +144,7 @@@ enum intel_dpll_id 
  
  struct intel_dpll_hw_state {
        uint32_t dpll;
 +      uint32_t dpll_md;
        uint32_t fp0;
        uint32_t fp1;
  };
@@@ -157,8 -156,6 +157,8 @@@ struct intel_shared_dpll 
        /* should match the index in the dev_priv->shared_dplls array */
        enum intel_dpll_id id;
        struct intel_dpll_hw_state hw_state;
 +      void (*mode_set)(struct drm_i915_private *dev_priv,
 +                       struct intel_shared_dpll *pll);
        void (*enable)(struct drm_i915_private *dev_priv,
                       struct intel_shared_dpll *pll);
        void (*disable)(struct drm_i915_private *dev_priv,
@@@ -201,6 -198,7 +201,6 @@@ struct intel_ddi_plls 
  #define DRIVER_MINOR          6
  #define DRIVER_PATCHLEVEL     0
  
 -#define WATCH_COHERENCY       0
  #define WATCH_LISTS   0
  #define WATCH_GTT     0
  
@@@ -322,8 -320,8 +322,8 @@@ struct drm_i915_error_state 
                u32 purgeable:1;
                s32 ring:4;
                u32 cache_level:2;
 -      } *active_bo, *pinned_bo;
 -      u32 active_bo_count, pinned_bo_count;
 +      } **active_bo, **pinned_bo;
 +      u32 *active_bo_count, *pinned_bo_count;
        struct intel_overlay_error_state *overlay;
        struct intel_display_error_state *display;
  };
@@@ -358,16 -356,14 +358,16 @@@ struct drm_i915_display_funcs 
                          struct dpll *match_clock,
                          struct dpll *best_clock);
        void (*update_wm)(struct drm_device *dev);
 -      void (*update_sprite_wm)(struct drm_device *dev, int pipe,
 +      void (*update_sprite_wm)(struct drm_plane *plane,
 +                               struct drm_crtc *crtc,
                                 uint32_t sprite_width, int pixel_size,
 -                               bool enable);
 +                               bool enable, bool scaled);
        void (*modeset_global_resources)(struct drm_device *dev);
        /* Returns the active state of the crtc, and if the crtc is active,
         * fills out the pipe-config with the hw state. */
        bool (*get_pipe_config)(struct intel_crtc *,
                                struct intel_crtc_config *);
 +      void (*get_clock)(struct intel_crtc *, struct intel_crtc_config *);
        int (*crtc_mode_set)(struct drm_crtc *crtc,
                             int x, int y,
                             struct drm_framebuffer *old_fb);
        void (*init_clock_gating)(struct drm_device *dev);
        int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
                          struct drm_framebuffer *fb,
 -                        struct drm_i915_gem_object *obj);
 +                        struct drm_i915_gem_object *obj,
 +                        uint32_t flags);
        int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
                            int x, int y);
        void (*hpd_irq_setup)(struct drm_device *dev);
        /* pll clock increase/decrease */
  };
  
 -struct drm_i915_gt_funcs {
 +struct intel_uncore_funcs {
        void (*force_wake_get)(struct drm_i915_private *dev_priv);
        void (*force_wake_put)(struct drm_i915_private *dev_priv);
  };
  
 +struct intel_uncore {
 +      spinlock_t lock; /** lock is also taken in irq contexts. */
 +
 +      struct intel_uncore_funcs funcs;
 +
 +      unsigned fifo_count;
 +      unsigned forcewake_count;
 +};
 +
  #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
        func(is_mobile) sep \
        func(is_i85x) sep \
@@@ -450,64 -436,12 +450,64 @@@ struct intel_device_info 
  
  enum i915_cache_level {
        I915_CACHE_NONE = 0,
 -      I915_CACHE_LLC,
 -      I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
 +      I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
 +      I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
 +                            caches, eg sampler/render caches, and the
 +                            large Last-Level-Cache. LLC is coherent with
 +                            the CPU, but L3 is only visible to the GPU. */
 +      I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
  };
  
  typedef uint32_t gen6_gtt_pte_t;
  
 +struct i915_address_space {
 +      struct drm_mm mm;
 +      struct drm_device *dev;
 +      struct list_head global_link;
 +      unsigned long start;            /* Start offset always 0 for dri2 */
 +      size_t total;           /* size addr space maps (ex. 2GB for ggtt) */
 +
 +      struct {
 +              dma_addr_t addr;
 +              struct page *page;
 +      } scratch;
 +
 +      /**
 +       * List of objects currently involved in rendering.
 +       *
 +       * Includes buffers having the contents of their GPU caches
 +       * flushed, not necessarily primitives.  last_rendering_seqno
 +       * represents when the rendering involved will be completed.
 +       *
 +       * A reference is held on the buffer while on this list.
 +       */
 +      struct list_head active_list;
 +
 +      /**
 +       * LRU list of objects which are not in the ringbuffer and
 +       * are ready to unbind, but are still in the GTT.
 +       *
 +       * last_rendering_seqno is 0 while an object is in this list.
 +       *
 +       * A reference is not held on the buffer while on this list,
 +       * as merely being GTT-bound shouldn't prevent its being
 +       * freed, and we'll pull it off the list in the free path.
 +       */
 +      struct list_head inactive_list;
 +
 +      /* FIXME: Need a more generic return type */
 +      gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
 +                                   enum i915_cache_level level);
 +      void (*clear_range)(struct i915_address_space *vm,
 +                          unsigned int first_entry,
 +                          unsigned int num_entries);
 +      void (*insert_entries)(struct i915_address_space *vm,
 +                             struct sg_table *st,
 +                             unsigned int first_entry,
 +                             enum i915_cache_level cache_level);
 +      void (*cleanup)(struct i915_address_space *vm);
 +};
 +
  /* The Graphics Translation Table is the way in which GEN hardware translates a
   * Graphics Virtual Address into a Physical Address. In addition to the normal
   * collateral associated with any va->pa translations GEN hardware also has a
   * the spec.
   */
  struct i915_gtt {
 -      unsigned long start;            /* Start offset of used GTT */
 -      size_t total;                   /* Total size GTT can map */
 +      struct i915_address_space base;
        size_t stolen_size;             /* Total size of stolen memory */
  
        unsigned long mappable_end;     /* End offset that we can CPU map */
        void __iomem *gsm;
  
        bool do_idle_maps;
 -      dma_addr_t scratch_page_dma;
 -      struct page *scratch_page;
 +
 +      int mtrr;
  
        /* global gtt ops */
        int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
                          size_t *stolen, phys_addr_t *mappable_base,
                          unsigned long *mappable_end);
 -      void (*gtt_remove)(struct drm_device *dev);
 -      void (*gtt_clear_range)(struct drm_device *dev,
 -                              unsigned int first_entry,
 -                              unsigned int num_entries);
 -      void (*gtt_insert_entries)(struct drm_device *dev,
 -                                 struct sg_table *st,
 -                                 unsigned int pg_start,
 -                                 enum i915_cache_level cache_level);
 -      gen6_gtt_pte_t (*pte_encode)(struct drm_device *dev,
 -                                   dma_addr_t addr,
 -                                   enum i915_cache_level level);
  };
 -#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
 +#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
  
 -#define I915_PPGTT_PD_ENTRIES 512
 -#define I915_PPGTT_PT_ENTRIES 1024
  struct i915_hw_ppgtt {
 -      struct drm_device *dev;
 +      struct i915_address_space base;
        unsigned num_pd_entries;
        struct page **pt_pages;
        uint32_t pd_offset;
        dma_addr_t *pt_dma_addr;
 -      dma_addr_t scratch_page_dma_addr;
  
 -      /* pte functions, mirroring the interface of the global gtt. */
 -      void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
 -                          unsigned int first_entry,
 -                          unsigned int num_entries);
 -      void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
 -                             struct sg_table *st,
 -                             unsigned int pg_start,
 -                             enum i915_cache_level cache_level);
 -      gen6_gtt_pte_t (*pte_encode)(struct drm_device *dev,
 -                                   dma_addr_t addr,
 -                                   enum i915_cache_level level);
        int (*enable)(struct drm_device *dev);
 -      void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
 +};
 +
 +/**
 + * A VMA represents a GEM BO that is bound into an address space. Therefore, a
 + * VMA's presence cannot be guaranteed before binding, or after unbinding the
 + * object into/from the address space.
 + *
 + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
 + * will always be <= an objects lifetime. So object refcounting should cover us.
 + */
 +struct i915_vma {
 +      struct drm_mm_node node;
 +      struct drm_i915_gem_object *obj;
 +      struct i915_address_space *vm;
 +
 +      /** This object's place on the active/inactive lists */
 +      struct list_head mm_list;
 +
 +      struct list_head vma_link; /* Link in the object's VMA list */
 +
 +      /** This vma's place in the batchbuffer or on the eviction list */
 +      struct list_head exec_list;
 +
  };
  
  struct i915_ctx_hang_stats {
@@@ -590,48 -528,15 +590,48 @@@ struct i915_hw_context 
        struct i915_ctx_hang_stats hang_stats;
  };
  
 -enum no_fbc_reason {
 -      FBC_NO_OUTPUT, /* no outputs enabled to compress */
 -      FBC_STOLEN_TOO_SMALL, /* not enough space to hold compressed buffers */
 -      FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
 -      FBC_MODE_TOO_LARGE, /* mode too large for compression */
 -      FBC_BAD_PLANE, /* fbc not supported on plane */
 -      FBC_NOT_TILED, /* buffer not tiled */
 -      FBC_MULTIPLE_PIPES, /* more than one pipe active */
 -      FBC_MODULE_PARAM,
 +struct i915_fbc {
 +      unsigned long size;
 +      unsigned int fb_id;
 +      enum plane plane;
 +      int y;
 +
 +      struct drm_mm_node *compressed_fb;
 +      struct drm_mm_node *compressed_llb;
 +
 +      struct intel_fbc_work {
 +              struct delayed_work work;
 +              struct drm_crtc *crtc;
 +              struct drm_framebuffer *fb;
 +              int interval;
 +      } *fbc_work;
 +
 +      enum no_fbc_reason {
 +              FBC_OK, /* FBC is enabled */
 +              FBC_UNSUPPORTED, /* FBC is not supported by this chipset */
 +              FBC_NO_OUTPUT, /* no outputs enabled to compress */
 +              FBC_STOLEN_TOO_SMALL, /* not enough space for buffers */
 +              FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
 +              FBC_MODE_TOO_LARGE, /* mode too large for compression */
 +              FBC_BAD_PLANE, /* fbc not supported on plane */
 +              FBC_NOT_TILED, /* buffer not tiled */
 +              FBC_MULTIPLE_PIPES, /* more than one pipe active */
 +              FBC_MODULE_PARAM,
 +              FBC_CHIP_DEFAULT, /* disabled by default on this chip */
 +      } no_fbc_reason;
 +};
 +
 +enum no_psr_reason {
 +      PSR_NO_SOURCE, /* Not supported on platform */
 +      PSR_NO_SINK, /* Not supported by panel */
 +      PSR_MODULE_PARAM,
 +      PSR_CRTC_NOT_ACTIVE,
 +      PSR_PWR_WELL_ENABLED,
 +      PSR_NOT_TILED,
 +      PSR_SPRITE_ENABLED,
 +      PSR_S3D_ENABLED,
 +      PSR_INTERLACED_ENABLED,
 +      PSR_HSW_NOT_DDIA,
  };
  
  enum intel_pch {
@@@ -817,12 -722,12 +817,12 @@@ struct i915_suspend_saved_registers 
  };
  
  struct intel_gen6_power_mgmt {
 +      /* work and pm_iir are protected by dev_priv->irq_lock */
        struct work_struct work;
 -      struct delayed_work vlv_work;
        u32 pm_iir;
 -      /* lock - irqsave spinlock that protectects the work_struct and
 -       * pm_iir. */
 -      spinlock_t lock;
 +
 +      /* On vlv we need to manually drop to Vmin with a delayed work. */
 +      struct delayed_work vlv_work;
  
        /* The below variables an all the rps hw state are protected by
         * dev->struct mutext. */
@@@ -888,18 -793,6 +888,18 @@@ struct i915_dri1_state 
        uint32_t counter;
  };
  
 +struct i915_ums_state {
 +      /**
 +       * Flag if the X Server, and thus DRM, is not currently in
 +       * control of the device.
 +       *
 +       * This is set between LeaveVT and EnterVT.  It needs to be
 +       * replaced with a semaphore.  It also needs to be
 +       * transitioned away from for kernel modesetting.
 +       */
 +      int mm_suspended;
 +};
 +
  struct intel_l3_parity {
        u32 *remap_info;
        struct work_struct error_work;
  struct i915_gem_mm {
        /** Memory allocator for GTT stolen memory */
        struct drm_mm stolen;
 -      /** Memory allocator for GTT */
 -      struct drm_mm gtt_space;
        /** List of all objects in gtt_space. Used to restore gtt
         * mappings on resume */
        struct list_head bound_list;
        /** Usable portion of the GTT for GEM */
        unsigned long stolen_base; /* limited to low memory (32-bit) */
  
 -      int gtt_mtrr;
 -
        /** PPGTT used for aliasing the PPGTT with the GTT */
        struct i915_hw_ppgtt *aliasing_ppgtt;
  
        struct shrinker inactive_shrinker;
        bool shrinker_no_lock_stealing;
  
 -      /**
 -       * List of objects currently involved in rendering.
 -       *
 -       * Includes buffers having the contents of their GPU caches
 -       * flushed, not necessarily primitives.  last_rendering_seqno
 -       * represents when the rendering involved will be completed.
 -       *
 -       * A reference is held on the buffer while on this list.
 -       */
 -      struct list_head active_list;
 -
 -      /**
 -       * LRU list of objects which are not in the ringbuffer and
 -       * are ready to unbind, but are still in the GTT.
 -       *
 -       * last_rendering_seqno is 0 while an object is in this list.
 -       *
 -       * A reference is not held on the buffer while on this list,
 -       * as merely being GTT-bound shouldn't prevent its being
 -       * freed, and we'll pull it off the list in the free path.
 -       */
 -      struct list_head inactive_list;
 -
        /** LRU list of objects with fence regs on them. */
        struct list_head fence_list;
  
         */
        bool interruptible;
  
 -      /**
 -       * Flag if the X Server, and thus DRM, is not currently in
 -       * control of the device.
 -       *
 -       * This is set between LeaveVT and EnterVT.  It needs to be
 -       * replaced with a semaphore.  It also needs to be
 -       * transitioned away from for kernel modesetting.
 -       */
 -      int suspended;
 -
        /** Bit 6 swizzling required for X tiling */
        uint32_t bit_6_swizzle_x;
        /** Bit 6 swizzling required for Y tiling */
        struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
  
        /* accounting, useful for userland debugging */
 +      spinlock_t object_stat_lock;
        size_t object_memory;
        u32 object_count;
  };
@@@ -968,11 -897,6 +968,11 @@@ struct drm_i915_error_state_buf 
        loff_t pos;
  };
  
 +struct i915_error_state_file_priv {
 +      struct drm_device *dev;
 +      struct drm_i915_error_state *error;
 +};
 +
  struct i915_gpu_error {
        /* For hangcheck timer */
  #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@@@ -1064,88 -988,6 +1064,88 @@@ struct intel_vbt_data 
        struct child_device_config *child_dev;
  };
  
 +enum intel_ddb_partitioning {
 +      INTEL_DDB_PART_1_2,
 +      INTEL_DDB_PART_5_6, /* IVB+ */
 +};
 +
 +struct intel_wm_level {
 +      bool enable;
 +      uint32_t pri_val;
 +      uint32_t spr_val;
 +      uint32_t cur_val;
 +      uint32_t fbc_val;
 +};
 +
 +/*
 + * This struct tracks the state needed for the Package C8+ feature.
 + *
 + * Package states C8 and deeper are really deep PC states that can only be
 + * reached when all the devices on the system allow it, so even if the graphics
 + * device allows PC8+, it doesn't mean the system will actually get to these
 + * states.
 + *
 + * Our driver only allows PC8+ when all the outputs are disabled, the power well
 + * is disabled and the GPU is idle. When these conditions are met, we manually
 + * do the other conditions: disable the interrupts, clocks and switch LCPLL
 + * refclk to Fclk.
 + *
 + * When we really reach PC8 or deeper states (not just when we allow it) we lose
 + * the state of some registers, so when we come back from PC8+ we need to
 + * restore this state. We don't get into PC8+ if we're not in RC6, so we don't
 + * need to take care of the registers kept by RC6.
 + *
 + * The interrupt disabling is part of the requirements. We can only leave the
 + * PCH HPD interrupts enabled. If we're in PC8+ and we get another interrupt we
 + * can lock the machine.
 + *
 + * Ideally every piece of our code that needs PC8+ disabled would call
 + * hsw_disable_package_c8, which would increment disable_count and prevent the
 + * system from reaching PC8+. But we don't have a symmetric way to do this for
 + * everything, so we have the requirements_met and gpu_idle variables. When we
 + * switch requirements_met or gpu_idle to true we decrease disable_count, and
 + * increase it in the opposite case. The requirements_met variable is true when
 + * all the CRTCs, encoders and the power well are disabled. The gpu_idle
 + * variable is true when the GPU is idle.
 + *
 + * In addition to everything, we only actually enable PC8+ if disable_count
 + * stays at zero for at least some seconds. This is implemented with the
 + * enable_work variable. We do this so we don't enable/disable PC8 dozens of
 + * consecutive times when all screens are disabled and some background app
 + * queries the state of our connectors, or we have some application constantly
 + * waking up to use the GPU. Only after the enable_work function actually
 + * enables PC8+ the "enable" variable will become true, which means that it can
 + * be false even if disable_count is 0.
 + *
 + * The irqs_disabled variable becomes true exactly after we disable the IRQs and
 + * goes back to false exactly before we reenable the IRQs. We use this variable
 + * to check if someone is trying to enable/disable IRQs while they're supposed
 + * to be disabled. This shouldn't happen and we'll print some error messages in
 + * case it happens, but if it actually happens we'll also update the variables
 + * inside struct regsave so when we restore the IRQs they will contain the
 + * latest expected values.
 + *
 + * For more, read "Display Sequences for Package C8" on our documentation.
 + */
 +struct i915_package_c8 {
 +      bool requirements_met;
 +      bool gpu_idle;
 +      bool irqs_disabled;
 +      /* Only true after the delayed work task actually enables it. */
 +      bool enabled;
 +      int disable_count;
 +      struct mutex lock;
 +      struct delayed_work enable_work;
 +
 +      struct {
 +              uint32_t deimr;
 +              uint32_t sdeimr;
 +              uint32_t gtimr;
 +              uint32_t gtier;
 +              uint32_t gen6_pmimr;
 +      } regsave;
 +};
 +
  typedef struct drm_i915_private {
        struct drm_device *dev;
        struct kmem_cache *slab;
  
        void __iomem *regs;
  
 -      struct drm_i915_gt_funcs gt;
 -      /** gt_fifo_count and the subsequent register write are synchronized
 -       * with dev->struct_mutex. */
 -      unsigned gt_fifo_count;
 -      /** forcewake_count is protected by gt_lock */
 -      unsigned forcewake_count;
 -      /** gt_lock is also taken in irq contexts. */
 -      spinlock_t gt_lock;
 +      struct intel_uncore uncore;
  
        struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
  
        /** Cached value of IMR to avoid reads in updating the bitfield */
        u32 irq_mask;
        u32 gt_irq_mask;
 +      u32 pm_irq_mask;
  
        struct work_struct hotplug_work;
        bool enable_hotplug_processing;
  
        int num_plane;
  
 -      unsigned long cfb_size;
 -      unsigned int cfb_fb;
 -      enum plane cfb_plane;
 -      int cfb_y;
 -      struct intel_fbc_work *fbc_work;
 -
 +      struct i915_fbc fbc;
        struct intel_opregion opregion;
        struct intel_vbt_data vbt;
  
        } backlight;
  
        /* LVDS info */
 -      struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
 -      struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
        bool no_aux_handshake;
  
        struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
        enum modeset_restore modeset_restore;
        struct mutex modeset_restore_lock;
  
 -      struct i915_gtt gtt;
 +      struct list_head vm_list; /* Global list of all address spaces */
 +      struct i915_gtt gtt; /* VMA representing the global address space */
  
        struct i915_gem_mm mm;
  
  
        struct intel_l3_parity l3_parity;
  
 +      /* Cannot be determined by PCIID. You must always read a register. */
 +      size_t ellc_size;
 +
        /* gen6+ rps state */
        struct intel_gen6_power_mgmt rps;
  
        /* Haswell power well */
        struct i915_power_well power_well;
  
 -      enum no_fbc_reason no_fbc_reason;
 -
 -      struct drm_mm_node *compressed_fb;
 -      struct drm_mm_node *compressed_llb;
 +      enum no_psr_reason no_psr_reason;
  
        struct i915_gpu_error gpu_error;
  
  
        struct i915_suspend_saved_registers regfile;
  
 +      struct {
 +              /*
 +               * Raw watermark latency values:
 +               * in 0.1us units for WM0,
 +               * in 0.5us units for WM1+.
 +               */
 +              /* primary */
 +              uint16_t pri_latency[5];
 +              /* sprite */
 +              uint16_t spr_latency[5];
 +              /* cursor */
 +              uint16_t cur_latency[5];
 +      } wm;
 +
 +      struct i915_package_c8 pc8;
 +
        /* Old dri1 support infrastructure, beware the dragons ya fools entering
         * here! */
        struct i915_dri1_state dri1;
 +      /* Old ums support infrastructure, same warning applies. */
 +      struct i915_ums_state ums;
  } drm_i915_private_t;
  
 +static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
 +{
 +      return dev->dev_private;
 +}
 +
  /* Iterate over initialised rings */
  #define for_each_ring(ring__, dev_priv__, i__) \
        for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
@@@ -1356,7 -1187,7 +1356,7 @@@ enum hdmi_force_audio 
        HDMI_AUDIO_ON,                  /* force turn on HDMI audio */
  };
  
 -#define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
 +#define I915_GTT_OFFSET_NONE ((u32)-1)
  
  struct drm_i915_gem_object_ops {
        /* Interface between the GEM object and its backing storage.
@@@ -1381,16 -1212,15 +1381,16 @@@ struct drm_i915_gem_object 
  
        const struct drm_i915_gem_object_ops *ops;
  
 -      /** Current space allocated to this object in the GTT, if any. */
 -      struct drm_mm_node *gtt_space;
 +      /** List of VMAs backed by this object */
 +      struct list_head vma_list;
 +
        /** Stolen memory for this object, instead of being backed by shmem. */
        struct drm_mm_node *stolen;
        struct list_head global_list;
  
 -      /** This object's place on the active/inactive lists */
        struct list_head ring_list;
 -      struct list_head mm_list;
 +      /** Used in execbuf to temporarily hold a ref */
 +      struct list_head obj_exec_link;
        /** This object's place in the batchbuffer or on the eviction list */
        struct list_head exec_list;
  
         */
        unsigned int fault_mappable:1;
        unsigned int pin_mappable:1;
 +      unsigned int pin_display:1;
  
        /*
         * Is the GPU currently using a fence to access this buffer,
        unsigned int pending_fenced_gpu_access:1;
        unsigned int fenced_gpu_access:1;
  
 -      unsigned int cache_level:2;
 +      unsigned int cache_level:3;
  
        unsigned int has_aliasing_ppgtt_mapping:1;
        unsigned int has_global_gtt_mapping:1;
        unsigned long exec_handle;
        struct drm_i915_gem_exec_object2 *exec_entry;
  
 -      /**
 -       * Current offset of the object in GTT space.
 -       *
 -       * This is the same as gtt_space->start
 -       */
 -      uint32_t gtt_offset;
 -
        struct intel_ring_buffer *ring;
  
        /** Breadcrumb of last rendering to the buffer. */
@@@ -1560,7 -1396,7 +1560,7 @@@ struct drm_i915_file_private 
        struct i915_ctx_hang_stats hang_stats;
  };
  
 -#define INTEL_INFO(dev)       (((struct drm_i915_private *) (dev)->dev_private)->info)
 +#define INTEL_INFO(dev)       (to_i915(dev)->info)
  
  #define IS_I830(dev)          ((dev)->pci_device == 0x3577)
  #define IS_845G(dev)          ((dev)->pci_device == 0x2562)
  #define IS_PINEVIEW_M(dev)    ((dev)->pci_device == 0xa011)
  #define IS_PINEVIEW(dev)      (INTEL_INFO(dev)->is_pineview)
  #define IS_G33(dev)           (INTEL_INFO(dev)->is_g33)
 -#define IS_IRONLAKE_D(dev)    ((dev)->pci_device == 0x0042)
  #define IS_IRONLAKE_M(dev)    ((dev)->pci_device == 0x0046)
  #define IS_IVYBRIDGE(dev)     (INTEL_INFO(dev)->is_ivybridge)
  #define IS_IVB_GT1(dev)               ((dev)->pci_device == 0x0156 || \
  #define IS_VALLEYVIEW(dev)    (INTEL_INFO(dev)->is_valleyview)
  #define IS_HASWELL(dev)       (INTEL_INFO(dev)->is_haswell)
  #define IS_MOBILE(dev)                (INTEL_INFO(dev)->is_mobile)
 +#define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \
 +                               ((dev)->pci_device & 0xFF00) == 0x0C00)
  #define IS_ULT(dev)           (IS_HASWELL(dev) && \
                                 ((dev)->pci_device & 0xFF00) == 0x0A00)
  
  #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
  #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
  #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
 +#define HAS_WT(dev)            (IS_HASWELL(dev) && to_i915(dev)->ellc_size)
  #define I915_NEED_GFX_HWS(dev)        (INTEL_INFO(dev)->need_gfx_hws)
  
  #define HAS_HW_CONTEXTS(dev)  (INTEL_INFO(dev)->gen >= 6)
  #define SUPPORTS_EDP(dev)             (IS_IRONLAKE_M(dev))
  #define SUPPORTS_TV(dev)              (INTEL_INFO(dev)->supports_tv)
  #define I915_HAS_HOTPLUG(dev)          (INTEL_INFO(dev)->has_hotplug)
 -/* dsparb controlled by hw only */
 -#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IRONLAKE(dev))
  
  #define HAS_FW_BLC(dev) (INTEL_INFO(dev)->gen > 2)
  #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
  
  #define HAS_IPS(dev)          (IS_ULT(dev))
  
 -#define HAS_PIPE_CONTROL(dev) (INTEL_INFO(dev)->gen >= 5)
 -
  #define HAS_DDI(dev)          (INTEL_INFO(dev)->has_ddi)
  #define HAS_POWER_WELL(dev)   (IS_HASWELL(dev))
  #define HAS_FPGA_DBG_UNCLAIMED(dev)   (INTEL_INFO(dev)->has_fpga_dbg)
  #define INTEL_PCH_LPT_DEVICE_ID_TYPE          0x8c00
  #define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE               0x9c00
  
 -#define INTEL_PCH_TYPE(dev) (((struct drm_i915_private *)(dev)->dev_private)->pch_type)
 +#define INTEL_PCH_TYPE(dev) (to_i915(dev)->pch_type)
  #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
  #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
  #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
  #define INTEL_RC6p_ENABLE                     (1<<1)
  #define INTEL_RC6pp_ENABLE                    (1<<2)
  
 -extern struct drm_ioctl_desc i915_ioctls[];
 +extern const struct drm_ioctl_desc i915_ioctls[];
  extern int i915_max_ioctl;
  extern unsigned int i915_fbpercrtc __always_unused;
  extern int i915_panel_ignore_lid __read_mostly;
@@@ -1702,14 -1540,9 +1702,14 @@@ extern int i915_enable_rc6 __read_mostl
  extern int i915_enable_fbc __read_mostly;
  extern bool i915_enable_hangcheck __read_mostly;
  extern int i915_enable_ppgtt __read_mostly;
 +extern int i915_enable_psr __read_mostly;
  extern unsigned int i915_preliminary_hw_support __read_mostly;
  extern int i915_disable_power_well __read_mostly;
  extern int i915_enable_ips __read_mostly;
 +extern bool i915_fastboot __read_mostly;
 +extern int i915_enable_pc8 __read_mostly;
 +extern int i915_pc8_timeout __read_mostly;
 +extern bool i915_prefault_disable __read_mostly;
  
  extern int i915_suspend(struct drm_device *dev, pm_message_t state);
  extern int i915_resume(struct drm_device *dev);
@@@ -1745,18 -1578,16 +1745,19 @@@ extern void i915_update_gfx_val(struct 
  extern void intel_console_resume(struct work_struct *work);
  
  /* i915_irq.c */
 -void i915_hangcheck_elapsed(unsigned long data);
 +void i915_queue_hangcheck(struct drm_device *dev);
  void i915_handle_error(struct drm_device *dev, bool wedged);
  
  extern void intel_irq_init(struct drm_device *dev);
+ extern void intel_pm_init(struct drm_device *dev);
  extern void intel_hpd_init(struct drm_device *dev);
 -extern void intel_gt_init(struct drm_device *dev);
 -extern void intel_gt_sanitize(struct drm_device *dev);
 +extern void intel_pm_init(struct drm_device *dev);
  
 -void i915_error_state_free(struct kref *error_ref);
 +extern void intel_uncore_sanitize(struct drm_device *dev);
 +extern void intel_uncore_early_sanitize(struct drm_device *dev);
 +extern void intel_uncore_init(struct drm_device *dev);
 +extern void intel_uncore_clear_errors(struct drm_device *dev);
 +extern void intel_uncore_check_errors(struct drm_device *dev);
  
  void
  i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
  void
  i915_disable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
  
 -#ifdef CONFIG_DEBUG_FS
 -extern void i915_destroy_error_state(struct drm_device *dev);
 -#else
 -#define i915_destroy_error_state(x)
 -#endif
 -
 -
  /* i915_gem.c */
  int i915_gem_init_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv);
@@@ -1820,18 -1658,13 +1821,18 @@@ void i915_gem_object_init(struct drm_i9
  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
                                                  size_t size);
  void i915_gem_free_object(struct drm_gem_object *obj);
 +struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
 +                                   struct i915_address_space *vm);
 +void i915_gem_vma_destroy(struct i915_vma *vma);
  
  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 +                                   struct i915_address_space *vm,
                                     uint32_t alignment,
                                     bool map_and_fenceable,
                                     bool nonblocking);
  void i915_gem_object_unpin(struct drm_i915_gem_object *obj);
 -int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 +int __must_check i915_vma_unbind(struct i915_vma *vma);
 +int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
  int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
  void i915_gem_lastclose(struct drm_device *dev);
@@@ -1868,6 -1701,8 +1869,6 @@@ int i915_gem_dumb_create(struct drm_fil
                         struct drm_mode_create_dumb *args);
  int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
                      uint32_t handle, uint64_t *offset);
 -int i915_gem_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
 -                        uint32_t handle);
  /**
   * Returns true if seq1 is later than seq2.
   */
@@@ -1919,7 -1754,10 +1920,7 @@@ static inline bool i915_terminally_wedg
  }
  
  void i915_gem_reset(struct drm_device *dev);
 -void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 -int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 -                                          uint32_t read_domains,
 -                                          uint32_t write_domain);
 +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
  int __must_check i915_gem_init(struct drm_device *dev);
  int __must_check i915_gem_init_hw(struct drm_device *dev);
@@@ -1946,7 -1784,6 +1947,7 @@@ int __must_chec
  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     struct intel_ring_buffer *pipelined);
 +void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
  int i915_gem_attach_phys_object(struct drm_device *dev,
                                struct drm_i915_gem_object *obj,
                                int id,
@@@ -1973,56 -1810,6 +1974,56 @@@ struct dma_buf *i915_gem_prime_export(s
  
  void i915_gem_restore_fences(struct drm_device *dev);
  
 +unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
 +                                struct i915_address_space *vm);
 +bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o);
 +bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 +                      struct i915_address_space *vm);
 +unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 +                              struct i915_address_space *vm);
 +struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 +                                   struct i915_address_space *vm);
 +struct i915_vma *
 +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 +                                struct i915_address_space *vm);
 +/* Some GGTT VM helpers */
 +#define obj_to_ggtt(obj) \
 +      (&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
 +static inline bool i915_is_ggtt(struct i915_address_space *vm)
 +{
 +      struct i915_address_space *ggtt =
 +              &((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
 +      return vm == ggtt;
 +}
 +
 +static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 +{
 +      return i915_gem_obj_bound(obj, obj_to_ggtt(obj));
 +}
 +
 +static inline unsigned long
 +i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj)
 +{
 +      return i915_gem_obj_offset(obj, obj_to_ggtt(obj));
 +}
 +
 +static inline unsigned long
 +i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
 +{
 +      return i915_gem_obj_size(obj, obj_to_ggtt(obj));
 +}
 +
 +static inline int __must_check
 +i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 +                    uint32_t alignment,
 +                    bool map_and_fenceable,
 +                    bool nonblocking)
 +{
 +      return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
 +                                 map_and_fenceable, nonblocking);
 +}
 +#undef obj_to_ggtt
 +
  /* i915_gem_context.c */
  void i915_gem_context_init(struct drm_device *dev);
  void i915_gem_context_fini(struct drm_device *dev);
@@@ -2041,7 -1828,7 +2042,7 @@@ static inline void i915_gem_context_unr
  }
  
  struct i915_ctx_hang_stats * __must_check
 -i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
 +i915_gem_context_get_hang_stats(struct drm_device *dev,
                                struct drm_file *file,
                                u32 id);
  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@@ -2075,9 -1862,7 +2076,9 @@@ static inline void i915_gem_chipset_flu
  
  
  /* i915_gem_evict.c */
 -int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
 +int __must_check i915_gem_evict_something(struct drm_device *dev,
 +                                        struct i915_address_space *vm,
 +                                        int min_size,
                                          unsigned alignment,
                                          unsigned cache_level,
                                          bool mappable,
@@@ -2099,7 -1884,7 +2100,7 @@@ i915_gem_object_create_stolen_for_preal
  void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj);
  
  /* i915_gem_tiling.c */
 -inline static bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 +static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
  {
        drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
  
@@@ -2112,36 -1897,23 +2113,36 @@@ void i915_gem_object_do_bit_17_swizzle(
  void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
  
  /* i915_gem_debug.c */
 -void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len,
 -                        const char *where, uint32_t mark);
  #if WATCH_LISTS
  int i915_verify_lists(struct drm_device *dev);
  #else
  #define i915_verify_lists(dev) 0
  #endif
 -void i915_gem_object_check_coherency(struct drm_i915_gem_object *obj,
 -                                   int handle);
 -void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len,
 -                        const char *where, uint32_t mark);
  
  /* i915_debugfs.c */
  int i915_debugfs_init(struct drm_minor *minor);
  void i915_debugfs_cleanup(struct drm_minor *minor);
 +
 +/* i915_gpu_error.c */
  __printf(2, 3)
  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 +int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
 +                          const struct i915_error_state_file_priv *error);
 +int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
 +                            size_t count, loff_t pos);
 +static inline void i915_error_state_buf_release(
 +      struct drm_i915_error_state_buf *eb)
 +{
 +      kfree(eb->buf);
 +}
 +void i915_capture_error_state(struct drm_device *dev);
 +void i915_error_state_get(struct drm_device *dev,
 +                        struct i915_error_state_file_priv *error_priv);
 +void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
 +void i915_destroy_error_state(struct drm_device *dev);
 +
 +void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 +const char *i915_cache_level_str(int type);
  
  /* i915_suspend.c */
  extern int i915_save_state(struct drm_device *dev);
@@@ -2221,6 -1993,7 +2222,6 @@@ int i915_reg_read_ioctl(struct drm_devi
                        struct drm_file *file);
  
  /* overlay */
 -#ifdef CONFIG_DEBUG_FS
  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
                                            struct intel_overlay_error_state *error);
@@@ -2229,6 -2002,7 +2230,6 @@@ extern struct intel_display_error_stat
  extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
                                            struct drm_device *dev,
                                            struct intel_display_error_state *error);
 -#endif
  
  /* On SNB platform, before reading ring registers forcewake bit
   * must be set to prevent GT core from power down and stale values being
   */
  void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
  void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
 -int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
  
  int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val);
  int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val);
@@@ -2254,37 -2029,39 +2255,37 @@@ void intel_sbi_write(struct drm_i915_pr
  int vlv_gpu_freq(int ddr_freq, int val);
  int vlv_freq_opcode(int ddr_freq, int val);
  
 -#define __i915_read(x, y) \
 -      u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
 -
 -__i915_read(8, b)
 -__i915_read(16, w)
 -__i915_read(32, l)
 -__i915_read(64, q)
 +#define __i915_read(x) \
 +      u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace);
 +__i915_read(8)
 +__i915_read(16)
 +__i915_read(32)
 +__i915_read(64)
  #undef __i915_read
  
 -#define __i915_write(x, y) \
 -      void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
 -
 -__i915_write(8, b)
 -__i915_write(16, w)
 -__i915_write(32, l)
 -__i915_write(64, q)
 +#define __i915_write(x) \
 +      void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace);
 +__i915_write(8)
 +__i915_write(16)
 +__i915_write(32)
 +__i915_write(64)
  #undef __i915_write
  
 -#define I915_READ8(reg)               i915_read8(dev_priv, (reg))
 -#define I915_WRITE8(reg, val) i915_write8(dev_priv, (reg), (val))
 +#define I915_READ8(reg)               i915_read8(dev_priv, (reg), true)
 +#define I915_WRITE8(reg, val) i915_write8(dev_priv, (reg), (val), true)
  
 -#define I915_READ16(reg)      i915_read16(dev_priv, (reg))
 -#define I915_WRITE16(reg, val)        i915_write16(dev_priv, (reg), (val))
 -#define I915_READ16_NOTRACE(reg)      readw(dev_priv->regs + (reg))
 -#define I915_WRITE16_NOTRACE(reg, val)        writew(val, dev_priv->regs + (reg))
 +#define I915_READ16(reg)      i915_read16(dev_priv, (reg), true)
 +#define I915_WRITE16(reg, val)        i915_write16(dev_priv, (reg), (val), true)
 +#define I915_READ16_NOTRACE(reg)      i915_read16(dev_priv, (reg), false)
 +#define I915_WRITE16_NOTRACE(reg, val)        i915_write16(dev_priv, (reg), (val), false)
  
 -#define I915_READ(reg)                i915_read32(dev_priv, (reg))
 -#define I915_WRITE(reg, val)  i915_write32(dev_priv, (reg), (val))
 -#define I915_READ_NOTRACE(reg)                readl(dev_priv->regs + (reg))
 -#define I915_WRITE_NOTRACE(reg, val)  writel(val, dev_priv->regs + (reg))
 +#define I915_READ(reg)                i915_read32(dev_priv, (reg), true)
 +#define I915_WRITE(reg, val)  i915_write32(dev_priv, (reg), (val), true)
 +#define I915_READ_NOTRACE(reg)                i915_read32(dev_priv, (reg), false)
 +#define I915_WRITE_NOTRACE(reg, val)  i915_write32(dev_priv, (reg), (val), false)
  
 -#define I915_WRITE64(reg, val)        i915_write64(dev_priv, (reg), (val))
 -#define I915_READ64(reg)      i915_read64(dev_priv, (reg))
 +#define I915_WRITE64(reg, val)        i915_write64(dev_priv, (reg), (val), true)
 +#define I915_READ64(reg)      i915_read64(dev_priv, (reg), true)
  
  #define POSTING_READ(reg)     (void)I915_READ_NOTRACE(reg)
  #define POSTING_READ16(reg)   (void)I915_READ16_NOTRACE(reg)
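
The i915_drv.h hunks above rework the MMIO accessors so that every i915_read##x()/i915_write##x() helper takes an explicit bool trace argument, and the *_NOTRACE macros now go through the same helpers with trace=false instead of raw readl()/writel(). The sketch below is only a rough, self-contained illustration of that pattern in plain C; the register array, helper names and macro names are invented for the example and are not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

/* Toy model of trace-aware register accessors: one backend function per
 * width, with a flag that decides whether the access gets logged. */
static uint32_t fake_regs[256];

static uint32_t example_read32(uint32_t reg, int trace)
{
	uint32_t val = fake_regs[reg / 4];

	if (trace)
		printf("read  reg 0x%04x -> 0x%08x\n", (unsigned)reg, (unsigned)val);
	return val;
}

static void example_write32(uint32_t reg, uint32_t val, int trace)
{
	if (trace)
		printf("write reg 0x%04x <- 0x%08x\n", (unsigned)reg, (unsigned)val);
	fake_regs[reg / 4] = val;
}

/* Traced by default; the _NOTRACE variants pass false, mirroring the
 * I915_READ()/I915_READ_NOTRACE() split in the hunk above. */
#define EXAMPLE_READ(reg)		example_read32((reg), 1)
#define EXAMPLE_READ_NOTRACE(reg)	example_read32((reg), 0)
#define EXAMPLE_WRITE(reg, val)		example_write32((reg), (val), 1)
#define EXAMPLE_POSTING_READ(reg)	(void)EXAMPLE_READ_NOTRACE(reg)

int main(void)
{
	EXAMPLE_WRITE(0x20, 0xdeadbeef);
	EXAMPLE_POSTING_READ(0x20);	/* read back to flush, not logged */
	return EXAMPLE_READ(0x20) != 0xdeadbeef;
}
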
index 56708c64e68f49c34725e8a702cce590e6afba40,342f1f3361689b03a0c080a671306ce89da20e61..b6a58f720f9a576fe7081b8225bb4e72a5db81d7
  #define   GC_LOW_FREQUENCY_ENABLE     (1 << 7)
  #define   GC_DISPLAY_CLOCK_190_200_MHZ        (0 << 4)
  #define   GC_DISPLAY_CLOCK_333_MHZ    (4 << 4)
 +#define   GC_DISPLAY_CLOCK_267_MHZ_PNV        (0 << 4)
 +#define   GC_DISPLAY_CLOCK_333_MHZ_PNV        (1 << 4)
 +#define   GC_DISPLAY_CLOCK_444_MHZ_PNV        (2 << 4)
 +#define   GC_DISPLAY_CLOCK_200_MHZ_PNV        (5 << 4)
 +#define   GC_DISPLAY_CLOCK_133_MHZ_PNV        (6 << 4)
 +#define   GC_DISPLAY_CLOCK_167_MHZ_PNV        (7 << 4)
  #define   GC_DISPLAY_CLOCK_MASK               (7 << 4)
  #define   GM45_GC_RENDER_CLOCK_MASK   (0xf << 0)
  #define   GM45_GC_RENDER_CLOCK_266_MHZ        (8 << 0)
  #define PUNIT_REG_GPU_LFM                     0xd3
  #define PUNIT_REG_GPU_FREQ_REQ                        0xd4
  #define PUNIT_REG_GPU_FREQ_STS                        0xd8
 +#define   GENFREQSTATUS                               (1<<0)
  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ                0xdc
  
  #define PUNIT_FUSE_BUS2                               0xf6 /* bits 47:40 */
  #define   ERR_INT_FIFO_UNDERRUN_C     (1<<6)
  #define   ERR_INT_FIFO_UNDERRUN_B     (1<<3)
  #define   ERR_INT_FIFO_UNDERRUN_A     (1<<0)
 +#define   ERR_INT_FIFO_UNDERRUN(pipe) (1<<(pipe*3))
  
  #define FPGA_DBG              0x42300
  #define   FPGA_DBG_RM_NOCLAIM (1<<31)
                                        will not assert AGPBUSY# and will only
                                        be delivered when out of C3. */
  #define   INSTPM_FORCE_ORDERING                               (1<<7) /* GEN6+ */
+ #define   INSTPM_TLB_INVALIDATE       (1<<9)
+ #define   INSTPM_SYNC_FLUSH   (1<<5)
  #define ACTHD         0x020c8
  #define FW_BLC                0x020d8
  #define FW_BLC2               0x020dc
  #define _DPLL_B       (dev_priv->info->display_mmio_offset + 0x6018)
  #define DPLL(pipe) _PIPE(pipe, _DPLL_A, _DPLL_B)
  #define   DPLL_VCO_ENABLE             (1 << 31)
 -#define   DPLL_DVO_HIGH_SPEED         (1 << 30)
 +#define   DPLL_SDVO_HIGH_SPEED                (1 << 30)
 +#define   DPLL_DVO_2X_MODE            (1 << 30)
  #define   DPLL_EXT_BUFFER_ENABLE_VLV  (1 << 30)
  #define   DPLL_SYNCLOCK_ENABLE                (1 << 29)
  #define   DPLL_REFA_CLK_ENABLE_VLV    (1 << 29)
  #define   MCH_SSKPD_WM0_MASK          0x3f
  #define   MCH_SSKPD_WM0_VAL           0xc
  
 +#define MCH_SECP_NRG_STTS             (MCHBAR_MIRROR_BASE_SNB + 0x592c)
 +
  /* Clocking configuration register */
  #define CLKCFG                        0x10c00
  #define CLKCFG_FSB_400                                        (5 << 0)        /* hrawclk 100 */
   */
  #define CCID                  0x2180
  #define   CCID_EN             (1<<0)
 +/*
 + * Notes on SNB/IVB/VLV context size:
 + * - Power context is saved elsewhere (LLC or stolen)
 + * - Ring/execlist context is saved on SNB, not on IVB
 + * - Extended context size already includes render context size
 + * - We always need to follow the extended context size.
 + *   SNB BSpec has comments indicating that we should use the
 + *   render context size instead if execlists are disabled, but
 + *   based on empirical testing that's just nonsense.
 + * - Pipelined/VF state is saved on SNB/IVB respectively
 + * - GT1 size just indicates how much of render context
 + *   doesn't need saving on GT1
 + */
  #define CXT_SIZE              0x21a0
  #define GEN6_CXT_POWER_SIZE(cxt_reg)  ((cxt_reg >> 24) & 0x3f)
  #define GEN6_CXT_RING_SIZE(cxt_reg)   ((cxt_reg >> 18) & 0x3f)
  #define GEN6_CXT_RENDER_SIZE(cxt_reg) ((cxt_reg >> 12) & 0x3f)
  #define GEN6_CXT_EXTENDED_SIZE(cxt_reg)       ((cxt_reg >> 6) & 0x3f)
  #define GEN6_CXT_PIPELINE_SIZE(cxt_reg)       ((cxt_reg >> 0) & 0x3f)
 -#define GEN6_CXT_TOTAL_SIZE(cxt_reg)  (GEN6_CXT_POWER_SIZE(cxt_reg) + \
 -                                      GEN6_CXT_RING_SIZE(cxt_reg) + \
 -                                      GEN6_CXT_RENDER_SIZE(cxt_reg) + \
 +#define GEN6_CXT_TOTAL_SIZE(cxt_reg)  (GEN6_CXT_RING_SIZE(cxt_reg) + \
                                        GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \
                                        GEN6_CXT_PIPELINE_SIZE(cxt_reg))
  #define GEN7_CXT_SIZE         0x21a8
  #define GEN7_CXT_EXTENDED_SIZE(ctx_reg)       ((ctx_reg >> 9) & 0x7f)
  #define GEN7_CXT_GT1_SIZE(ctx_reg)    ((ctx_reg >> 6) & 0x7)
  #define GEN7_CXT_VFSTATE_SIZE(ctx_reg)        ((ctx_reg >> 0) & 0x3f)
 -#define GEN7_CXT_TOTAL_SIZE(ctx_reg)  (GEN7_CXT_POWER_SIZE(ctx_reg) + \
 -                                       GEN7_CXT_RING_SIZE(ctx_reg) + \
 -                                       GEN7_CXT_RENDER_SIZE(ctx_reg) + \
 -                                       GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
 -                                       GEN7_CXT_GT1_SIZE(ctx_reg) + \
 +#define GEN7_CXT_TOTAL_SIZE(ctx_reg)  (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
                                         GEN7_CXT_VFSTATE_SIZE(ctx_reg))
  /* Haswell does have the CXT_SIZE register however it does not appear to be
   * valid. Now, docs explain in dwords what is in the context object. The full
  #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B)
  #define VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
  
 +/* HSW eDP PSR registers */
 +#define EDP_PSR_CTL                           0x64800
 +#define   EDP_PSR_ENABLE                      (1<<31)
 +#define   EDP_PSR_LINK_DISABLE                        (0<<27)
 +#define   EDP_PSR_LINK_STANDBY                        (1<<27)
 +#define   EDP_PSR_MIN_LINK_ENTRY_TIME_MASK    (3<<25)
 +#define   EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES (0<<25)
 +#define   EDP_PSR_MIN_LINK_ENTRY_TIME_4_LINES (1<<25)
 +#define   EDP_PSR_MIN_LINK_ENTRY_TIME_2_LINES (2<<25)
 +#define   EDP_PSR_MIN_LINK_ENTRY_TIME_0_LINES (3<<25)
 +#define   EDP_PSR_MAX_SLEEP_TIME_SHIFT                20
 +#define   EDP_PSR_SKIP_AUX_EXIT                       (1<<12)
 +#define   EDP_PSR_TP1_TP2_SEL                 (0<<11)
 +#define   EDP_PSR_TP1_TP3_SEL                 (1<<11)
 +#define   EDP_PSR_TP2_TP3_TIME_500us          (0<<8)
 +#define   EDP_PSR_TP2_TP3_TIME_100us          (1<<8)
 +#define   EDP_PSR_TP2_TP3_TIME_2500us         (2<<8)
 +#define   EDP_PSR_TP2_TP3_TIME_0us            (3<<8)
 +#define   EDP_PSR_TP1_TIME_500us              (0<<4)
 +#define   EDP_PSR_TP1_TIME_100us              (1<<4)
 +#define   EDP_PSR_TP1_TIME_2500us             (2<<4)
 +#define   EDP_PSR_TP1_TIME_0us                        (3<<4)
 +#define   EDP_PSR_IDLE_FRAME_SHIFT            0
 +
 +#define EDP_PSR_AUX_CTL                       0x64810
 +#define EDP_PSR_AUX_DATA1             0x64814
 +#define   EDP_PSR_DPCD_COMMAND                0x80060000
 +#define EDP_PSR_AUX_DATA2             0x64818
 +#define   EDP_PSR_DPCD_NORMAL_OPERATION       (1<<24)
 +#define EDP_PSR_AUX_DATA3             0x6481c
 +#define EDP_PSR_AUX_DATA4             0x64820
 +#define EDP_PSR_AUX_DATA5             0x64824
 +
 +#define EDP_PSR_STATUS_CTL                    0x64840
 +#define   EDP_PSR_STATUS_STATE_MASK           (7<<29)
 +#define   EDP_PSR_STATUS_STATE_IDLE           (0<<29)
 +#define   EDP_PSR_STATUS_STATE_SRDONACK               (1<<29)
 +#define   EDP_PSR_STATUS_STATE_SRDENT         (2<<29)
 +#define   EDP_PSR_STATUS_STATE_BUFOFF         (3<<29)
 +#define   EDP_PSR_STATUS_STATE_BUFON          (4<<29)
 +#define   EDP_PSR_STATUS_STATE_AUXACK         (5<<29)
 +#define   EDP_PSR_STATUS_STATE_SRDOFFACK      (6<<29)
 +#define   EDP_PSR_STATUS_LINK_MASK            (3<<26)
 +#define   EDP_PSR_STATUS_LINK_FULL_OFF                (0<<26)
 +#define   EDP_PSR_STATUS_LINK_FULL_ON         (1<<26)
 +#define   EDP_PSR_STATUS_LINK_STANDBY         (2<<26)
 +#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT        20
 +#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK 0x1f
 +#define   EDP_PSR_STATUS_COUNT_SHIFT          16
 +#define   EDP_PSR_STATUS_COUNT_MASK           0xf
 +#define   EDP_PSR_STATUS_AUX_ERROR            (1<<15)
 +#define   EDP_PSR_STATUS_AUX_SENDING          (1<<12)
 +#define   EDP_PSR_STATUS_SENDING_IDLE         (1<<9)
 +#define   EDP_PSR_STATUS_SENDING_TP2_TP3      (1<<8)
 +#define   EDP_PSR_STATUS_SENDING_TP1          (1<<4)
 +#define   EDP_PSR_STATUS_IDLE_MASK            0xf
 +
 +#define EDP_PSR_PERF_CNT              0x64844
 +#define   EDP_PSR_PERF_CNT_MASK               0xffffff
 +
 +#define EDP_PSR_DEBUG_CTL             0x64860
 +#define   EDP_PSR_DEBUG_MASK_LPSP     (1<<27)
 +#define   EDP_PSR_DEBUG_MASK_MEMUP    (1<<26)
 +#define   EDP_PSR_DEBUG_MASK_HPD      (1<<25)
 +
  /* VGA port control */
  #define ADPA                  0x61100
  #define PCH_ADPA                0xe1100
  #define CRT_HOTPLUG_DETECT_VOLTAGE_475MV      (1 << 2)
  
  #define PORT_HOTPLUG_STAT     (dev_priv->info->display_mmio_offset + 0x61114)
- /* HDMI/DP bits are gen4+ */
- #define   PORTB_HOTPLUG_LIVE_STATUS               (1 << 29)
+ /*
+  * HDMI/DP bits are gen4+
+  *
+  * WARNING: Bspec for hpd status bits on gen4 seems to be completely confused.
+  * Please check the detailed lore in the commit message for experimental
+  * evidence.
+  */
+ #define   PORTD_HOTPLUG_LIVE_STATUS               (1 << 29)
  #define   PORTC_HOTPLUG_LIVE_STATUS               (1 << 28)
- #define   PORTD_HOTPLUG_LIVE_STATUS               (1 << 27)
+ #define   PORTB_HOTPLUG_LIVE_STATUS               (1 << 27)
  #define   PORTD_HOTPLUG_INT_STATUS            (3 << 21)
  #define   PORTC_HOTPLUG_INT_STATUS            (3 << 19)
  #define   PORTB_HOTPLUG_INT_STATUS            (3 << 17)
   * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
   * of the infoframe structure specified by CEA-861. */
  #define   VIDEO_DIP_DATA_SIZE 32
 +#define   VIDEO_DIP_VSC_DATA_SIZE     36
  #define VIDEO_DIP_CTL         0x61170
  /* Pre HSW: */
  #define   VIDEO_DIP_ENABLE            (1 << 31)
  #define BLC_PWM_CPU_CTL2      0x48250
  #define BLC_PWM_CPU_CTL               0x48254
  
 +#define HSW_BLC_PWM2_CTL      0x48350
 +
  /* PCH CTL1 is totally different, all but the below bits are reserved. CTL2 is
   * like the normal CTL from gen4 and earlier. Hooray for confusing naming. */
  #define BLC_PWM_PCH_CTL1      0xc8250
  #define   BLM_PCH_POLARITY                    (1 << 29)
  #define BLC_PWM_PCH_CTL2      0xc8254
  
 +#define UTIL_PIN_CTL          0x48400
 +#define   UTIL_PIN_ENABLE     (1 << 31)
 +
 +#define PCH_GTC_CTL           0xe7000
 +#define   PCH_GTC_ENABLE      (1 << 31)
 +
  /* TV port control */
  #define TV_CTL                        0x68000
  /** Enables the TV encoder */
  #define  MLTR_WM2_SHIFT               8
  /* the unit of memory self-refresh latency time is 0.5us */
  #define  ILK_SRLT_MASK                0x3f
 -#define ILK_LATENCY(shift)    (I915_READ(MLTR_ILK) >> (shift) & ILK_SRLT_MASK)
 -#define ILK_READ_WM1_LATENCY()        ILK_LATENCY(MLTR_WM1_SHIFT)
 -#define ILK_READ_WM2_LATENCY()        ILK_LATENCY(MLTR_WM2_SHIFT)
  
  /* define the fifo size on Ironlake */
  #define ILK_DISPLAY_FIFO      128
  #define SSKPD_WM2_SHIFT               16
  #define SSKPD_WM3_SHIFT               24
  
 -#define SNB_LATENCY(shift)    (I915_READ(MCHBAR_MIRROR_BASE_SNB + SSKPD) >> (shift) & SSKPD_WM_MASK)
 -#define SNB_READ_WM0_LATENCY()                SNB_LATENCY(SSKPD_WM0_SHIFT)
 -#define SNB_READ_WM1_LATENCY()                SNB_LATENCY(SSKPD_WM1_SHIFT)
 -#define SNB_READ_WM2_LATENCY()                SNB_LATENCY(SSKPD_WM2_SHIFT)
 -#define SNB_READ_WM3_LATENCY()                SNB_LATENCY(SSKPD_WM3_SHIFT)
 -
  /*
   * The two pipe frame counter registers are not synchronized, so
   * reading a stable value is somewhat tricky. The following code
  #define DE_PLANEA_FLIP_DONE_IVB               (1<<3)
  #define DE_PIPEA_VBLANK_IVB           (1<<0)
  
 +#define DE_PIPE_VBLANK_ILK(pipe)      (1 << ((pipe * 8) + 7))
 +#define DE_PIPE_VBLANK_IVB(pipe)      (1 << (pipe * 5))
 +
  #define VLV_MASTER_IER                        0x4400c /* Gunit master IER */
  #define   MASTER_INTERRUPT_ENABLE     (1<<31)
  
  #define  SERR_INT_TRANS_C_FIFO_UNDERRUN       (1<<6)
  #define  SERR_INT_TRANS_B_FIFO_UNDERRUN       (1<<3)
  #define  SERR_INT_TRANS_A_FIFO_UNDERRUN       (1<<0)
 +#define  SERR_INT_TRANS_FIFO_UNDERRUN(pipe)   (1<<(pipe*3))
  
  /* digital port hotplug */
  #define PCH_PORT_HOTPLUG        0xc4030               /* SHOTPLUG_CTL */
         _TRANSCODER(trans, HSW_VIDEO_DIP_CTL_A, HSW_VIDEO_DIP_CTL_B)
  #define HSW_TVIDEO_DIP_AVI_DATA(trans) \
         _TRANSCODER(trans, HSW_VIDEO_DIP_AVI_DATA_A, HSW_VIDEO_DIP_AVI_DATA_B)
 +#define HSW_TVIDEO_DIP_VS_DATA(trans) \
 +       _TRANSCODER(trans, HSW_VIDEO_DIP_VS_DATA_A, HSW_VIDEO_DIP_VS_DATA_B)
  #define HSW_TVIDEO_DIP_SPD_DATA(trans) \
         _TRANSCODER(trans, HSW_VIDEO_DIP_SPD_DATA_A, HSW_VIDEO_DIP_SPD_DATA_B)
  #define HSW_TVIDEO_DIP_GCP(trans) \
  #define HSW_TVIDEO_DIP_VSC_DATA(trans) \
         _TRANSCODER(trans, HSW_VIDEO_DIP_VSC_DATA_A, HSW_VIDEO_DIP_VSC_DATA_B)
  
 +#define HSW_STEREO_3D_CTL_A   0x70020
 +#define   S3D_ENABLE          (1<<31)
 +#define HSW_STEREO_3D_CTL_B   0x71020
 +
 +#define HSW_STEREO_3D_CTL(trans) \
 +      _TRANSCODER(trans, HSW_STEREO_3D_CTL_A, HSW_STEREO_3D_CTL_A)
 +
  #define _PCH_TRANS_HTOTAL_B          0xe1000
  #define _PCH_TRANS_HBLANK_B          0xe1004
  #define _PCH_TRANS_HSYNC_B           0xe1008
  #define EDP_LINK_TRAIN_600MV_0DB_IVB          (0x30 <<22)
  #define EDP_LINK_TRAIN_600MV_3_5DB_IVB                (0x36 <<22)
  #define EDP_LINK_TRAIN_800MV_0DB_IVB          (0x38 <<22)
- #define EDP_LINK_TRAIN_800MV_3_5DB_IVB                (0x33 <<22)
+ #define EDP_LINK_TRAIN_800MV_3_5DB_IVB                (0x3e <<22)
  
  /* legacy values */
  #define EDP_LINK_TRAIN_500MV_0DB_IVB          (0x00 <<22)
  #define  GT_FIFO_FREE_ENTRIES                 0x120008
  #define    GT_FIFO_NUM_RESERVED_ENTRIES               20
  
 +#define  HSW_IDICR                            0x9008
 +#define    IDIHASHMSK(x)                      (((x) & 0x3f) << 16)
 +#define  HSW_EDRAM_PRESENT                    0x120010
 +
  #define GEN6_UCGCTL1                          0x9400
  # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE              (1 << 5)
  # define GEN6_CSUNIT_CLOCK_GATE_DISABLE                       (1 << 7)
  #define HSW_PWR_WELL_DRIVER                   0x45404 /* CTL2 */
  #define HSW_PWR_WELL_KVMR                     0x45408 /* CTL3 */
  #define HSW_PWR_WELL_DEBUG                    0x4540C /* CTL4 */
 -#define   HSW_PWR_WELL_ENABLE                 (1<<31)
 -#define   HSW_PWR_WELL_STATE                  (1<<30)
 +#define   HSW_PWR_WELL_ENABLE_REQUEST         (1<<31)
 +#define   HSW_PWR_WELL_STATE_ENABLED          (1<<30)
  #define HSW_PWR_WELL_CTL5                     0x45410
  #define   HSW_PWR_WELL_ENABLE_SINGLE_STEP     (1<<31)
  #define   HSW_PWR_WELL_PWR_GATE_OVERRIDE      (1<<20)
  #define  SBI_SSCAUXDIV6                               0x0610
  #define   SBI_SSCAUXDIV_FINALDIV2SEL(x)               ((x)<<4)
  #define  SBI_DBUFF0                           0x2a00
 -#define   SBI_DBUFF0_ENABLE                   (1<<0)
 +#define  SBI_GEN0                             0x1f00
 +#define   SBI_GEN0_CFG_BUFFENABLE_DISABLE     (1<<0)
  
  /* LPT PIXCLK_GATE */
  #define PIXCLK_GATE                   0xC6020
  #define  LCPLL_CLK_FREQ_450           (0<<26)
  #define  LCPLL_CD_CLOCK_DISABLE               (1<<25)
  #define  LCPLL_CD2X_CLOCK_DISABLE     (1<<23)
 +#define  LCPLL_POWER_DOWN_ALLOW               (1<<22)
  #define  LCPLL_CD_SOURCE_FCLK         (1<<21)
 +#define  LCPLL_CD_SOURCE_FCLK_DONE    (1<<19)
 +
 +#define D_COMP                                (MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
 +#define  D_COMP_RCOMP_IN_PROGRESS     (1<<9)
 +#define  D_COMP_COMP_FORCE            (1<<8)
 +#define  D_COMP_COMP_DISABLE          (1<<0)
  
  /* Pipe WM_LINETIME - watermark line time */
  #define PIPE_WM_LINETIME_A            0x45270
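
The EDP PSR register definitions added above are plain shift/mask encodings: the status word keeps the PSR state in bits 31:29 and the link status in bits 27:26, with counters in the lower bits. Below is a minimal, standalone sketch of decoding such a word, using the same bit positions but invented names and a made-up sample value; it is illustrative only, not driver code.

#include <stdint.h>
#include <stdio.h>

/* Bit layout mirrors EDP_PSR_STATUS_CTL above: state in 31:29, link in 27:26. */
#define PSR_STATUS_STATE_MASK	(7u << 29)
#define PSR_STATUS_STATE_SHIFT	29
#define PSR_STATUS_LINK_MASK	(3u << 26)
#define PSR_STATUS_LINK_SHIFT	26

static const char *const psr_state_names[8] = {
	"IDLE", "SRDONACK", "SRDENT", "BUFOFF",
	"BUFON", "AUXACK", "SRDOFFACK", "reserved",
};

static const char *const psr_link_names[4] = {
	"FULL_OFF", "FULL_ON", "STANDBY", "reserved",
};

int main(void)
{
	/* Made-up sample: state = SRDENT (2), link = STANDBY (2). */
	uint32_t status = (2u << PSR_STATUS_STATE_SHIFT) |
			  (2u << PSR_STATUS_LINK_SHIFT);

	printf("PSR state: %s\n",
	       psr_state_names[(status & PSR_STATUS_STATE_MASK) >>
			       PSR_STATUS_STATE_SHIFT]);
	printf("PSR link:  %s\n",
	       psr_link_names[(status & PSR_STATUS_LINK_MASK) >>
			      PSR_STATUS_LINK_SHIFT]);
	return 0;
}
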
index 10c1db596387396f7ca56c2d836fcd5e1e5edc86,be79f477a38f9e48de386332e4062f09484a3453..38452d82ac7dc4d57c603151de925d8aecf63ae6
@@@ -45,15 -45,6 +45,15 @@@ bool intel_pipe_has_type(struct drm_crt
  static void intel_increase_pllclock(struct drm_crtc *crtc);
  static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
  
 +static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 +                              struct intel_crtc_config *pipe_config);
 +static void ironlake_crtc_clock_get(struct intel_crtc *crtc,
 +                                  struct intel_crtc_config *pipe_config);
 +
 +static int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode,
 +                        int x, int y, struct drm_framebuffer *old_fb);
 +
 +
  typedef struct {
        int     min, max;
  } intel_range_t;
@@@ -63,6 -54,7 +63,6 @@@ typedef struct 
        int     p2_slow, p2_fast;
  } intel_p2_t;
  
 -#define INTEL_P2_NUM                2
  typedef struct intel_limit intel_limit_t;
  struct intel_limit {
        intel_range_t   dot, vco, n, m, m1, m2, p, p1;
@@@ -92,7 -84,7 +92,7 @@@ intel_fdi_link_freq(struct drm_device *
                return 27;
  }
  
 -static const intel_limit_t intel_limits_i8xx_dvo = {
 +static const intel_limit_t intel_limits_i8xx_dac = {
        .dot = { .min = 25000, .max = 350000 },
        .vco = { .min = 930000, .max = 1400000 },
        .n = { .min = 3, .max = 16 },
                .p2_slow = 4, .p2_fast = 2 },
  };
  
 +static const intel_limit_t intel_limits_i8xx_dvo = {
 +      .dot = { .min = 25000, .max = 350000 },
 +      .vco = { .min = 930000, .max = 1400000 },
 +      .n = { .min = 3, .max = 16 },
 +      .m = { .min = 96, .max = 140 },
 +      .m1 = { .min = 18, .max = 26 },
 +      .m2 = { .min = 6, .max = 16 },
 +      .p = { .min = 4, .max = 128 },
 +      .p1 = { .min = 2, .max = 33 },
 +      .p2 = { .dot_limit = 165000,
 +              .p2_slow = 4, .p2_fast = 4 },
 +};
 +
  static const intel_limit_t intel_limits_i8xx_lvds = {
        .dot = { .min = 25000, .max = 350000 },
        .vco = { .min = 930000, .max = 1400000 },
@@@ -426,10 -405,8 +426,10 @@@ static const intel_limit_t *intel_limit
        } else {
                if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
                        limit = &intel_limits_i8xx_lvds;
 -              else
 +              else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DVO))
                        limit = &intel_limits_i8xx_dvo;
 +              else
 +                      limit = &intel_limits_i8xx_dac;
        }
        return limit;
  }
@@@ -690,7 -667,7 +690,7 @@@ vlv_find_best_dpll(const intel_limit_t 
  {
        u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
        u32 m, n, fastclk;
 -      u32 updrate, minupdate, fracbits, p;
 +      u32 updrate, minupdate, p;
        unsigned long bestppm, ppm, absppm;
        int dotclk, flag;
  
        fastclk = dotclk / (2*100);
        updrate = 0;
        minupdate = 19200;
 -      fracbits = 1;
        n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
        bestm1 = bestm2 = bestp1 = bestp2 = 0;
  
@@@ -914,8 -892,8 +914,8 @@@ static const char *state_string(bool en
  }
  
  /* Only for pre-ILK configs */
 -static void assert_pll(struct drm_i915_private *dev_priv,
 -                     enum pipe pipe, bool state)
 +void assert_pll(struct drm_i915_private *dev_priv,
 +              enum pipe pipe, bool state)
  {
        int reg;
        u32 val;
             "PLL state assertion failure (expected %s, current %s)\n",
             state_string(state), state_string(cur_state));
  }
 -#define assert_pll_enabled(d, p) assert_pll(d, p, true)
 -#define assert_pll_disabled(d, p) assert_pll(d, p, false)
  
 -static struct intel_shared_dpll *
 +struct intel_shared_dpll *
  intel_crtc_to_shared_dpll(struct intel_crtc *crtc)
  {
        struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
  }
  
  /* For ILK+ */
 -static void assert_shared_dpll(struct drm_i915_private *dev_priv,
 -                             struct intel_shared_dpll *pll,
 -                             bool state)
 +void assert_shared_dpll(struct drm_i915_private *dev_priv,
 +                      struct intel_shared_dpll *pll,
 +                      bool state)
  {
        bool cur_state;
        struct intel_dpll_hw_state hw_state;
             "%s assertion failure (expected %s, current %s)\n",
             pll->name, state_string(state), state_string(cur_state));
  }
 -#define assert_shared_dpll_enabled(d, p) assert_shared_dpll(d, p, true)
 -#define assert_shared_dpll_disabled(d, p) assert_shared_dpll(d, p, false)
  
  static void assert_fdi_tx(struct drm_i915_private *dev_priv,
                          enum pipe pipe, bool state)
@@@ -1025,19 -1007,15 +1025,19 @@@ static void assert_fdi_tx_pll_enabled(s
        WARN(!(val & FDI_TX_PLL_ENABLE), "FDI TX PLL assertion failure, should be active but is disabled\n");
  }
  
 -static void assert_fdi_rx_pll_enabled(struct drm_i915_private *dev_priv,
 -                                    enum pipe pipe)
 +void assert_fdi_rx_pll(struct drm_i915_private *dev_priv,
 +                     enum pipe pipe, bool state)
  {
        int reg;
        u32 val;
 +      bool cur_state;
  
        reg = FDI_RX_CTL(pipe);
        val = I915_READ(reg);
 -      WARN(!(val & FDI_RX_PLL_ENABLE), "FDI RX PLL assertion failure, should be active but is disabled\n");
 +      cur_state = !!(val & FDI_RX_PLL_ENABLE);
 +      WARN(cur_state != state,
 +           "FDI RX PLL assertion failure (expected %s, current %s)\n",
 +           state_string(state), state_string(cur_state));
  }
  
  static void assert_panel_unlocked(struct drm_i915_private *dev_priv,
@@@ -1133,7 -1111,7 +1133,7 @@@ static void assert_planes_disabled(stru
        }
  
        /* Need to check both planes against the pipe */
 -      for (i = 0; i < INTEL_INFO(dev)->num_pipes; i++) {
 +      for_each_pipe(i) {
                reg = DSPCNTR(i);
                val = I915_READ(reg);
                cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
@@@ -1323,92 -1301,51 +1323,92 @@@ static void assert_pch_ports_disabled(s
        assert_pch_hdmi_disabled(dev_priv, pipe, PCH_HDMID);
  }
  
 -/**
 - * intel_enable_pll - enable a PLL
 - * @dev_priv: i915 private structure
 - * @pipe: pipe PLL to enable
 - *
 - * Enable @pipe's PLL so we can start pumping pixels from a plane.  Check to
 - * make sure the PLL reg is writable first though, since the panel write
 - * protect mechanism may be enabled.
 - *
 - * Note!  This is for pre-ILK only.
 - *
 - * Unfortunately needed by dvo_ns2501 since the dvo depends on it running.
 - */
 -static void intel_enable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 +static void vlv_enable_pll(struct intel_crtc *crtc)
  {
 -      int reg;
 -      u32 val;
 +      struct drm_device *dev = crtc->base.dev;
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      int reg = DPLL(crtc->pipe);
 +      u32 dpll = crtc->config.dpll_hw_state.dpll;
  
 -      assert_pipe_disabled(dev_priv, pipe);
 +      assert_pipe_disabled(dev_priv, crtc->pipe);
  
        /* No really, not for ILK+ */
 -      BUG_ON(!IS_VALLEYVIEW(dev_priv->dev) && dev_priv->info->gen >= 5);
 +      BUG_ON(!IS_VALLEYVIEW(dev_priv->dev));
  
        /* PLL is protected by panel, make sure we can write it */
        if (IS_MOBILE(dev_priv->dev) && !IS_I830(dev_priv->dev))
 -              assert_panel_unlocked(dev_priv, pipe);
 +              assert_panel_unlocked(dev_priv, crtc->pipe);
  
 -      reg = DPLL(pipe);
 -      val = I915_READ(reg);
 -      val |= DPLL_VCO_ENABLE;
 +      I915_WRITE(reg, dpll);
 +      POSTING_READ(reg);
 +      udelay(150);
 +
 +      if (wait_for(((I915_READ(reg) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1))
 +              DRM_ERROR("DPLL %d failed to lock\n", crtc->pipe);
 +
 +      I915_WRITE(DPLL_MD(crtc->pipe), crtc->config.dpll_hw_state.dpll_md);
 +      POSTING_READ(DPLL_MD(crtc->pipe));
  
        /* We do this three times for luck */
 -      I915_WRITE(reg, val);
 +      I915_WRITE(reg, dpll);
        POSTING_READ(reg);
        udelay(150); /* wait for warmup */
 -      I915_WRITE(reg, val);
 +      I915_WRITE(reg, dpll);
        POSTING_READ(reg);
        udelay(150); /* wait for warmup */
 -      I915_WRITE(reg, val);
 +      I915_WRITE(reg, dpll);
 +      POSTING_READ(reg);
 +      udelay(150); /* wait for warmup */
 +}
 +
 +static void i9xx_enable_pll(struct intel_crtc *crtc)
 +{
 +      struct drm_device *dev = crtc->base.dev;
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      int reg = DPLL(crtc->pipe);
 +      u32 dpll = crtc->config.dpll_hw_state.dpll;
 +
 +      assert_pipe_disabled(dev_priv, crtc->pipe);
 +
 +      /* No really, not for ILK+ */
 +      BUG_ON(dev_priv->info->gen >= 5);
 +
 +      /* PLL is protected by panel, make sure we can write it */
 +      if (IS_MOBILE(dev) && !IS_I830(dev))
 +              assert_panel_unlocked(dev_priv, crtc->pipe);
 +
 +      I915_WRITE(reg, dpll);
 +
 +      /* Wait for the clocks to stabilize. */
 +      POSTING_READ(reg);
 +      udelay(150);
 +
 +      if (INTEL_INFO(dev)->gen >= 4) {
 +              I915_WRITE(DPLL_MD(crtc->pipe),
 +                         crtc->config.dpll_hw_state.dpll_md);
 +      } else {
 +              /* The pixel multiplier can only be updated once the
 +               * DPLL is enabled and the clocks are stable.
 +               *
 +               * So write it again.
 +               */
 +              I915_WRITE(reg, dpll);
 +      }
 +
 +      /* We do this three times for luck */
 +      I915_WRITE(reg, dpll);
 +      POSTING_READ(reg);
 +      udelay(150); /* wait for warmup */
 +      I915_WRITE(reg, dpll);
 +      POSTING_READ(reg);
 +      udelay(150); /* wait for warmup */
 +      I915_WRITE(reg, dpll);
        POSTING_READ(reg);
        udelay(150); /* wait for warmup */
  }
  
  /**
 - * intel_disable_pll - disable a PLL
 + * i9xx_disable_pll - disable a PLL
   * @dev_priv: i915 private structure
   * @pipe: pipe PLL to disable
   *
   *
   * Note!  This is for pre-ILK only.
   */
 -static void intel_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 +static void i9xx_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
  {
 -      int reg;
 -      u32 val;
 -
        /* Don't disable pipe A or pipe A PLLs if needed */
        if (pipe == PIPE_A && (dev_priv->quirks & QUIRK_PIPEA_FORCE))
                return;
        /* Make sure the pipe isn't still relying on us */
        assert_pipe_disabled(dev_priv, pipe);
  
 -      reg = DPLL(pipe);
 -      val = I915_READ(reg);
 -      val &= ~DPLL_VCO_ENABLE;
 -      I915_WRITE(reg, val);
 -      POSTING_READ(reg);
 +      I915_WRITE(DPLL(pipe), 0);
 +      POSTING_READ(DPLL(pipe));
  }
  
  void vlv_wait_port_ready(struct drm_i915_private *dev_priv, int port)
@@@ -1876,7 -1819,7 +1876,7 @@@ intel_pin_and_fence_fb_obj(struct drm_d
        return 0;
  
  err_unpin:
 -      i915_gem_object_unpin(obj);
 +      i915_gem_object_unpin_from_display_plane(obj);
  err_interruptible:
        dev_priv->mm.interruptible = true;
        return ret;
  void intel_unpin_fb_obj(struct drm_i915_gem_object *obj)
  {
        i915_gem_object_unpin_fence(obj);
 -      i915_gem_object_unpin(obj);
 +      i915_gem_object_unpin_from_display_plane(obj);
  }
  
  /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel
@@@ -1999,17 -1942,16 +1999,17 @@@ static int i9xx_update_plane(struct drm
                intel_crtc->dspaddr_offset = linear_offset;
        }
  
 -      DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n",
 -                    obj->gtt_offset, linear_offset, x, y, fb->pitches[0]);
 +      DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n",
 +                    i915_gem_obj_ggtt_offset(obj), linear_offset, x, y,
 +                    fb->pitches[0]);
        I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
        if (INTEL_INFO(dev)->gen >= 4) {
                I915_MODIFY_DISPBASE(DSPSURF(plane),
 -                                   obj->gtt_offset + intel_crtc->dspaddr_offset);
 +                                   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
                I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
                I915_WRITE(DSPLINOFF(plane), linear_offset);
        } else
 -              I915_WRITE(DSPADDR(plane), obj->gtt_offset + linear_offset);
 +              I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
        POSTING_READ(reg);
  
        return 0;
@@@ -2089,12 -2031,11 +2089,12 @@@ static int ironlake_update_plane(struc
                                               fb->pitches[0]);
        linear_offset -= intel_crtc->dspaddr_offset;
  
 -      DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n",
 -                    obj->gtt_offset, linear_offset, x, y, fb->pitches[0]);
 +      DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n",
 +                    i915_gem_obj_ggtt_offset(obj), linear_offset, x, y,
 +                    fb->pitches[0]);
        I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
        I915_MODIFY_DISPBASE(DSPSURF(plane),
 -                           obj->gtt_offset + intel_crtc->dspaddr_offset);
 +                           i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
        if (IS_HASWELL(dev)) {
                I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
        } else {
@@@ -2242,20 -2183,6 +2242,20 @@@ intel_pipe_set_base(struct drm_crtc *cr
                return ret;
        }
  
 +      /* Update pipe size and adjust fitter if needed */
 +      if (i915_fastboot) {
 +              I915_WRITE(PIPESRC(intel_crtc->pipe),
 +                         ((crtc->mode.hdisplay - 1) << 16) |
 +                         (crtc->mode.vdisplay - 1));
 +              if (!intel_crtc->config.pch_pfit.size &&
 +                  (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) ||
 +                   intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))) {
 +                      I915_WRITE(PF_CTL(intel_crtc->pipe), 0);
 +                      I915_WRITE(PF_WIN_POS(intel_crtc->pipe), 0);
 +                      I915_WRITE(PF_WIN_SZ(intel_crtc->pipe), 0);
 +              }
 +      }
 +
        ret = dev_priv->display.update_plane(crtc, fb, x, y);
        if (ret) {
                intel_unpin_fb_obj(to_intel_framebuffer(fb)->obj);
        }
  
        intel_update_fbc(dev);
 +      intel_edp_psr_update(dev);
        mutex_unlock(&dev->struct_mutex);
  
        intel_crtc_update_sarea_pos(crtc, x, y);
@@@ -2597,7 -2523,7 +2597,7 @@@ static void ivb_manual_fdi_link_train(s
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
 -      u32 reg, temp, i;
 +      u32 reg, temp, i, j;
  
        /* Train 1: unmask FDI RX Interrupt symbol_lock and bit_lock bit
           for train result */
        DRM_DEBUG_KMS("FDI_RX_IIR before link train 0x%x\n",
                      I915_READ(FDI_RX_IIR(pipe)));
  
 -      /* enable CPU FDI TX and PCH FDI RX */
 -      reg = FDI_TX_CTL(pipe);
 -      temp = I915_READ(reg);
 -      temp &= ~FDI_DP_PORT_WIDTH_MASK;
 -      temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes);
 -      temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB);
 -      temp |= FDI_LINK_TRAIN_PATTERN_1_IVB;
 -      temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
 -      temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
 -      temp |= FDI_COMPOSITE_SYNC;
 -      I915_WRITE(reg, temp | FDI_TX_ENABLE);
 -
 -      I915_WRITE(FDI_RX_MISC(pipe),
 -                 FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
 -
 -      reg = FDI_RX_CTL(pipe);
 -      temp = I915_READ(reg);
 -      temp &= ~FDI_LINK_TRAIN_AUTO;
 -      temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
 -      temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
 -      temp |= FDI_COMPOSITE_SYNC;
 -      I915_WRITE(reg, temp | FDI_RX_ENABLE);
 +      /* Try each vswing and preemphasis setting twice before moving on */
 +      for (j = 0; j < ARRAY_SIZE(snb_b_fdi_train_param) * 2; j++) {
 +              /* disable first in case we need to retry */
 +              reg = FDI_TX_CTL(pipe);
 +              temp = I915_READ(reg);
 +              temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB);
 +              temp &= ~FDI_TX_ENABLE;
 +              I915_WRITE(reg, temp);
  
 -      POSTING_READ(reg);
 -      udelay(150);
 +              reg = FDI_RX_CTL(pipe);
 +              temp = I915_READ(reg);
 +              temp &= ~FDI_LINK_TRAIN_AUTO;
 +              temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
 +              temp &= ~FDI_RX_ENABLE;
 +              I915_WRITE(reg, temp);
  
 -      for (i = 0; i < 4; i++) {
 +              /* enable CPU FDI TX and PCH FDI RX */
                reg = FDI_TX_CTL(pipe);
                temp = I915_READ(reg);
 +              temp &= ~FDI_DP_PORT_WIDTH_MASK;
 +              temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes);
 +              temp |= FDI_LINK_TRAIN_PATTERN_1_IVB;
                temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
 -              temp |= snb_b_fdi_train_param[i];
 -              I915_WRITE(reg, temp);
 +              temp |= snb_b_fdi_train_param[j/2];
 +              temp |= FDI_COMPOSITE_SYNC;
 +              I915_WRITE(reg, temp | FDI_TX_ENABLE);
  
 -              POSTING_READ(reg);
 -              udelay(500);
 +              I915_WRITE(FDI_RX_MISC(pipe),
 +                         FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
  
 -              reg = FDI_RX_IIR(pipe);
 +              reg = FDI_RX_CTL(pipe);
                temp = I915_READ(reg);
 -              DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
 -
 -              if (temp & FDI_RX_BIT_LOCK ||
 -                  (I915_READ(reg) & FDI_RX_BIT_LOCK)) {
 -                      I915_WRITE(reg, temp | FDI_RX_BIT_LOCK);
 -                      DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", i);
 -                      break;
 -              }
 -      }
 -      if (i == 4)
 -              DRM_ERROR("FDI train 1 fail!\n");
 +              temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
 +              temp |= FDI_COMPOSITE_SYNC;
 +              I915_WRITE(reg, temp | FDI_RX_ENABLE);
  
 -      /* Train 2 */
 -      reg = FDI_TX_CTL(pipe);
 -      temp = I915_READ(reg);
 -      temp &= ~FDI_LINK_TRAIN_NONE_IVB;
 -      temp |= FDI_LINK_TRAIN_PATTERN_2_IVB;
 -      temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
 -      temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
 -      I915_WRITE(reg, temp);
 +              POSTING_READ(reg);
 +              udelay(1); /* should be 0.5us */
  
 -      reg = FDI_RX_CTL(pipe);
 -      temp = I915_READ(reg);
 -      temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
 -      temp |= FDI_LINK_TRAIN_PATTERN_2_CPT;
 -      I915_WRITE(reg, temp);
 +              for (i = 0; i < 4; i++) {
 +                      reg = FDI_RX_IIR(pipe);
 +                      temp = I915_READ(reg);
 +                      DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
  
 -      POSTING_READ(reg);
 -      udelay(150);
 +                      if (temp & FDI_RX_BIT_LOCK ||
 +                          (I915_READ(reg) & FDI_RX_BIT_LOCK)) {
 +                              I915_WRITE(reg, temp | FDI_RX_BIT_LOCK);
 +                              DRM_DEBUG_KMS("FDI train 1 done, level %i.\n",
 +                                            i);
 +                              break;
 +                      }
 +                      udelay(1); /* should be 0.5us */
 +              }
 +              if (i == 4) {
 +                      DRM_DEBUG_KMS("FDI train 1 fail on vswing %d\n", j / 2);
 +                      continue;
 +              }
  
 -      for (i = 0; i < 4; i++) {
 +              /* Train 2 */
                reg = FDI_TX_CTL(pipe);
                temp = I915_READ(reg);
 -              temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
 -              temp |= snb_b_fdi_train_param[i];
 +              temp &= ~FDI_LINK_TRAIN_NONE_IVB;
 +              temp |= FDI_LINK_TRAIN_PATTERN_2_IVB;
 +              I915_WRITE(reg, temp);
 +
 +              reg = FDI_RX_CTL(pipe);
 +              temp = I915_READ(reg);
 +              temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
 +              temp |= FDI_LINK_TRAIN_PATTERN_2_CPT;
                I915_WRITE(reg, temp);
  
                POSTING_READ(reg);
 -              udelay(500);
 +              udelay(2); /* should be 1.5us */
  
 -              reg = FDI_RX_IIR(pipe);
 -              temp = I915_READ(reg);
 -              DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
 +              for (i = 0; i < 4; i++) {
 +                      reg = FDI_RX_IIR(pipe);
 +                      temp = I915_READ(reg);
 +                      DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
  
 -              if (temp & FDI_RX_SYMBOL_LOCK) {
 -                      I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK);
 -                      DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", i);
 -                      break;
 +                      if (temp & FDI_RX_SYMBOL_LOCK ||
 +                          (I915_READ(reg) & FDI_RX_SYMBOL_LOCK)) {
 +                              I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK);
 +                              DRM_DEBUG_KMS("FDI train 2 done, level %i.\n",
 +                                            i);
 +                              goto train_done;
 +                      }
 +                      udelay(2); /* should be 1.5us */
                }
 +              if (i == 4)
 +                      DRM_DEBUG_KMS("FDI train 2 fail on vswing %d\n", j / 2);
        }
 -      if (i == 4)
 -              DRM_ERROR("FDI train 2 fail!\n");
  
 +train_done:
        DRM_DEBUG_KMS("FDI train done.\n");
  }
  
@@@ -3003,8 -2927,15 +3003,8 @@@ static void ironlake_pch_enable(struct 
        /* For PCH output, training FDI link */
        dev_priv->display.fdi_link_train(crtc);
  
 -      /* XXX: pch pll's can be enabled any time before we enable the PCH
 -       * transcoder, and we actually should do this to not upset any PCH
 -       * transcoder that already use the clock when we share it.
 -       *
 -       * Note that enable_shared_dpll tries to do the right thing, but
 -       * get_shared_dpll unconditionally resets the pll - we need that to have
 -       * the right LVDS enable sequence. */
 -      ironlake_enable_shared_dpll(intel_crtc);
 -
 +      /* We need to program the right clock selection before writing the pixel
 +       * multiplier into the DPLL. */
        if (HAS_PCH_CPT(dev)) {
                u32 sel;
  
                I915_WRITE(PCH_DPLL_SEL, temp);
        }
  
 +      /* XXX: pch pll's can be enabled any time before we enable the PCH
 +       * transcoder, and we actually should do this to not upset any PCH
 +       * transcoder that already use the clock when we share it.
 +       *
 +       * Note that enable_shared_dpll tries to do the right thing, but
 +       * get_shared_dpll unconditionally resets the pll - we need that to have
 +       * the right LVDS enable sequence. */
 +      ironlake_enable_shared_dpll(intel_crtc);
 +
        /* set transcoder timing, panel must allow it */
        assert_panel_unlocked(dev_priv, pipe);
        ironlake_pch_transcoder_set_timings(intel_crtc, pipe);
@@@ -3109,7 -3031,7 +3109,7 @@@ static void intel_put_shared_dpll(struc
        crtc->config.shared_dpll = DPLL_ID_PRIVATE;
  }
  
 -static struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc, u32 dpll, u32 fp)
 +static struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc)
  {
        struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
        struct intel_shared_dpll *pll = intel_crtc_to_shared_dpll(crtc);
  
        if (HAS_PCH_IBX(dev_priv->dev)) {
                /* Ironlake PCH has a fixed PLL->PCH pipe mapping. */
 -              i = crtc->pipe;
 +              i = (enum intel_dpll_id) crtc->pipe;
                pll = &dev_priv->shared_dplls[i];
  
                DRM_DEBUG_KMS("CRTC:%d using pre-allocated %s\n",
                if (pll->refcount == 0)
                        continue;
  
 -              if (dpll == (I915_READ(PCH_DPLL(pll->id)) & 0x7fffffff) &&
 -                  fp == I915_READ(PCH_FP0(pll->id))) {
 +              if (memcmp(&crtc->config.dpll_hw_state, &pll->hw_state,
 +                         sizeof(pll->hw_state)) == 0) {
                      DRM_DEBUG_KMS("CRTC:%d sharing existing %s (refcount %d, active %d)\n",
                                      crtc->base.base.id,
                                      pll->name, pll->refcount, pll->active);
@@@ -3174,7 -3096,13 +3174,7 @@@ found
                WARN_ON(pll->on);
                assert_shared_dpll_disabled(dev_priv, pll);
  
 -              /* Wait for the clocks to stabilize before rewriting the regs */
 -              I915_WRITE(PCH_DPLL(pll->id), dpll & ~DPLL_VCO_ENABLE);
 -              POSTING_READ(PCH_DPLL(pll->id));
 -              udelay(150);
 -
 -              I915_WRITE(PCH_FP0(pll->id), fp);
 -              I915_WRITE(PCH_DPLL(pll->id), dpll & ~DPLL_VCO_ENABLE);
 +              pll->mode_set(dev_priv, pll);
        }
        pll->refcount++;
  
@@@ -3246,6 -3174,7 +3246,6 @@@ static void ironlake_crtc_enable(struc
        struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
        int plane = intel_crtc->plane;
 -      u32 temp;
  
        WARN_ON(!crtc->enabled);
  
  
        intel_update_watermarks(dev);
  
 -      if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
 -              temp = I915_READ(PCH_LVDS);
 -              if ((temp & LVDS_PORT_EN) == 0)
 -                      I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN);
 -      }
 -
 +      for_each_encoder_on_crtc(dev, crtc, encoder)
 +              if (encoder->pre_enable)
 +                      encoder->pre_enable(encoder);
  
        if (intel_crtc->config.has_pch_encoder) {
                /* Note: FDI PLL enabling _must_ be done before we enable the
                assert_fdi_rx_disabled(dev_priv, pipe);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 -
        ironlake_pfit_enable(intel_crtc);
  
        /*
@@@ -3453,7 -3389,7 +3453,7 @@@ static void ironlake_crtc_disable(struc
        intel_crtc_wait_for_pending_flips(crtc);
        drm_vblank_off(dev, pipe);
  
 -      if (dev_priv->cfb_plane == plane)
 +      if (dev_priv->fbc.plane == plane)
                intel_disable_fbc(dev);
  
        intel_crtc_update_cursor(crtc, false);
@@@ -3526,7 -3462,7 +3526,7 @@@ static void haswell_crtc_disable(struc
        drm_vblank_off(dev, pipe);
  
        /* FBC must be disabled before disabling the plane on HSW. */
 -      if (dev_priv->cfb_plane == plane)
 +      if (dev_priv->fbc.plane == plane)
                intel_disable_fbc(dev);
  
        hsw_disable_ips(intel_crtc);
@@@ -3657,16 -3593,22 +3657,16 @@@ static void valleyview_crtc_enable(stru
        intel_crtc->active = true;
        intel_update_watermarks(dev);
  
 -      mutex_lock(&dev_priv->dpio_lock);
 -
        for_each_encoder_on_crtc(dev, crtc, encoder)
                if (encoder->pre_pll_enable)
                        encoder->pre_pll_enable(encoder);
  
 -      intel_enable_pll(dev_priv, pipe);
 +      vlv_enable_pll(intel_crtc);
  
        for_each_encoder_on_crtc(dev, crtc, encoder)
                if (encoder->pre_enable)
                        encoder->pre_enable(encoder);
  
 -      /* VLV wants encoder enabling _before_ the pipe is up. */
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 -
        i9xx_pfit_enable(intel_crtc);
  
        intel_crtc_load_lut(crtc);
  
        intel_update_fbc(dev);
  
 -      mutex_unlock(&dev_priv->dpio_lock);
 +      for_each_encoder_on_crtc(dev, crtc, encoder)
 +              encoder->enable(encoder);
  }
  
  static void i9xx_crtc_enable(struct drm_crtc *crtc)
        intel_crtc->active = true;
        intel_update_watermarks(dev);
  
 -      intel_enable_pll(dev_priv, pipe);
 -
        for_each_encoder_on_crtc(dev, crtc, encoder)
                if (encoder->pre_enable)
                        encoder->pre_enable(encoder);
  
 +      i9xx_enable_pll(intel_crtc);
 +
        i9xx_pfit_enable(intel_crtc);
  
        intel_crtc_load_lut(crtc);
@@@ -3760,7 -3701,7 +3760,7 @@@ static void i9xx_crtc_disable(struct dr
        intel_crtc_wait_for_pending_flips(crtc);
        drm_vblank_off(dev, pipe);
  
 -      if (dev_priv->cfb_plane == plane)
 +      if (dev_priv->fbc.plane == plane)
                intel_disable_fbc(dev);
  
        intel_crtc_dpms_overlay(intel_crtc, false);
                if (encoder->post_disable)
                        encoder->post_disable(encoder);
  
 -      intel_disable_pll(dev_priv, pipe);
 +      i9xx_disable_pll(dev_priv, pipe);
  
        intel_crtc->active = false;
        intel_update_fbc(dev);
@@@ -3876,6 -3817,16 +3876,6 @@@ static void intel_crtc_disable(struct d
        }
  }
  
 -void intel_modeset_disable(struct drm_device *dev)
 -{
 -      struct drm_crtc *crtc;
 -
 -      list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 -              if (crtc->enabled)
 -                      intel_crtc_disable(crtc);
 -      }
 -}
 -
  void intel_encoder_destroy(struct drm_encoder *encoder)
  {
        struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
        kfree(intel_encoder);
  }
  
 -/* Simple dpms helper for encodres with just one connector, no cloning and only
 +/* Simple dpms helper for encoders with just one connector, no cloning and only
   * one kind of off state. It clamps all !ON modes to fully OFF and changes the
   * state of the entire output pipe. */
 -void intel_encoder_dpms(struct intel_encoder *encoder, int mode)
 +static void intel_encoder_dpms(struct intel_encoder *encoder, int mode)
  {
        if (mode == DRM_MODE_DPMS_ON) {
                encoder->connectors_active = true;
@@@ -4081,7 -4032,7 +4081,7 @@@ static void hsw_compute_ips_config(stru
  {
        pipe_config->ips_enabled = i915_enable_ips &&
                                   hsw_crtc_supports_ips(crtc) &&
 -                                 pipe_config->pipe_bpp == 24;
 +                                 pipe_config->pipe_bpp <= 24;
  }
  
  static int intel_crtc_compute_config(struct intel_crtc *crtc,
                        return -EINVAL;
        }
  
 -      /* All interlaced capable intel hw wants timings in frames. Note though
 -       * that intel_lvds_mode_fixup does some funny tricks with the crtc
 -       * timings, so we need to be careful not to clobber these.*/
 -      if (!pipe_config->timings_set)
 -              drm_mode_set_crtcinfo(adjusted_mode, 0);
 -
        /* Cantiga+ cannot handle modes with a hsync front porch of 0.
         * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw.
         */
@@@ -4146,30 -4103,6 +4146,30 @@@ static int i9xx_misc_get_display_clock_
        return 200000;
  }
  
 +static int pnv_get_display_clock_speed(struct drm_device *dev)
 +{
 +      u16 gcfgc = 0;
 +
 +      pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 +
 +      switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
 +      case GC_DISPLAY_CLOCK_267_MHZ_PNV:
 +              return 267000;
 +      case GC_DISPLAY_CLOCK_333_MHZ_PNV:
 +              return 333000;
 +      case GC_DISPLAY_CLOCK_444_MHZ_PNV:
 +              return 444000;
 +      case GC_DISPLAY_CLOCK_200_MHZ_PNV:
 +              return 200000;
 +      default:
 +              DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc);
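 +              /* no break here: unknown GCFGC values intentionally fall
 +               * through to the conservative 133 MHz case below */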
 +      case GC_DISPLAY_CLOCK_133_MHZ_PNV:
 +              return 133000;
 +      case GC_DISPLAY_CLOCK_167_MHZ_PNV:
 +              return 167000;
 +      }
 +}
 +
  static int i915gm_get_display_clock_speed(struct drm_device *dev)
  {
        u16 gcfgc = 0;
@@@ -4333,17 -4266,14 +4333,17 @@@ static void i9xx_update_pll_dividers(st
        }
  
        I915_WRITE(FP0(pipe), fp);
 +      crtc->config.dpll_hw_state.fp0 = fp;
  
        crtc->lowfreq_avail = false;
        if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_LVDS) &&
            reduced_clock && i915_powersave) {
                I915_WRITE(FP1(pipe), fp2);
 +              crtc->config.dpll_hw_state.fp1 = fp2;
                crtc->lowfreq_avail = true;
        } else {
                I915_WRITE(FP1(pipe), fp);
 +              crtc->config.dpll_hw_state.fp1 = fp;
        }
  }
  
@@@ -4421,13 -4351,17 +4421,13 @@@ static void vlv_update_pll(struct intel
  {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      struct intel_encoder *encoder;
        int pipe = crtc->pipe;
        u32 dpll, mdiv;
        u32 bestn, bestm1, bestm2, bestp1, bestp2;
 -      bool is_hdmi;
        u32 coreclk, reg_val, dpll_md;
  
        mutex_lock(&dev_priv->dpio_lock);
  
 -      is_hdmi = intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI);
 -
        bestn = crtc->config.dpll.n;
        bestm1 = crtc->config.dpll.m1;
        bestm2 = crtc->config.dpll.m2;
            intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_ANALOG) ||
            intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI))
                vlv_dpio_write(dev_priv, DPIO_LPF_COEFF(pipe),
 -                               0x005f0021);
 +                               0x009f0003);
        else
                vlv_dpio_write(dev_priv, DPIO_LPF_COEFF(pipe),
                                 0x00d0000f);
  
        vlv_dpio_write(dev_priv, DPIO_PLL_CML(pipe), 0x87871000);
  
 -      for_each_encoder_on_crtc(dev, &crtc->base, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 -
        /* Enable DPIO clock input */
        dpll = DPLL_EXT_BUFFER_ENABLE_VLV | DPLL_REFA_CLK_ENABLE_VLV |
                DPLL_VGA_MODE_DIS | DPLL_INTEGRATED_CLOCK_VLV;
                dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
  
        dpll |= DPLL_VCO_ENABLE;
 -      I915_WRITE(DPLL(pipe), dpll);
 -      POSTING_READ(DPLL(pipe));
 -      udelay(150);
 -
 -      if (wait_for(((I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1))
 -              DRM_ERROR("DPLL %d failed to lock\n", pipe);
 +      crtc->config.dpll_hw_state.dpll = dpll;
  
        dpll_md = (crtc->config.pixel_multiplier - 1)
                << DPLL_MD_UDI_MULTIPLIER_SHIFT;
 -      I915_WRITE(DPLL_MD(pipe), dpll_md);
 -      POSTING_READ(DPLL_MD(pipe));
 +      crtc->config.dpll_hw_state.dpll_md = dpll_md;
  
        if (crtc->config.has_dp_encoder)
                intel_dp_set_m_n(crtc);
@@@ -4531,6 -4475,8 +4531,6 @@@ static void i9xx_update_pll(struct inte
  {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      struct intel_encoder *encoder;
 -      int pipe = crtc->pipe;
        u32 dpll;
        bool is_sdvo;
        struct dpll *clock = &crtc->config.dpll;
        }
  
        if (is_sdvo)
 -              dpll |= DPLL_DVO_HIGH_SPEED;
 +              dpll |= DPLL_SDVO_HIGH_SPEED;
  
        if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_DISPLAYPORT))
 -              dpll |= DPLL_DVO_HIGH_SPEED;
 +              dpll |= DPLL_SDVO_HIGH_SPEED;
  
        /* compute bitmask from p1 value */
        if (IS_PINEVIEW(dev))
                dpll |= PLL_REF_INPUT_DREFCLK;
  
        dpll |= DPLL_VCO_ENABLE;
 -      I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
 -      POSTING_READ(DPLL(pipe));
 -      udelay(150);
 -
 -      for_each_encoder_on_crtc(dev, &crtc->base, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 -
 -      if (crtc->config.has_dp_encoder)
 -              intel_dp_set_m_n(crtc);
 -
 -      I915_WRITE(DPLL(pipe), dpll);
 -
 -      /* Wait for the clocks to stabilize. */
 -      POSTING_READ(DPLL(pipe));
 -      udelay(150);
 +      crtc->config.dpll_hw_state.dpll = dpll;
  
        if (INTEL_INFO(dev)->gen >= 4) {
                u32 dpll_md = (crtc->config.pixel_multiplier - 1)
                        << DPLL_MD_UDI_MULTIPLIER_SHIFT;
 -              I915_WRITE(DPLL_MD(pipe), dpll_md);
 -      } else {
 -              /* The pixel multiplier can only be updated once the
 -               * DPLL is enabled and the clocks are stable.
 -               *
 -               * So write it again.
 -               */
 -              I915_WRITE(DPLL(pipe), dpll);
 +              crtc->config.dpll_hw_state.dpll_md = dpll_md;
        }
 +
 +      if (crtc->config.has_dp_encoder)
 +              intel_dp_set_m_n(crtc);
  }
  
  static void i8xx_update_pll(struct intel_crtc *crtc,
  {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      struct intel_encoder *encoder;
 -      int pipe = crtc->pipe;
        u32 dpll;
        struct dpll *clock = &crtc->config.dpll;
  
                        dpll |= PLL_P2_DIVIDE_BY_4;
        }
  
 +      if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_DVO))
 +              dpll |= DPLL_DVO_2X_MODE;
 +
        if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_LVDS) &&
                 intel_panel_use_ssc(dev_priv) && num_connectors < 2)
                dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN;
                dpll |= PLL_REF_INPUT_DREFCLK;
  
        dpll |= DPLL_VCO_ENABLE;
 -      I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
 -      POSTING_READ(DPLL(pipe));
 -      udelay(150);
 -
 -      for_each_encoder_on_crtc(dev, &crtc->base, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 -
 -      I915_WRITE(DPLL(pipe), dpll);
 -
 -      /* Wait for the clocks to stabilize. */
 -      POSTING_READ(DPLL(pipe));
 -      udelay(150);
 -
 -      /* The pixel multiplier can only be updated once the
 -       * DPLL is enabled and the clocks are stable.
 -       *
 -       * So write it again.
 -       */
 -      I915_WRITE(DPLL(pipe), dpll);
 +      crtc->config.dpll_hw_state.dpll = dpll;
  }
  
  static void intel_set_pipe_timings(struct intel_crtc *intel_crtc)
@@@ -4744,27 -4727,6 +4744,27 @@@ static void intel_get_pipe_timings(stru
        pipe_config->requested_mode.hdisplay = ((tmp >> 16) & 0xffff) + 1;
  }
  
 +static void intel_crtc_mode_from_pipe_config(struct intel_crtc *intel_crtc,
 +                                           struct intel_crtc_config *pipe_config)
 +{
 +      struct drm_crtc *crtc = &intel_crtc->base;
 +
 +      crtc->mode.hdisplay = pipe_config->adjusted_mode.crtc_hdisplay;
 +      crtc->mode.htotal = pipe_config->adjusted_mode.crtc_htotal;
 +      crtc->mode.hsync_start = pipe_config->adjusted_mode.crtc_hsync_start;
 +      crtc->mode.hsync_end = pipe_config->adjusted_mode.crtc_hsync_end;
 +
 +      crtc->mode.vdisplay = pipe_config->adjusted_mode.crtc_vdisplay;
 +      crtc->mode.vtotal = pipe_config->adjusted_mode.crtc_vtotal;
 +      crtc->mode.vsync_start = pipe_config->adjusted_mode.crtc_vsync_start;
 +      crtc->mode.vsync_end = pipe_config->adjusted_mode.crtc_vsync_end;
 +
 +      crtc->mode.flags = pipe_config->adjusted_mode.flags;
 +
 +      crtc->mode.clock = pipe_config->adjusted_mode.clock;
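 +      /* note: the flags |= below is a no-op; mode.flags was already assigned
 +       * the same adjusted_mode.flags value above */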
 +      crtc->mode.flags |= pipe_config->adjusted_mode.flags;
 +}
 +
  static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc)
  {
        struct drm_device *dev = intel_crtc->base.dev;
@@@ -4977,7 -4939,7 +4977,7 @@@ static bool i9xx_get_pipe_config(struc
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t tmp;
  
 -      pipe_config->cpu_transcoder = crtc->pipe;
 +      pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
  
        tmp = I915_READ(PIPECONF(crtc->pipe));
                pipe_config->pixel_multiplier =
                        ((tmp & DPLL_MD_UDI_MULTIPLIER_MASK)
                         >> DPLL_MD_UDI_MULTIPLIER_SHIFT) + 1;
 +              pipe_config->dpll_hw_state.dpll_md = tmp;
        } else if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) {
                tmp = I915_READ(DPLL(crtc->pipe));
                pipe_config->pixel_multiplier =
                 * function. */
                pipe_config->pixel_multiplier = 1;
        }
 +      pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(crtc->pipe));
 +      if (!IS_VALLEYVIEW(dev)) {
 +              pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(crtc->pipe));
 +              pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(crtc->pipe));
 +      } else {
 +              /* Mask out read-only status bits. */
 +              pipe_config->dpll_hw_state.dpll &= ~(DPLL_LOCK_VLV |
 +                                                   DPLL_PORTC_READY_MASK |
 +                                                   DPLL_PORTB_READY_MASK);
 +      }
  
        return true;
  }
@@@ -5168,37 -5119,74 +5168,37 @@@ static void ironlake_init_pch_refclk(st
        BUG_ON(val != final);
  }
  
 -/* Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O. */
 -static void lpt_init_pch_refclk(struct drm_device *dev)
 +static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv)
  {
 -      struct drm_i915_private *dev_priv = dev->dev_private;
 -      struct drm_mode_config *mode_config = &dev->mode_config;
 -      struct intel_encoder *encoder;
 -      bool has_vga = false;
 -      bool is_sdv = false;
 -      u32 tmp;
 -
 -      list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 -              switch (encoder->type) {
 -              case INTEL_OUTPUT_ANALOG:
 -                      has_vga = true;
 -                      break;
 -              }
 -      }
 -
 -      if (!has_vga)
 -              return;
 -
 -      mutex_lock(&dev_priv->dpio_lock);
 -
 -      /* XXX: Rip out SDV support once Haswell ships for real. */
 -      if (IS_HASWELL(dev) && (dev->pci_device & 0xFF00) == 0x0C00)
 -              is_sdv = true;
 -
 -      tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
 -      tmp &= ~SBI_SSCCTL_DISABLE;
 -      tmp |= SBI_SSCCTL_PATHALT;
 -      intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
 -
 -      udelay(24);
 +      uint32_t tmp;
  
 -      tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
 -      tmp &= ~SBI_SSCCTL_PATHALT;
 -      intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
 +      tmp = I915_READ(SOUTH_CHICKEN2);
 +      tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
 +      I915_WRITE(SOUTH_CHICKEN2, tmp);
  
 -      if (!is_sdv) {
 -              tmp = I915_READ(SOUTH_CHICKEN2);
 -              tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
 -              I915_WRITE(SOUTH_CHICKEN2, tmp);
 +      if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
 +                             FDI_MPHY_IOSFSB_RESET_STATUS, 100))
 +              DRM_ERROR("FDI mPHY reset assert timeout\n");
  
 -              if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
 -                                     FDI_MPHY_IOSFSB_RESET_STATUS, 100))
 -                      DRM_ERROR("FDI mPHY reset assert timeout\n");
 +      tmp = I915_READ(SOUTH_CHICKEN2);
 +      tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
 +      I915_WRITE(SOUTH_CHICKEN2, tmp);
  
 -              tmp = I915_READ(SOUTH_CHICKEN2);
 -              tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
 -              I915_WRITE(SOUTH_CHICKEN2, tmp);
 +      if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
 +                              FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
 +              DRM_ERROR("FDI mPHY reset de-assert timeout\n");
 +}
  
 -              if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
 -                                      FDI_MPHY_IOSFSB_RESET_STATUS) == 0,
 -                                     100))
 -                      DRM_ERROR("FDI mPHY reset de-assert timeout\n");
 -      }
 +/* WaMPhyProgramming:hsw */
 +static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv)
 +{
 +      uint32_t tmp;
  
        tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY);
        tmp &= ~(0xFF << 24);
        tmp |= (0x12 << 24);
        intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY);
  
 -      if (is_sdv) {
 -              tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY);
 -              tmp |= 0x7FFF;
 -              intel_sbi_write(dev_priv, 0x800C, tmp, SBI_MPHY);
 -      }
 -
        tmp = intel_sbi_read(dev_priv, 0x2008, SBI_MPHY);
        tmp |= (1 << 11);
        intel_sbi_write(dev_priv, 0x2008, tmp, SBI_MPHY);
        tmp |= (1 << 11);
        intel_sbi_write(dev_priv, 0x2108, tmp, SBI_MPHY);
  
 -      if (is_sdv) {
 -              tmp = intel_sbi_read(dev_priv, 0x2038, SBI_MPHY);
 -              tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
 -              intel_sbi_write(dev_priv, 0x2038, tmp, SBI_MPHY);
 -
 -              tmp = intel_sbi_read(dev_priv, 0x2138, SBI_MPHY);
 -              tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
 -              intel_sbi_write(dev_priv, 0x2138, tmp, SBI_MPHY);
 -
 -              tmp = intel_sbi_read(dev_priv, 0x203C, SBI_MPHY);
 -              tmp |= (0x3F << 8);
 -              intel_sbi_write(dev_priv, 0x203C, tmp, SBI_MPHY);
 -
 -              tmp = intel_sbi_read(dev_priv, 0x213C, SBI_MPHY);
 -              tmp |= (0x3F << 8);
 -              intel_sbi_write(dev_priv, 0x213C, tmp, SBI_MPHY);
 -      }
 -
        tmp = intel_sbi_read(dev_priv, 0x206C, SBI_MPHY);
        tmp |= (1 << 24) | (1 << 21) | (1 << 18);
        intel_sbi_write(dev_priv, 0x206C, tmp, SBI_MPHY);
        tmp |= (1 << 24) | (1 << 21) | (1 << 18);
        intel_sbi_write(dev_priv, 0x216C, tmp, SBI_MPHY);
  
 -      if (!is_sdv) {
 -              tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
 -              tmp &= ~(7 << 13);
 -              tmp |= (5 << 13);
 -              intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
 +      tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
 +      tmp &= ~(7 << 13);
 +      tmp |= (5 << 13);
 +      intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
  
 -              tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
 -              tmp &= ~(7 << 13);
 -              tmp |= (5 << 13);
 -              intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
 -      }
 +      tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
 +      tmp &= ~(7 << 13);
 +      tmp |= (5 << 13);
 +      intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
  
        tmp = intel_sbi_read(dev_priv, 0x208C, SBI_MPHY);
        tmp &= ~0xFF;
        tmp |= (0x1C << 16);
        intel_sbi_write(dev_priv, 0x2198, tmp, SBI_MPHY);
  
 -      if (!is_sdv) {
 -              tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
 -              tmp |= (1 << 27);
 -              intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
 +      tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
 +      tmp |= (1 << 27);
 +      intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
 +
 +      tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
 +      tmp |= (1 << 27);
 +      intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
 +
 +      tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
 +      tmp &= ~(0xF << 28);
 +      tmp |= (4 << 28);
 +      intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
 +
 +      tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
 +      tmp &= ~(0xF << 28);
 +      tmp |= (4 << 28);
 +      intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
 +}
 +
 +/* Implements 3 different sequences from BSpec chapter "Display iCLK
 + * Programming" based on the parameters passed:
 + * - Sequence to enable CLKOUT_DP
 + * - Sequence to enable CLKOUT_DP without spread
 + * - Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O
 + */
 +static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
 +                               bool with_fdi)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      uint32_t reg, tmp;
 +
 +      if (WARN(with_fdi && !with_spread, "FDI requires downspread\n"))
 +              with_spread = true;
 +      if (WARN(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE &&
 +               with_fdi, "LP PCH doesn't have FDI\n"))
 +              with_fdi = false;
 +
 +      mutex_lock(&dev_priv->dpio_lock);
  
 -              tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
 -              tmp |= (1 << 27);
 -              intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
 +      tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
 +      tmp &= ~SBI_SSCCTL_DISABLE;
 +      tmp |= SBI_SSCCTL_PATHALT;
 +      intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
 +
 +      udelay(24);
  
 -              tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
 -              tmp &= ~(0xF << 28);
 -              tmp |= (4 << 28);
 -              intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
 +      if (with_spread) {
 +              tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
 +              tmp &= ~SBI_SSCCTL_PATHALT;
 +              intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
  
 -              tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
 -              tmp &= ~(0xF << 28);
 -              tmp |= (4 << 28);
 -              intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
 +              if (with_fdi) {
 +                      lpt_reset_fdi_mphy(dev_priv);
 +                      lpt_program_fdi_mphy(dev_priv);
 +              }
        }
  
 -      /* ULT uses SBI_GEN0, but ULT doesn't have VGA, so we don't care. */
 -      tmp = intel_sbi_read(dev_priv, SBI_DBUFF0, SBI_ICLK);
 -      tmp |= SBI_DBUFF0_ENABLE;
 -      intel_sbi_write(dev_priv, SBI_DBUFF0, tmp, SBI_ICLK);
 +      reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
 +             SBI_GEN0 : SBI_DBUFF0;
 +      tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
 +      tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE;
 +      intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
 +
 +      mutex_unlock(&dev_priv->dpio_lock);
 +}
 +
 +/* Sequence to disable CLKOUT_DP */
 +static void lpt_disable_clkout_dp(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      uint32_t reg, tmp;
 +
 +      mutex_lock(&dev_priv->dpio_lock);
 +
 +      reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
 +             SBI_GEN0 : SBI_DBUFF0;
 +      tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
 +      tmp &= ~SBI_GEN0_CFG_BUFFENABLE_DISABLE;
 +      intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
 +
 +      tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
 +      if (!(tmp & SBI_SSCCTL_DISABLE)) {
 +              if (!(tmp & SBI_SSCCTL_PATHALT)) {
 +                      tmp |= SBI_SSCCTL_PATHALT;
 +                      intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
 +                      udelay(32);
 +              }
 +              tmp |= SBI_SSCCTL_DISABLE;
 +              intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
 +      }
  
        mutex_unlock(&dev_priv->dpio_lock);
  }
  
 +static void lpt_init_pch_refclk(struct drm_device *dev)
 +{
 +      struct drm_mode_config *mode_config = &dev->mode_config;
 +      struct intel_encoder *encoder;
 +      bool has_vga = false;
 +
 +      list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 +              switch (encoder->type) {
 +              case INTEL_OUTPUT_ANALOG:
 +                      has_vga = true;
 +                      break;
 +              }
 +      }
 +
 +      if (has_vga)
 +              lpt_enable_clkout_dp(dev, true, true);
 +      else
 +              lpt_disable_clkout_dp(dev);
 +}
 +
  /*
   * Initialize reference clocks when the driver loads
   */
@@@ -5688,9 -5610,9 +5688,9 @@@ static uint32_t ironlake_compute_dpll(s
                << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
  
        if (is_sdvo)
 -              dpll |= DPLL_DVO_HIGH_SPEED;
 +              dpll |= DPLL_SDVO_HIGH_SPEED;
        if (intel_crtc->config.has_dp_encoder)
 -              dpll |= DPLL_DVO_HIGH_SPEED;
 +              dpll |= DPLL_SDVO_HIGH_SPEED;
  
        /* compute bitmask from p1 value */
        dpll |= (1 << (intel_crtc->config.dpll.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
@@@ -5786,7 -5708,7 +5786,7 @@@ static int ironlake_crtc_mode_set(struc
                else
                        intel_crtc->config.dpll_hw_state.fp1 = fp;
  
 -              pll = intel_get_shared_dpll(intel_crtc, dpll, fp);
 +              pll = intel_get_shared_dpll(intel_crtc);
                if (pll == NULL) {
                        DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
                                         pipe_name(pipe));
                intel_put_shared_dpll(intel_crtc);
  
        if (intel_crtc->config.has_dp_encoder)
 -              intel_dp_set_m_n(intel_crtc);
 -
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 +              intel_dp_set_m_n(intel_crtc);
  
        if (is_lvds && has_reduced_clock && i915_powersave)
                intel_crtc->lowfreq_avail = true;
        if (intel_crtc->config.has_pch_encoder) {
                pll = intel_crtc_to_shared_dpll(intel_crtc);
  
 -              I915_WRITE(PCH_DPLL(pll->id), dpll);
 -
 -              /* Wait for the clocks to stabilize. */
 -              POSTING_READ(PCH_DPLL(pll->id));
 -              udelay(150);
 -
 -              /* The pixel multiplier can only be updated once the
 -               * DPLL is enabled and the clocks are stable.
 -               *
 -               * So write it again.
 -               */
 -              I915_WRITE(PCH_DPLL(pll->id), dpll);
 -
 -              if (has_reduced_clock)
 -                      I915_WRITE(PCH_FP1(pll->id), fp2);
 -              else
 -                      I915_WRITE(PCH_FP1(pll->id), fp);
        }
  
        intel_set_pipe_timings(intel_crtc);
@@@ -5877,7 -5820,7 +5877,7 @@@ static bool ironlake_get_pipe_config(st
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t tmp;
  
 -      pipe_config->cpu_transcoder = crtc->pipe;
 +      pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
  
        tmp = I915_READ(PIPECONF(crtc->pipe));
  
                ironlake_get_fdi_m_n_config(crtc, pipe_config);
  
 -              /* XXX: Can't properly read out the pch dpll pixel multiplier
 -               * since we don't have state tracking for pch clocks yet. */
 -              pipe_config->pixel_multiplier = 1;
 -
                if (HAS_PCH_IBX(dev_priv->dev)) {
 -                      pipe_config->shared_dpll = crtc->pipe;
 +                      pipe_config->shared_dpll =
 +                              (enum intel_dpll_id) crtc->pipe;
                } else {
                        tmp = I915_READ(PCH_DPLL_SEL);
                        if (tmp & TRANS_DPLLB_SEL(crtc->pipe))
  
                WARN_ON(!pll->get_hw_state(dev_priv, pll,
                                           &pipe_config->dpll_hw_state));
 +
 +              tmp = pipe_config->dpll_hw_state.dpll;
 +              pipe_config->pixel_multiplier =
 +                      ((tmp & PLL_REF_SDVO_HDMI_MULTIPLIER_MASK)
 +                       >> PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT) + 1;
        } else {
                pipe_config->pixel_multiplier = 1;
        }
        return true;
  }
  
 +static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
 +{
 +      struct drm_device *dev = dev_priv->dev;
 +      struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
 +      struct intel_crtc *crtc;
 +      unsigned long irqflags;
 +      uint32_t val;
 +
 +      list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
 +              WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n",
 +                   pipe_name(crtc->pipe));
 +
 +      WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
 +      WARN(plls->spll_refcount, "SPLL enabled\n");
 +      WARN(plls->wrpll1_refcount, "WRPLL1 enabled\n");
 +      WARN(plls->wrpll2_refcount, "WRPLL2 enabled\n");
 +      WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
 +      WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
 +           "CPU PWM1 enabled\n");
 +      WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE,
 +           "CPU PWM2 enabled\n");
 +      WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE,
 +           "PCH PWM1 enabled\n");
 +      WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
 +           "Utility pin enabled\n");
 +      WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n");
 +
 +      spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 +      val = I915_READ(DEIMR);
 +      WARN((val & ~DE_PCH_EVENT_IVB) != val,
 +           "Unexpected DEIMR bits enabled: 0x%x\n", val);
 +      val = I915_READ(SDEIMR);
 +      WARN((val | SDE_HOTPLUG_MASK_CPT) != 0xffffffff,
 +           "Unexpected SDEIMR bits enabled: 0x%x\n", val);
 +      spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 +}
 +
 +/*
 + * This function implements pieces of two sequences from BSpec:
 + * - Sequence for display software to disable LCPLL
 + * - Sequence for display software to allow package C8+
 + * The steps implemented here are just the steps that actually touch the LCPLL
 + * register. Callers should take care of disabling all the display engine
 + * functions, doing the mode unset, fixing interrupts, etc.
 + */
 +void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
 +                     bool switch_to_fclk, bool allow_power_down)
 +{
 +      uint32_t val;
 +
 +      assert_can_disable_lcpll(dev_priv);
 +
 +      val = I915_READ(LCPLL_CTL);
 +
 +      if (switch_to_fclk) {
 +              val |= LCPLL_CD_SOURCE_FCLK;
 +              I915_WRITE(LCPLL_CTL, val);
 +
 +              if (wait_for_atomic_us(I915_READ(LCPLL_CTL) &
 +                                     LCPLL_CD_SOURCE_FCLK_DONE, 1))
 +                      DRM_ERROR("Switching to FCLK failed\n");
 +
 +              val = I915_READ(LCPLL_CTL);
 +      }
 +
 +      val |= LCPLL_PLL_DISABLE;
 +      I915_WRITE(LCPLL_CTL, val);
 +      POSTING_READ(LCPLL_CTL);
 +
 +      if (wait_for((I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK) == 0, 1))
 +              DRM_ERROR("LCPLL still locked\n");
 +
 +      val = I915_READ(D_COMP);
 +      val |= D_COMP_COMP_DISABLE;
 +      I915_WRITE(D_COMP, val);
 +      POSTING_READ(D_COMP);
 +      ndelay(100);
 +
 +      if (wait_for((I915_READ(D_COMP) & D_COMP_RCOMP_IN_PROGRESS) == 0, 1))
 +              DRM_ERROR("D_COMP RCOMP still in progress\n");
 +
 +      if (allow_power_down) {
 +              val = I915_READ(LCPLL_CTL);
 +              val |= LCPLL_POWER_DOWN_ALLOW;
 +              I915_WRITE(LCPLL_CTL, val);
 +              POSTING_READ(LCPLL_CTL);
 +      }
 +}
 +
 +/*
 + * Fully restores LCPLL, disallowing power down and switching back to LCPLL
 + * source.
 + */
 +void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
 +{
 +      uint32_t val;
 +
 +      val = I915_READ(LCPLL_CTL);
 +
 +      if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK |
 +                  LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK)
 +              return;
 +
 +      /* Make sure we're not in PC8 state before disabling PC8, otherwise
 +       * we'll hang the machine! */
 +      dev_priv->uncore.funcs.force_wake_get(dev_priv);
 +
 +      if (val & LCPLL_POWER_DOWN_ALLOW) {
 +              val &= ~LCPLL_POWER_DOWN_ALLOW;
 +              I915_WRITE(LCPLL_CTL, val);
 +              POSTING_READ(LCPLL_CTL);
 +      }
 +
 +      val = I915_READ(D_COMP);
 +      val |= D_COMP_COMP_FORCE;
 +      val &= ~D_COMP_COMP_DISABLE;
 +      I915_WRITE(D_COMP, val);
 +      POSTING_READ(D_COMP);
 +
 +      val = I915_READ(LCPLL_CTL);
 +      val &= ~LCPLL_PLL_DISABLE;
 +      I915_WRITE(LCPLL_CTL, val);
 +
 +      if (wait_for(I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK, 5))
 +              DRM_ERROR("LCPLL not locked yet\n");
 +
 +      if (val & LCPLL_CD_SOURCE_FCLK) {
 +              val = I915_READ(LCPLL_CTL);
 +              val &= ~LCPLL_CD_SOURCE_FCLK;
 +              I915_WRITE(LCPLL_CTL, val);
 +
 +              if (wait_for_atomic_us((I915_READ(LCPLL_CTL) &
 +                                      LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
 +                      DRM_ERROR("Switching back to LCPLL failed\n");
 +      }
 +
 +      dev_priv->uncore.funcs.force_wake_put(dev_priv);
 +}
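
/* Editor's note (illustrative, not part of this commit): hsw_disable_lcpll()
 * and hsw_restore_lcpll() are used as a pair by the package C8 code below;
 * hsw_enable_pc8_work() calls hsw_disable_lcpll(dev_priv, true, true) as the
 * final step of entering PC8+, and __hsw_disable_package_c8() calls
 * hsw_restore_lcpll() first when leaving it. */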
 +
 +void hsw_enable_pc8_work(struct work_struct *__work)
 +{
 +      struct drm_i915_private *dev_priv =
 +              container_of(to_delayed_work(__work), struct drm_i915_private,
 +                           pc8.enable_work);
 +      struct drm_device *dev = dev_priv->dev;
 +      uint32_t val;
 +
 +      if (dev_priv->pc8.enabled)
 +              return;
 +
 +      DRM_DEBUG_KMS("Enabling package C8+\n");
 +
 +      dev_priv->pc8.enabled = true;
 +
 +      if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
 +              val = I915_READ(SOUTH_DSPCLK_GATE_D);
 +              val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
 +              I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
 +      }
 +
 +      lpt_disable_clkout_dp(dev);
 +      hsw_pc8_disable_interrupts(dev);
 +      hsw_disable_lcpll(dev_priv, true, true);
 +}
 +
 +static void __hsw_enable_package_c8(struct drm_i915_private *dev_priv)
 +{
 +      WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock));
 +      WARN(dev_priv->pc8.disable_count < 1,
 +           "pc8.disable_count: %d\n", dev_priv->pc8.disable_count);
 +
 +      dev_priv->pc8.disable_count--;
 +      if (dev_priv->pc8.disable_count != 0)
 +              return;
 +
 +      schedule_delayed_work(&dev_priv->pc8.enable_work,
 +                            msecs_to_jiffies(i915_pc8_timeout));
 +}
 +
 +static void __hsw_disable_package_c8(struct drm_i915_private *dev_priv)
 +{
 +      struct drm_device *dev = dev_priv->dev;
 +      uint32_t val;
 +
 +      WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock));
 +      WARN(dev_priv->pc8.disable_count < 0,
 +           "pc8.disable_count: %d\n", dev_priv->pc8.disable_count);
 +
 +      dev_priv->pc8.disable_count++;
 +      if (dev_priv->pc8.disable_count != 1)
 +              return;
 +
 +      cancel_delayed_work_sync(&dev_priv->pc8.enable_work);
 +      if (!dev_priv->pc8.enabled)
 +              return;
 +
 +      DRM_DEBUG_KMS("Disabling package C8+\n");
 +
 +      hsw_restore_lcpll(dev_priv);
 +      hsw_pc8_restore_interrupts(dev);
 +      lpt_init_pch_refclk(dev);
 +
 +      if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
 +              val = I915_READ(SOUTH_DSPCLK_GATE_D);
 +              val |= PCH_LP_PARTITION_LEVEL_DISABLE;
 +              I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
 +      }
 +
 +      intel_prepare_ddi(dev);
 +      i915_gem_init_swizzling(dev);
 +      mutex_lock(&dev_priv->rps.hw_lock);
 +      gen6_update_ring_freq(dev);
 +      mutex_unlock(&dev_priv->rps.hw_lock);
 +      dev_priv->pc8.enabled = false;
 +}
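
/* Editor's note (illustrative, not part of this commit): pc8.disable_count is
 * a refcount of PC8 blockers. __hsw_disable_package_c8() increments it and, on
 * the 0 -> 1 transition, cancels any pending enable work and restores LCPLL,
 * interrupts and the PCH refclk immediately; __hsw_enable_package_c8()
 * decrements it and, on the 1 -> 0 transition, schedules hsw_enable_pc8_work()
 * after the i915_pc8_timeout delay instead of entering PC8 synchronously. */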
 +
 +void hsw_enable_package_c8(struct drm_i915_private *dev_priv)
 +{
 +      mutex_lock(&dev_priv->pc8.lock);
 +      __hsw_enable_package_c8(dev_priv);
 +      mutex_unlock(&dev_priv->pc8.lock);
 +}
 +
 +void hsw_disable_package_c8(struct drm_i915_private *dev_priv)
 +{
 +      mutex_lock(&dev_priv->pc8.lock);
 +      __hsw_disable_package_c8(dev_priv);
 +      mutex_unlock(&dev_priv->pc8.lock);
 +}
 +
 +static bool hsw_can_enable_package_c8(struct drm_i915_private *dev_priv)
 +{
 +      struct drm_device *dev = dev_priv->dev;
 +      struct intel_crtc *crtc;
 +      uint32_t val;
 +
 +      list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
 +              if (crtc->base.enabled)
 +                      return false;
 +
 +      /* This case is still possible since we have the i915.disable_power_well
 +       * parameter and also the KVMr or something else might be requesting the
 +       * power well. */
 +      val = I915_READ(HSW_PWR_WELL_DRIVER);
 +      if (val != 0) {
 +              DRM_DEBUG_KMS("Not enabling PC8: power well on\n");
 +              return false;
 +      }
 +
 +      return true;
 +}
 +
 +/* Since we're called from modeset_global_resources there's no way to
 + * symmetrically increase and decrease the refcount, so we use
 + * dev_priv->pc8.requirements_met to track whether we already have the refcount
 + * or not.
 + */
 +static void hsw_update_package_c8(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      bool allow;
 +
 +      if (!i915_enable_pc8)
 +              return;
 +
 +      mutex_lock(&dev_priv->pc8.lock);
 +
 +      allow = hsw_can_enable_package_c8(dev_priv);
 +
 +      if (allow == dev_priv->pc8.requirements_met)
 +              goto done;
 +
 +      dev_priv->pc8.requirements_met = allow;
 +
 +      if (allow)
 +              __hsw_enable_package_c8(dev_priv);
 +      else
 +              __hsw_disable_package_c8(dev_priv);
 +
 +done:
 +      mutex_unlock(&dev_priv->pc8.lock);
 +}
 +
 +static void hsw_package_c8_gpu_idle(struct drm_i915_private *dev_priv)
 +{
 +      if (!dev_priv->pc8.gpu_idle) {
 +              dev_priv->pc8.gpu_idle = true;
 +              hsw_enable_package_c8(dev_priv);
 +      }
 +}
 +
 +static void hsw_package_c8_gpu_busy(struct drm_i915_private *dev_priv)
 +{
 +      if (dev_priv->pc8.gpu_idle) {
 +              dev_priv->pc8.gpu_idle = false;
 +              hsw_disable_package_c8(dev_priv);
 +      }
 +}
 +
  static void haswell_modeset_global_resources(struct drm_device *dev)
  {
        bool enable = false;
        }
  
        intel_set_power_well(dev, enable);
 +
 +      hsw_update_package_c8(dev);
  }
  
  static int haswell_crtc_mode_set(struct drm_crtc *crtc,
@@@ -6295,7 -5935,7 +6295,7 @@@ static bool haswell_get_pipe_config(str
        enum intel_display_power_domain pfit_domain;
        uint32_t tmp;
  
 -      pipe_config->cpu_transcoder = crtc->pipe;
 +      pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
  
        tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@@ -6365,8 -6005,11 +6365,8 @@@ static int intel_crtc_mode_set(struct d
  {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      struct drm_encoder_helper_funcs *encoder_funcs;
        struct intel_encoder *encoder;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct drm_display_mode *adjusted_mode =
 -              &intel_crtc->config.adjusted_mode;
        struct drm_display_mode *mode = &intel_crtc->config.requested_mode;
        int pipe = intel_crtc->pipe;
        int ret;
                        encoder->base.base.id,
                        drm_get_encoder_name(&encoder->base),
                        mode->base.id, mode->name);
 -              if (encoder->mode_set) {
 -                      encoder->mode_set(encoder);
 -              } else {
 -                      encoder_funcs = encoder->base.helper_private;
 -                      encoder_funcs->mode_set(&encoder->base, mode, adjusted_mode);
 -              }
 +              encoder->mode_set(encoder);
        }
  
        return 0;
@@@ -6900,7 -6548,7 +6900,7 @@@ static int intel_crtc_cursor_set(struc
                        goto fail_unpin;
                }
  
 -              addr = obj->gtt_offset;
 +              addr = i915_gem_obj_ggtt_offset(obj);
        } else {
                int align = IS_I830(dev) ? 16 * 1024 : 256;
                ret = i915_gem_attach_phys_object(dev, obj,
                        if (intel_crtc->cursor_bo != obj)
                                i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo);
                } else
 -                      i915_gem_object_unpin(intel_crtc->cursor_bo);
 +                      i915_gem_object_unpin_from_display_plane(intel_crtc->cursor_bo);
                drm_gem_object_unreference(&intel_crtc->cursor_bo->base);
        }
  
  
        return 0;
  fail_unpin:
 -      i915_gem_object_unpin(obj);
 +      i915_gem_object_unpin_from_display_plane(obj);
  fail_locked:
        mutex_unlock(&dev->struct_mutex);
  fail:
@@@ -7227,12 -6875,11 +7227,12 @@@ void intel_release_load_detect_pipe(str
  }
  
  /* Returns the clock of the currently programmed mode of the given pipe. */
 -static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc)
 +static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 +                              struct intel_crtc_config *pipe_config)
  {
 +      struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      int pipe = intel_crtc->pipe;
 +      int pipe = pipe_config->cpu_transcoder;
        u32 dpll = I915_READ(DPLL(pipe));
        u32 fp;
        intel_clock_t clock;
                default:
                        DRM_DEBUG_KMS("Unknown DPLL mode %08x in programmed "
                                  "mode\n", (int)(dpll & DPLL_MODE_MASK));
 -                      return 0;
 +                      pipe_config->adjusted_mode.clock = 0;
 +                      return;
                }
  
                if (IS_PINEVIEW(dev))
                }
        }
  
 -      /* XXX: It would be nice to validate the clocks, but we can't reuse
 -       * i830PllIsValid() because it relies on the xf86_config connector
 -       * configuration being accurate, which it isn't necessarily.
 +      pipe_config->adjusted_mode.clock = clock.dot *
 +              pipe_config->pixel_multiplier;
 +}
 +
 +static void ironlake_crtc_clock_get(struct intel_crtc *crtc,
 +                                  struct intel_crtc_config *pipe_config)
 +{
 +      struct drm_device *dev = crtc->base.dev;
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
 +      int link_freq, repeat;
 +      u64 clock;
 +      u32 link_m, link_n;
 +
 +      repeat = pipe_config->pixel_multiplier;
 +
 +      /*
 +       * The calculation for the data clock is:
 +       * pixel_clock = ((m/n)*(link_clock * nr_lanes * repeat))/bpp
 +       * But we want to avoid losing precision if possible, so:
 +       * pixel_clock = ((m * link_clock * nr_lanes * repeat)/(n*bpp))
 +       *
 +       * and the link clock is simpler:
 +       * link_clock = (m * link_clock * repeat) / n
 +       */
 +
 +      /*
 +       * We need to get the FDI or DP link clock here to derive
 +       * the M/N dividers.
 +       *
 +       * For FDI, we read it from the BIOS or use a fixed 2.7GHz.
 +       * For DP, it's either 1.62GHz or 2.7GHz.
 +       * We do our calculations in 10*MHz since we don't need much precision.
         */
 +      if (pipe_config->has_pch_encoder)
 +              link_freq = intel_fdi_link_freq(dev) * 10000;
 +      else
 +              link_freq = pipe_config->port_clock;
 +
 +      link_m = I915_READ(PIPE_LINK_M1(cpu_transcoder));
 +      link_n = I915_READ(PIPE_LINK_N1(cpu_transcoder));
 +
 +      if (!link_m || !link_n)
 +              return;
  
 -      return clock.dot;
 +      clock = ((u64)link_m * (u64)link_freq * (u64)repeat);
 +      do_div(clock, link_n);
 +
 +      pipe_config->adjusted_mode.clock = clock;
  }
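
/* Editor's note with purely illustrative numbers (not part of this commit):
 * the pixel clock is recovered as link_m * link_freq * repeat / link_n, so
 * e.g. link_m = 2, link_n = 3, link_freq = 270000 and repeat = 1 would give
 * adjusted_mode.clock = 2 * 270000 * 1 / 3 = 180000. */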
  
  /** Returns the currently programmed mode of the given pipe. */
@@@ -7368,7 -6971,6 +7368,7 @@@ struct drm_display_mode *intel_crtc_mod
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
        struct drm_display_mode *mode;
 +      struct intel_crtc_config pipe_config;
        int htot = I915_READ(HTOTAL(cpu_transcoder));
        int hsync = I915_READ(HSYNC(cpu_transcoder));
        int vtot = I915_READ(VTOTAL(cpu_transcoder));
        if (!mode)
                return NULL;
  
 -      mode->clock = intel_crtc_clock_get(dev, crtc);
 +      /*
 +       * Construct a pipe_config sufficient for getting the clock info
 +       * back out of crtc_clock_get.
 +       *
 +       * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need
 +       * to use a real value here instead.
 +       */
 +      pipe_config.cpu_transcoder = (enum transcoder) intel_crtc->pipe;
 +      pipe_config.pixel_multiplier = 1;
 +      i9xx_crtc_clock_get(intel_crtc, &pipe_config);
 +
 +      mode->clock = pipe_config.adjusted_mode.clock;
        mode->hdisplay = (htot & 0xffff) + 1;
        mode->htotal = ((htot & 0xffff0000) >> 16) + 1;
        mode->hsync_start = (hsync & 0xffff) + 1;
@@@ -7473,19 -7064,13 +7473,19 @@@ static void intel_decrease_pllclock(str
  
  void intel_mark_busy(struct drm_device *dev)
  {
 -      i915_update_gfx_val(dev->dev_private);
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +
 +      hsw_package_c8_gpu_busy(dev_priv);
 +      i915_update_gfx_val(dev_priv);
  }
  
  void intel_mark_idle(struct drm_device *dev)
  {
 +      struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
  
 +      hsw_package_c8_gpu_idle(dev_priv);
 +
        if (!i915_powersave)
                return;
  
@@@ -7650,8 -7235,7 +7650,8 @@@ inline static void intel_mark_page_flip
  static int intel_gen2_queue_flip(struct drm_device *dev,
                                 struct drm_crtc *crtc,
                                 struct drm_framebuffer *fb,
 -                               struct drm_i915_gem_object *obj)
 +                               struct drm_i915_gem_object *obj,
 +                               uint32_t flags)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
        intel_ring_emit(ring, fb->pitches[0]);
 -      intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 +      intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
        intel_ring_emit(ring, 0); /* aux display base address, unused */
  
        intel_mark_page_flip_active(intel_crtc);
@@@ -7695,8 -7279,7 +7695,8 @@@ err
  static int intel_gen3_queue_flip(struct drm_device *dev,
                                 struct drm_crtc *crtc,
                                 struct drm_framebuffer *fb,
 -                               struct drm_i915_gem_object *obj)
 +                               struct drm_i915_gem_object *obj,
 +                               uint32_t flags)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
        intel_ring_emit(ring, fb->pitches[0]);
 -      intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 +      intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
        intel_ring_emit(ring, MI_NOOP);
  
        intel_mark_page_flip_active(intel_crtc);
@@@ -7737,8 -7320,7 +7737,8 @@@ err
  static int intel_gen4_queue_flip(struct drm_device *dev,
                                 struct drm_crtc *crtc,
                                 struct drm_framebuffer *fb,
 -                               struct drm_i915_gem_object *obj)
 +                               struct drm_i915_gem_object *obj,
 +                               uint32_t flags)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
        intel_ring_emit(ring, fb->pitches[0]);
        intel_ring_emit(ring,
 -                      (obj->gtt_offset + intel_crtc->dspaddr_offset) |
 +                      (i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset) |
                        obj->tiling_mode);
  
        /* XXX Enabling the panel-fitter across page-flip is so far
@@@ -7786,8 -7368,7 +7786,8 @@@ err
  static int intel_gen6_queue_flip(struct drm_device *dev,
                                 struct drm_crtc *crtc,
                                 struct drm_framebuffer *fb,
 -                               struct drm_i915_gem_object *obj)
 +                               struct drm_i915_gem_object *obj,
 +                               uint32_t flags)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
        intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode);
 -      intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 +      intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
  
        /* Contrary to the suggestions in the documentation,
         * "Enable Panel Fitter" does not seem to be required when page
@@@ -7837,8 -7418,7 +7837,8 @@@ err
  static int intel_gen7_queue_flip(struct drm_device *dev,
                                 struct drm_crtc *crtc,
                                 struct drm_framebuffer *fb,
 -                               struct drm_i915_gem_object *obj)
 +                               struct drm_i915_gem_object *obj,
 +                               uint32_t flags)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  
        intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
        intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 -      intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 +      intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
        intel_ring_emit(ring, (MI_NOOP));
  
        intel_mark_page_flip_active(intel_crtc);
  static int intel_default_queue_flip(struct drm_device *dev,
                                    struct drm_crtc *crtc,
                                    struct drm_framebuffer *fb,
 -                                  struct drm_i915_gem_object *obj)
 +                                  struct drm_i915_gem_object *obj,
 +                                  uint32_t flags)
  {
        return -ENODEV;
  }
  
  static int intel_crtc_page_flip(struct drm_crtc *crtc,
                                struct drm_framebuffer *fb,
 -                              struct drm_pending_vblank_event *event)
 +                              struct drm_pending_vblank_event *event,
 +                              uint32_t page_flip_flags)
  {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        atomic_inc(&intel_crtc->unpin_work_count);
        intel_crtc->reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
  
 -      ret = dev_priv->display.queue_flip(dev, crtc, fb, obj);
 +      ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, page_flip_flags);
        if (ret)
                goto cleanup_pending;
  
@@@ -8211,6 -7789,7 +8211,6 @@@ intel_modeset_pipe_config(struct drm_cr
                          struct drm_display_mode *mode)
  {
        struct drm_device *dev = crtc->dev;
 -      struct drm_encoder_helper_funcs *encoder_funcs;
        struct intel_encoder *encoder;
        struct intel_crtc_config *pipe_config;
        int plane_bpp, ret = -EINVAL;
  
        drm_mode_copy(&pipe_config->adjusted_mode, mode);
        drm_mode_copy(&pipe_config->requested_mode, mode);
 -      pipe_config->cpu_transcoder = to_intel_crtc(crtc)->pipe;
 +      pipe_config->cpu_transcoder =
 +              (enum transcoder) to_intel_crtc(crtc)->pipe;
        pipe_config->shared_dpll = DPLL_ID_PRIVATE;
  
 +      /*
 +       * Sanitize sync polarity flags based on requested ones. If neither
 +       * positive or negative polarity is requested, treat this as meaning
 +       * negative polarity.
 +       */
 +      if (!(pipe_config->adjusted_mode.flags &
 +            (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC)))
 +              pipe_config->adjusted_mode.flags |= DRM_MODE_FLAG_NHSYNC;
 +
 +      if (!(pipe_config->adjusted_mode.flags &
 +            (DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC)))
 +              pipe_config->adjusted_mode.flags |= DRM_MODE_FLAG_NVSYNC;
 +
        /* Compute a starting value for pipe_config->pipe_bpp taking the source
         * plane pixel format and any sink constraints into account. Returns the
         * source plane bpp so that dithering can be selected on mismatches
@@@ -8258,9 -7823,6 +8258,9 @@@ encoder_retry
        pipe_config->port_clock = 0;
        pipe_config->pixel_multiplier = 1;
  
 +      /* Fill in default crtc timings, allow encoders to overwrite them. */
 +      drm_mode_set_crtcinfo(&pipe_config->adjusted_mode, 0);
 +
        /* Pass our mode to the connectors and the CRTC to give them a chance to
         * adjust it according to limitations or connector properties, and also
         * a chance to reject the mode entirely.
                if (&encoder->new_crtc->base != crtc)
                        continue;
  
 -              if (encoder->compute_config) {
 -                      if (!(encoder->compute_config(encoder, pipe_config))) {
 -                              DRM_DEBUG_KMS("Encoder config failure\n");
 -                              goto fail;
 -                      }
 -
 -                      continue;
 -              }
 -
 -              encoder_funcs = encoder->base.helper_private;
 -              if (!(encoder_funcs->mode_fixup(&encoder->base,
 -                                              &pipe_config->requested_mode,
 -                                              &pipe_config->adjusted_mode))) {
 -                      DRM_DEBUG_KMS("Encoder fixup failed\n");
 +              if (!(encoder->compute_config(encoder, pipe_config))) {
 +                      DRM_DEBUG_KMS("Encoder config failure\n");
                        goto fail;
                }
        }
@@@ -8467,28 -8041,6 +8467,28 @@@ intel_modeset_update_state(struct drm_d
  
  }
  
 +static bool intel_fuzzy_clock_check(struct intel_crtc_config *cur,
 +                                  struct intel_crtc_config *new)
 +{
 +      int clock1, clock2, diff;
 +
 +      clock1 = cur->adjusted_mode.clock;
 +      clock2 = new->adjusted_mode.clock;
 +
 +      if (clock1 == clock2)
 +              return true;
 +
 +      if (!clock1 || !clock2)
 +              return false;
 +
 +      diff = abs(clock1 - clock2);
 +
 +      if (((((diff + clock1 + clock2) * 100)) / (clock1 + clock2)) < 105)
 +              return true;
 +
 +      return false;
 +}
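
/* Editor's note (illustrative, not part of this commit): the check above
 * accepts the two clocks when their difference is under roughly 5% of their
 * sum, i.e. about a 10% relative tolerance. For example, 100000 vs 104000
 * passes (diff 4000 < 5% of 204000), while 100000 vs 111000 is rejected
 * (diff 11000 > 5% of 211000). */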
 +
  #define for_each_intel_crtc_masked(dev, mask, intel_crtc) \
        list_for_each_entry((intel_crtc), \
                            &(dev)->mode_config.crtc_list, \
@@@ -8520,7 -8072,7 +8520,7 @@@ intel_pipe_config_compare(struct drm_de
  
  #define PIPE_CONF_CHECK_FLAGS(name, mask)     \
        if ((current_config->name ^ pipe_config->name) & (mask)) { \
 -              DRM_ERROR("mismatch in " #name " " \
 +              DRM_ERROR("mismatch in " #name "(" #mask ") "      \
                          "(expected %i, found %i)\n", \
                          current_config->name & (mask), \
                          pipe_config->name & (mask)); \
        PIPE_CONF_CHECK_I(adjusted_mode.crtc_vsync_start);
        PIPE_CONF_CHECK_I(adjusted_mode.crtc_vsync_end);
  
 -      if (!HAS_PCH_SPLIT(dev))
 -              PIPE_CONF_CHECK_I(pixel_multiplier);
 +      PIPE_CONF_CHECK_I(pixel_multiplier);
  
        PIPE_CONF_CHECK_FLAGS(adjusted_mode.flags,
                              DRM_MODE_FLAG_INTERLACE);
  
        PIPE_CONF_CHECK_I(shared_dpll);
        PIPE_CONF_CHECK_X(dpll_hw_state.dpll);
 +      PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md);
        PIPE_CONF_CHECK_X(dpll_hw_state.fp0);
        PIPE_CONF_CHECK_X(dpll_hw_state.fp1);
  
  #undef PIPE_CONF_CHECK_FLAGS
  #undef PIPE_CONF_QUIRK
  
 +      if (!IS_HASWELL(dev)) {
 +              if (!intel_fuzzy_clock_check(current_config, pipe_config)) {
 +                      DRM_ERROR("mismatch in clock (expected %d, found %d)\n",
 +                                current_config->adjusted_mode.clock,
 +                                pipe_config->adjusted_mode.clock);
 +                      return false;
 +              }
 +      }
 +
        return true;
  }
  
@@@ -8726,15 -8269,14 +8726,17 @@@ check_crtc_state(struct drm_device *dev
  
                list_for_each_entry(encoder, &dev->mode_config.encoder_list,
                                    base.head) {
+                       enum pipe pipe;
                        if (encoder->base.crtc != &crtc->base)
                                continue;
-                       if (encoder->get_config)
+                       if (encoder->get_config &&
+                           encoder->get_hw_state(encoder, &pipe))
                                encoder->get_config(encoder, &pipe_config);
                }
  
 +              if (dev_priv->display.get_clock)
 +                      dev_priv->display.get_clock(crtc, &pipe_config);
 +
                WARN(crtc->active != active,
                     "crtc active state doesn't match with hw state "
                     "(expected %i, found %i)\n", crtc->active, active);
@@@ -8912,9 -8454,9 +8914,9 @@@ out
        return ret;
  }
  
 -int intel_set_mode(struct drm_crtc *crtc,
 -                   struct drm_display_mode *mode,
 -                   int x, int y, struct drm_framebuffer *fb)
 +static int intel_set_mode(struct drm_crtc *crtc,
 +                        struct drm_display_mode *mode,
 +                        int x, int y, struct drm_framebuffer *fb)
  {
        int ret;
  
@@@ -9031,16 -8573,8 +9033,16 @@@ intel_set_config_compute_mode_changes(s
        } else if (set->crtc->fb != set->fb) {
                /* If we have no fb then treat it as a full mode set */
                if (set->crtc->fb == NULL) {
 -                      DRM_DEBUG_KMS("crtc has no fb, full mode set\n");
 -                      config->mode_changed = true;
 +                      struct intel_crtc *intel_crtc =
 +                              to_intel_crtc(set->crtc);
 +
 +                      if (intel_crtc->active && i915_fastboot) {
 +                              DRM_DEBUG_KMS("crtc has no fb, will flip\n");
 +                              config->fb_changed = true;
 +                      } else {
 +                              DRM_DEBUG_KMS("inactive crtc, full mode set\n");
 +                              config->mode_changed = true;
 +                      }
                } else if (set->fb == NULL) {
                        config->mode_changed = true;
                } else if (set->fb->pixel_format !=
                drm_mode_debug_printmodeline(set->mode);
                config->mode_changed = true;
        }
 +
 +      DRM_DEBUG_KMS("computed changes for [CRTC:%d], mode_changed=%d, fb_changed=%d\n",
 +                      set->crtc->base.id, config->mode_changed, config->fb_changed);
  }
  
  static int
@@@ -9073,13 -8604,14 +9075,13 @@@ intel_modeset_stage_output_state(struc
        struct drm_crtc *new_crtc;
        struct intel_connector *connector;
        struct intel_encoder *encoder;
 -      int count, ro;
 +      int ro;
  
        /* The upper layers ensure that we either disable a crtc or have a list
         * of connectors. For paranoia, double-check this. */
        WARN_ON(!set->fb && (set->num_connectors != 0));
        WARN_ON(set->fb && (set->num_connectors == 0));
  
 -      count = 0;
        list_for_each_entry(connector, &dev->mode_config.connector_list,
                            base.head) {
                /* Otherwise traverse passed in connector list and get encoders
        /* connector->new_encoder is now updated for all connectors. */
  
        /* Update crtc of enabled connectors. */
 -      count = 0;
        list_for_each_entry(connector, &dev->mode_config.connector_list,
                            base.head) {
                if (!connector->new_encoder)
@@@ -9271,32 -8804,19 +9273,32 @@@ static bool ibx_pch_dpll_get_hw_state(s
        return val & DPLL_VCO_ENABLE;
  }
  
 +static void ibx_pch_dpll_mode_set(struct drm_i915_private *dev_priv,
 +                                struct intel_shared_dpll *pll)
 +{
 +      I915_WRITE(PCH_FP0(pll->id), pll->hw_state.fp0);
 +      I915_WRITE(PCH_FP1(pll->id), pll->hw_state.fp1);
 +}
 +
  static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv,
                                struct intel_shared_dpll *pll)
  {
 -      uint32_t reg, val;
 -
        /* PCH refclock must be enabled first */
        assert_pch_refclk_enabled(dev_priv);
  
 -      reg = PCH_DPLL(pll->id);
 -      val = I915_READ(reg);
 -      val |= DPLL_VCO_ENABLE;
 -      I915_WRITE(reg, val);
 -      POSTING_READ(reg);
 +      I915_WRITE(PCH_DPLL(pll->id), pll->hw_state.dpll);
 +
 +      /* Wait for the clocks to stabilize. */
 +      POSTING_READ(PCH_DPLL(pll->id));
 +      udelay(150);
 +
 +      /* The pixel multiplier can only be updated once the
 +       * DPLL is enabled and the clocks are stable.
 +       *
 +       * So write it again.
 +       */
 +      I915_WRITE(PCH_DPLL(pll->id), pll->hw_state.dpll);
 +      POSTING_READ(PCH_DPLL(pll->id));
        udelay(200);
  }
  
@@@ -9305,6 -8825,7 +9307,6 @@@ static void ibx_pch_dpll_disable(struc
  {
        struct drm_device *dev = dev_priv->dev;
        struct intel_crtc *crtc;
 -      uint32_t reg, val;
  
        /* Make sure no transcoder is still depending on us. */
        list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
                        assert_pch_transcoder_disabled(dev_priv, crtc->pipe);
        }
  
 -      reg = PCH_DPLL(pll->id);
 -      val = I915_READ(reg);
 -      val &= ~DPLL_VCO_ENABLE;
 -      I915_WRITE(reg, val);
 -      POSTING_READ(reg);
 +      I915_WRITE(PCH_DPLL(pll->id), 0);
 +      POSTING_READ(PCH_DPLL(pll->id));
        udelay(200);
  }
  
@@@ -9332,7 -8856,6 +9334,7 @@@ static void ibx_pch_dpll_init(struct dr
        for (i = 0; i < dev_priv->num_shared_dpll; i++) {
                dev_priv->shared_dplls[i].id = i;
                dev_priv->shared_dplls[i].name = ibx_pch_dpll_names[i];
 +              dev_priv->shared_dplls[i].mode_set = ibx_pch_dpll_mode_set;
                dev_priv->shared_dplls[i].enable = ibx_pch_dpll_enable;
                dev_priv->shared_dplls[i].disable = ibx_pch_dpll_disable;
                dev_priv->shared_dplls[i].get_hw_state =
@@@ -9512,13 -9035,8 +9514,13 @@@ static void intel_setup_outputs(struct 
                        intel_dp_init(dev, PCH_DP_D, PORT_D);
        } else if (IS_VALLEYVIEW(dev)) {
                /* Check for built-in panel first. Shares lanes with HDMI on SDVOC */
 -              if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED)
 -                      intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C);
 +              if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIC) & SDVO_DETECTED) {
 +                      intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIC,
 +                                      PORT_C);
 +                      if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED)
 +                              intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C,
 +                                            PORT_C);
 +              }
  
                if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIB) & SDVO_DETECTED) {
                        intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIB,
        drm_helper_move_panel_connectors_to_head(dev);
  }
  
 +void intel_framebuffer_fini(struct intel_framebuffer *fb)
 +{
 +      drm_framebuffer_cleanup(&fb->base);
 +      drm_gem_object_unreference_unlocked(&fb->obj->base);
 +}
 +
  static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
  {
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
  
 -      drm_framebuffer_cleanup(fb);
 -      drm_gem_object_unreference_unlocked(&intel_fb->obj->base);
 -
 +      intel_framebuffer_fini(intel_fb);
        kfree(intel_fb);
  }
  
@@@ -9758,7 -9272,6 +9760,7 @@@ static void intel_init_display(struct d
                dev_priv->display.update_plane = ironlake_update_plane;
        } else if (HAS_PCH_SPLIT(dev)) {
                dev_priv->display.get_pipe_config = ironlake_get_pipe_config;
 +              dev_priv->display.get_clock = ironlake_crtc_clock_get;
                dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set;
                dev_priv->display.crtc_enable = ironlake_crtc_enable;
                dev_priv->display.crtc_disable = ironlake_crtc_disable;
                dev_priv->display.update_plane = ironlake_update_plane;
        } else if (IS_VALLEYVIEW(dev)) {
                dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
 +              dev_priv->display.get_clock = i9xx_crtc_clock_get;
                dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
                dev_priv->display.crtc_enable = valleyview_crtc_enable;
                dev_priv->display.crtc_disable = i9xx_crtc_disable;
                dev_priv->display.update_plane = i9xx_update_plane;
        } else {
                dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
 +              dev_priv->display.get_clock = i9xx_crtc_clock_get;
                dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
                dev_priv->display.crtc_enable = i9xx_crtc_enable;
                dev_priv->display.crtc_disable = i9xx_crtc_disable;
        else if (IS_I915G(dev))
                dev_priv->display.get_display_clock_speed =
                        i915_get_display_clock_speed;
 -      else if (IS_I945GM(dev) || IS_845G(dev) || IS_PINEVIEW_M(dev))
 +      else if (IS_I945GM(dev) || IS_845G(dev))
                dev_priv->display.get_display_clock_speed =
                        i9xx_misc_get_display_clock_speed;
 +      else if (IS_PINEVIEW(dev))
 +              dev_priv->display.get_display_clock_speed =
 +                      pnv_get_display_clock_speed;
        else if (IS_I915GM(dev))
                dev_priv->display.get_display_clock_speed =
                        i915gm_get_display_clock_speed;
@@@ -10078,7 -9586,7 +10080,7 @@@ void intel_modeset_init(struct drm_devi
                      INTEL_INFO(dev)->num_pipes,
                      INTEL_INFO(dev)->num_pipes > 1 ? "s" : "");
  
 -      for (i = 0; i < INTEL_INFO(dev)->num_pipes; i++) {
 +      for_each_pipe(i) {
                intel_crtc_init(dev, i);
                for (j = 0; j < dev_priv->num_plane; j++) {
                        ret = intel_plane_init(dev, i, j);
@@@ -10284,17 -9792,6 +10286,17 @@@ void i915_redisable_vga(struct drm_devi
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 vga_reg = i915_vgacntrl_reg(dev);
  
 +      /* This function can be called either from intel_modeset_setup_hw_state or
 +       * at a very early point in our resume sequence, where the power well
 +       * structures are not yet restored. Since this function is at a very
 +       * paranoid "someone might have enabled VGA while we were not looking"
 +       * level, just check if the power well is enabled instead of trying to
 +       * follow the "don't touch the power well if we don't need it" policy
 +       * the rest of the driver uses. */
 +      if (HAS_POWER_WELL(dev) &&
 +          (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE_ENABLED) == 0)
 +              return;
 +
        if (I915_READ(vga_reg) != VGA_DISP_DISABLE) {
                DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
                i915_disable_vga(dev);
@@@ -10365,15 -9862,6 +10367,15 @@@ static void intel_modeset_readout_hw_st
                              pipe);
        }
  
 +      list_for_each_entry(crtc, &dev->mode_config.crtc_list,
 +                          base.head) {
 +              if (!crtc->active)
 +                      continue;
 +              if (dev_priv->display.get_clock)
 +                      dev_priv->display.get_clock(crtc,
 +                                                  &crtc->config);
 +      }
 +
        list_for_each_entry(connector, &dev->mode_config.connector_list,
                            base.head) {
                if (connector->get_hw_state(connector)) {
@@@ -10405,22 -9893,6 +10407,22 @@@ void intel_modeset_setup_hw_state(struc
  
        intel_modeset_readout_hw_state(dev);
  
 +      /*
 +       * Now that we have the config, copy it to each CRTC struct.
 +       * Note that this could go away if we move to using crtc_config
 +       * checking everywhere.
 +       */
 +      list_for_each_entry(crtc, &dev->mode_config.crtc_list,
 +                          base.head) {
 +              if (crtc->active && i915_fastboot) {
 +                      intel_crtc_mode_from_pipe_config(crtc, &crtc->config);
 +
 +                      DRM_DEBUG_KMS("[CRTC:%d] found active mode: ",
 +                                    crtc->base.base.id);
 +                      drm_mode_debug_printmodeline(&crtc->base.mode);
 +              }
 +      }
 +
        /* HW state is read out, now we need to sanitize this mess. */
        list_for_each_entry(encoder, &dev->mode_config.encoder_list,
                            base.head) {
@@@ -10483,6 -9955,7 +10485,6 @@@ void intel_modeset_cleanup(struct drm_d
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
 -      struct intel_crtc *intel_crtc;
  
        /*
         * Interrupts and polling as the first thing to avoid creating havoc.
                if (!crtc->fb)
                        continue;
  
 -              intel_crtc = to_intel_crtc(crtc);
                intel_increase_pllclock(crtc);
        }
  
@@@ -10561,10 -10035,15 +10563,12 @@@ int intel_modeset_vga_set_state(struct 
        return 0;
  }
  
 -#ifdef CONFIG_DEBUG_FS
 -#include <linux/seq_file.h>
 -
  struct intel_display_error_state {
  
        u32 power_well_driver;
  
+       int num_transcoders;
        struct intel_cursor_error_state {
                u32 control;
                u32 position;
        } cursor[I915_MAX_PIPES];
  
        struct intel_pipe_error_state {
-               enum transcoder cpu_transcoder;
-               u32 conf;
                u32 source;
-               u32 htotal;
-               u32 hblank;
-               u32 hsync;
-               u32 vtotal;
-               u32 vblank;
-               u32 vsync;
        } pipe[I915_MAX_PIPES];
  
        struct intel_plane_error_state {
                u32 surface;
                u32 tile_offset;
        } plane[I915_MAX_PIPES];
+       struct intel_transcoder_error_state {
+               enum transcoder cpu_transcoder;
+               u32 conf;
+               u32 htotal;
+               u32 hblank;
+               u32 hsync;
+               u32 vtotal;
+               u32 vblank;
+               u32 vsync;
+       } transcoder[4];
  };
  
  struct intel_display_error_state *
@@@ -10601,9 -10084,17 +10609,17 @@@ intel_display_capture_error_state(struc
  {
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct intel_display_error_state *error;
-       enum transcoder cpu_transcoder;
+       int transcoders[] = {
+               TRANSCODER_A,
+               TRANSCODER_B,
+               TRANSCODER_C,
+               TRANSCODER_EDP,
+       };
        int i;
  
+       if (INTEL_INFO(dev)->num_pipes == 0)
+               return NULL;
        error = kmalloc(sizeof(*error), GFP_ATOMIC);
        if (error == NULL)
                return NULL;
                error->power_well_driver = I915_READ(HSW_PWR_WELL_DRIVER);
  
        for_each_pipe(i) {
-               cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i);
-               error->pipe[i].cpu_transcoder = cpu_transcoder;
                if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) {
                        error->cursor[i].control = I915_READ(CURCNTR(i));
                        error->cursor[i].position = I915_READ(CURPOS(i));
                        error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i));
                }
  
-               error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder));
                error->pipe[i].source = I915_READ(PIPESRC(i));
-               error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
-               error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder));
-               error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder));
-               error->pipe[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
-               error->pipe[i].vblank = I915_READ(VBLANK(cpu_transcoder));
-               error->pipe[i].vsync = I915_READ(VSYNC(cpu_transcoder));
+       }
+       error->num_transcoders = INTEL_INFO(dev)->num_pipes;
+       if (HAS_DDI(dev_priv->dev))
+               error->num_transcoders++; /* Account for eDP. */
+       for (i = 0; i < error->num_transcoders; i++) {
+               enum transcoder cpu_transcoder = transcoders[i];
+               error->transcoder[i].cpu_transcoder = cpu_transcoder;
+               error->transcoder[i].conf = I915_READ(PIPECONF(cpu_transcoder));
+               error->transcoder[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
+               error->transcoder[i].hblank = I915_READ(HBLANK(cpu_transcoder));
+               error->transcoder[i].hsync = I915_READ(HSYNC(cpu_transcoder));
+               error->transcoder[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
+               error->transcoder[i].vblank = I915_READ(VBLANK(cpu_transcoder));
+               error->transcoder[i].vsync = I915_READ(VSYNC(cpu_transcoder));
        }
  
        /* In the code above we read the registers without checking if the power
         * well was on, so here we have to clear the FPGA_DBG_RM_NOCLAIM bit to
         * prevent the next I915_WRITE from detecting it and printing an error
         * message. */
 -      if (HAS_POWER_WELL(dev))
 -              I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
 +      intel_uncore_clear_errors(dev);
  
        return error;
  }
@@@ -10666,22 -10166,16 +10690,16 @@@ intel_display_print_error_state(struct 
  {
        int i;
  
+       if (!error)
+               return;
        err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes);
        if (HAS_POWER_WELL(dev))
                err_printf(m, "PWR_WELL_CTL2: %08x\n",
                           error->power_well_driver);
        for_each_pipe(i) {
                err_printf(m, "Pipe [%d]:\n", i);
-               err_printf(m, "  CPU transcoder: %c\n",
-                          transcoder_name(error->pipe[i].cpu_transcoder));
-               err_printf(m, "  CONF: %08x\n", error->pipe[i].conf);
                err_printf(m, "  SRC: %08x\n", error->pipe[i].source);
-               err_printf(m, "  HTOTAL: %08x\n", error->pipe[i].htotal);
-               err_printf(m, "  HBLANK: %08x\n", error->pipe[i].hblank);
-               err_printf(m, "  HSYNC: %08x\n", error->pipe[i].hsync);
-               err_printf(m, "  VTOTAL: %08x\n", error->pipe[i].vtotal);
-               err_printf(m, "  VBLANK: %08x\n", error->pipe[i].vblank);
-               err_printf(m, "  VSYNC: %08x\n", error->pipe[i].vsync);
  
                err_printf(m, "Plane [%d]:\n", i);
                err_printf(m, "  CNTR: %08x\n", error->plane[i].control);
                err_printf(m, "  POS: %08x\n", error->cursor[i].position);
                err_printf(m, "  BASE: %08x\n", error->cursor[i].base);
        }
+       for (i = 0; i < error->num_transcoders; i++) {
+               err_printf(m, "  CPU transcoder: %c\n",
+                          transcoder_name(error->transcoder[i].cpu_transcoder));
+               err_printf(m, "  CONF: %08x\n", error->transcoder[i].conf);
+               err_printf(m, "  HTOTAL: %08x\n", error->transcoder[i].htotal);
+               err_printf(m, "  HBLANK: %08x\n", error->transcoder[i].hblank);
+               err_printf(m, "  HSYNC: %08x\n", error->transcoder[i].hsync);
+               err_printf(m, "  VTOTAL: %08x\n", error->transcoder[i].vtotal);
+               err_printf(m, "  VBLANK: %08x\n", error->transcoder[i].vblank);
+               err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
+       }
  }
 -#endif
index 01b5a519c43c0bc2dbb59a1a5100259e653fcaa6,5950888ae1d00bd7cf70ebb18d5595ee0ee8446e..a43c33bc4a3582ece3758ae7286b87fb2a8256ec
@@@ -194,6 -194,9 +194,6 @@@ void intel_gmch_panel_fitting(struct in
            adjusted_mode->vdisplay == mode->vdisplay)
                goto out;
  
 -      drm_mode_set_crtcinfo(adjusted_mode, 0);
 -      pipe_config->timings_set = true;
 -
        switch (fitting_mode) {
        case DRM_MODE_SCALE_CENTER:
                /*
@@@ -494,8 -497,11 +494,11 @@@ void intel_panel_set_backlight(struct d
                goto out;
        }
  
-       /* scale to hardware */
-       level = level * freq / max;
+       /* scale to hardware, but be careful to not overflow */
+       if (freq < max)
+               level = level * freq / max;
+       else
+               level = freq / max * level;
  
        dev_priv->backlight.level = level;
        if (dev_priv->backlight.device)
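The branch introduced above exists because level * freq can overflow 32 bits when the hardware maximum is large; dividing first is lossier but safe. A rough userspace sketch of that trade-off (the helper name and the numbers are illustrative, not from the driver):

#include <stdio.h>
#include <stdint.h>

/* Scale a brightness 'level' in 0..max onto the hardware range 0..freq.
 * Multiply first while the product stays small, otherwise divide first
 * and accept the rounding loss. */
static uint32_t scale_backlight(uint32_t level, uint32_t freq, uint32_t max)
{
	if (freq < max)
		return level * freq / max;
	else
		return freq / max * level;
}

int main(void)
{
	/* level 200 of 255 against a very large PWM register value:
	 * 200 * 3000000000 would overflow u32, freq / max * 200 does not. */
	printf("%u\n", scale_backlight(200, 3000000000u, 255));
	return 0;
}
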
@@@ -512,6 -518,17 +515,17 @@@ void intel_panel_disable_backlight(stru
        struct drm_i915_private *dev_priv = dev->dev_private;
        unsigned long flags;
  
+       /*
+        * Do not disable backlight on the vgaswitcheroo path. When switching
+        * away from i915, the other client may depend on i915 to handle the
+        * backlight. This will leave the backlight on unnecessarily when
+        * another client is not activated.
+        */
+       if (dev->switch_power_state == DRM_SWITCH_POWER_CHANGING) {
+               DRM_DEBUG_DRIVER("Skipping backlight disable on vga switch\n");
+               return;
+       }
        spin_lock_irqsave(&dev_priv->backlight.lock, flags);
  
        dev_priv->backlight.enabled = false;
index 0150ba598bf08ff1ca5ceea4ce4c8445961a094f,b0e4a0bd1313c0dafac84df1b6af56e7ccced950..46056820d1d2200db076c2e5f3291fdde1068915
@@@ -30,7 -30,8 +30,7 @@@
  #include "intel_drv.h"
  #include "../../../platform/x86/intel_ips.h"
  #include <linux/module.h>
 -
 -#define FORCEWAKE_ACK_TIMEOUT_MS 2
 +#include <drm/i915_powerwell.h>
  
  /* FBC, or Frame Buffer Compression, is a technique employed to compress the
   * framebuffer contents in-memory, aiming at reducing the required bandwidth
@@@ -85,7 -86,7 +85,7 @@@ static void i8xx_enable_fbc(struct drm_
        int plane, i;
        u32 fbc_ctl, fbc_ctl2;
  
 -      cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
 +      cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
        if (fb->pitches[0] < cfb_pitch)
                cfb_pitch = fb->pitches[0];
  
@@@ -216,7 -217,7 +216,7 @@@ static void ironlake_enable_fbc(struct 
                   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
                   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
        I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
 -      I915_WRITE(ILK_FBC_RT_BASE, obj->gtt_offset | ILK_FBC_RT_VALID);
 +      I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
        /* enable it... */
        I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
  
@@@ -273,7 -274,7 +273,7 @@@ static void gen7_enable_fbc(struct drm_
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  
 -      I915_WRITE(IVB_FBC_RT_BASE, obj->gtt_offset);
 +      I915_WRITE(IVB_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj));
  
        I915_WRITE(ILK_DPFC_CONTROL, DPFC_CTL_EN | DPFC_CTL_LIMIT_1X |
                   IVB_DPFC_CTL_FENCE_EN |
@@@ -324,7 -325,7 +324,7 @@@ static void intel_fbc_work_fn(struct wo
        struct drm_i915_private *dev_priv = dev->dev_private;
  
        mutex_lock(&dev->struct_mutex);
 -      if (work == dev_priv->fbc_work) {
 +      if (work == dev_priv->fbc.fbc_work) {
                /* Double check that we haven't switched fb without cancelling
                 * the prior work.
                 */
                        dev_priv->display.enable_fbc(work->crtc,
                                                     work->interval);
  
 -                      dev_priv->cfb_plane = to_intel_crtc(work->crtc)->plane;
 -                      dev_priv->cfb_fb = work->crtc->fb->base.id;
 -                      dev_priv->cfb_y = work->crtc->y;
 +                      dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane;
 +                      dev_priv->fbc.fb_id = work->crtc->fb->base.id;
 +                      dev_priv->fbc.y = work->crtc->y;
                }
  
 -              dev_priv->fbc_work = NULL;
 +              dev_priv->fbc.fbc_work = NULL;
        }
        mutex_unlock(&dev->struct_mutex);
  
  
  static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
  {
 -      if (dev_priv->fbc_work == NULL)
 +      if (dev_priv->fbc.fbc_work == NULL)
                return;
  
        DRM_DEBUG_KMS("cancelling pending FBC enable\n");
  
        /* Synchronisation is provided by struct_mutex and checking of
 -       * dev_priv->fbc_work, so we can perform the cancellation
 +       * dev_priv->fbc.fbc_work, so we can perform the cancellation
         * entirely asynchronously.
         */
 -      if (cancel_delayed_work(&dev_priv->fbc_work->work))
 +      if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
                /* tasklet was killed before being run, clean up */
 -              kfree(dev_priv->fbc_work);
 +              kfree(dev_priv->fbc.fbc_work);
  
        /* Mark the work as no longer wanted so that if it does
         * wake up (because the work was already running and waiting
         * for our mutex), it will discover that it is no longer
         * necessary to run.
         */
 -      dev_priv->fbc_work = NULL;
 +      dev_priv->fbc.fbc_work = NULL;
  }
  
 -void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
 +static void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
  {
        struct intel_fbc_work *work;
        struct drm_device *dev = crtc->dev;
  
        work = kzalloc(sizeof *work, GFP_KERNEL);
        if (work == NULL) {
 +              DRM_ERROR("Failed to allocate FBC work structure\n");
                dev_priv->display.enable_fbc(crtc, interval);
                return;
        }
        work->interval = interval;
        INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
  
 -      dev_priv->fbc_work = work;
 -
 -      DRM_DEBUG_KMS("scheduling delayed FBC enable\n");
 +      dev_priv->fbc.fbc_work = work;
  
        /* Delay the actual enabling to let pageflipping cease and the
         * display to settle before starting the compression. Note that
         * following the termination of the page-flipping sequence
         * and indeed performing the enable as a co-routine and not
         * waiting synchronously upon the vblank.
 +       *
 +       * WaFbcWaitForVBlankBeforeEnable:ilk,snb
         */
        schedule_delayed_work(&work->work, msecs_to_jiffies(50));
  }
@@@ -418,17 -418,7 +418,17 @@@ void intel_disable_fbc(struct drm_devic
                return;
  
        dev_priv->display.disable_fbc(dev);
 -      dev_priv->cfb_plane = -1;
 +      dev_priv->fbc.plane = -1;
 +}
 +
 +static bool set_no_fbc_reason(struct drm_i915_private *dev_priv,
 +                            enum no_fbc_reason reason)
 +{
 +      if (dev_priv->fbc.no_fbc_reason == reason)
 +              return false;
 +
 +      dev_priv->fbc.no_fbc_reason = reason;
 +      return true;
  }
  
  /**
@@@ -458,18 -448,14 +458,18 @@@ void intel_update_fbc(struct drm_devic
        struct drm_framebuffer *fb;
        struct intel_framebuffer *intel_fb;
        struct drm_i915_gem_object *obj;
 -      int enable_fbc;
        unsigned int max_hdisplay, max_vdisplay;
  
 -      if (!i915_powersave)
 +      if (!I915_HAS_FBC(dev)) {
 +              set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED);
                return;
 +      }
  
 -      if (!I915_HAS_FBC(dev))
 +      if (!i915_powersave) {
 +              if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
 +                      DRM_DEBUG_KMS("fbc disabled per module param\n");
                return;
 +      }
  
        /*
         * If FBC is already on, we just have to verify that we can
                if (intel_crtc_active(tmp_crtc) &&
                    !to_intel_crtc(tmp_crtc)->primary_disabled) {
                        if (crtc) {
 -                              DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
 -                              dev_priv->no_fbc_reason = FBC_MULTIPLE_PIPES;
 +                              if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES))
 +                                      DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
                                goto out_disable;
                        }
                        crtc = tmp_crtc;
        }
  
        if (!crtc || crtc->fb == NULL) {
 -              DRM_DEBUG_KMS("no output, disabling\n");
 -              dev_priv->no_fbc_reason = FBC_NO_OUTPUT;
 +              if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT))
 +                      DRM_DEBUG_KMS("no output, disabling\n");
                goto out_disable;
        }
  
        intel_fb = to_intel_framebuffer(fb);
        obj = intel_fb->obj;
  
 -      enable_fbc = i915_enable_fbc;
 -      if (enable_fbc < 0) {
 -              DRM_DEBUG_KMS("fbc set to per-chip default\n");
 -              enable_fbc = 1;
 -              if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
 -                      enable_fbc = 0;
 +      if (i915_enable_fbc < 0 &&
 +          INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) {
 +              if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
 +                      DRM_DEBUG_KMS("disabled per chip default\n");
 +              goto out_disable;
        }
 -      if (!enable_fbc) {
 -              DRM_DEBUG_KMS("fbc disabled per module param\n");
 -              dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
 +      if (!i915_enable_fbc) {
 +              if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
 +                      DRM_DEBUG_KMS("fbc disabled per module param\n");
                goto out_disable;
        }
        if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
            (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
 -              DRM_DEBUG_KMS("mode incompatible with compression, "
 -                            "disabling\n");
 -              dev_priv->no_fbc_reason = FBC_UNSUPPORTED_MODE;
 +              if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
 +                      DRM_DEBUG_KMS("mode incompatible with compression, "
 +                                    "disabling\n");
                goto out_disable;
        }
  
        }
        if ((crtc->mode.hdisplay > max_hdisplay) ||
            (crtc->mode.vdisplay > max_vdisplay)) {
 -              DRM_DEBUG_KMS("mode too large for compression, disabling\n");
 -              dev_priv->no_fbc_reason = FBC_MODE_TOO_LARGE;
 +              if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE))
 +                      DRM_DEBUG_KMS("mode too large for compression, disabling\n");
                goto out_disable;
        }
        if ((IS_I915GM(dev) || IS_I945GM(dev) || IS_HASWELL(dev)) &&
            intel_crtc->plane != 0) {
 -              DRM_DEBUG_KMS("plane not 0, disabling compression\n");
 -              dev_priv->no_fbc_reason = FBC_BAD_PLANE;
 +              if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
 +                      DRM_DEBUG_KMS("plane not 0, disabling compression\n");
                goto out_disable;
        }
  
         */
        if (obj->tiling_mode != I915_TILING_X ||
            obj->fence_reg == I915_FENCE_REG_NONE) {
 -              DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
 -              dev_priv->no_fbc_reason = FBC_NOT_TILED;
 +              if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED))
 +                      DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
                goto out_disable;
        }
  
                goto out_disable;
  
        if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
 -              DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
 -              dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
 +              if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL))
 +                      DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
                goto out_disable;
        }
  
         * cannot be unpinned (and have its GTT offset and fence revoked)
         * without first being decoupled from the scanout and FBC disabled.
         */
 -      if (dev_priv->cfb_plane == intel_crtc->plane &&
 -          dev_priv->cfb_fb == fb->base.id &&
 -          dev_priv->cfb_y == crtc->y)
 +      if (dev_priv->fbc.plane == intel_crtc->plane &&
 +          dev_priv->fbc.fb_id == fb->base.id &&
 +          dev_priv->fbc.y == crtc->y)
                return;
  
        if (intel_fbc_enabled(dev)) {
        }
  
        intel_enable_fbc(crtc, 500);
 +      dev_priv->fbc.no_fbc_reason = FBC_OK;
        return;
  
  out_disable:
@@@ -1680,6 -1666,9 +1680,6 @@@ static void i830_update_wm(struct drm_d
        I915_WRITE(FW_BLC, fwater_lo);
  }
  
 -#define ILK_LP0_PLANE_LATENCY         700
 -#define ILK_LP0_CURSOR_LATENCY                1300
 -
  /*
   * Check the wm result.
   *
@@@ -1794,9 -1783,9 +1794,9 @@@ static void ironlake_update_wm(struct d
        enabled = 0;
        if (g4x_compute_wm0(dev, PIPE_A,
                            &ironlake_display_wm_info,
 -                          ILK_LP0_PLANE_LATENCY,
 +                          dev_priv->wm.pri_latency[0] * 100,
                            &ironlake_cursor_wm_info,
 -                          ILK_LP0_CURSOR_LATENCY,
 +                          dev_priv->wm.cur_latency[0] * 100,
                            &plane_wm, &cursor_wm)) {
                I915_WRITE(WM0_PIPEA_ILK,
                           (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
  
        if (g4x_compute_wm0(dev, PIPE_B,
                            &ironlake_display_wm_info,
 -                          ILK_LP0_PLANE_LATENCY,
 +                          dev_priv->wm.pri_latency[0] * 100,
                            &ironlake_cursor_wm_info,
 -                          ILK_LP0_CURSOR_LATENCY,
 +                          dev_priv->wm.cur_latency[0] * 100,
                            &plane_wm, &cursor_wm)) {
                I915_WRITE(WM0_PIPEB_ILK,
                           (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
  
        /* WM1 */
        if (!ironlake_compute_srwm(dev, 1, enabled,
 -                                 ILK_READ_WM1_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[1] * 500,
                                   &ironlake_display_srwm_info,
                                   &ironlake_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM1_LP_ILK,
                   WM1_LP_SR_EN |
 -                 (ILK_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  
        /* WM2 */
        if (!ironlake_compute_srwm(dev, 2, enabled,
 -                                 ILK_READ_WM2_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[2] * 500,
                                   &ironlake_display_srwm_info,
                                   &ironlake_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM2_LP_ILK,
                   WM2_LP_EN |
 -                 (ILK_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  static void sandybridge_update_wm(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      int latency = SNB_READ_WM0_LATENCY() * 100;     /* In unit 0.1us */
 +      int latency = dev_priv->wm.pri_latency[0] * 100;        /* In unit 0.1us */
        u32 val;
        int fbc_wm, plane_wm, cursor_wm;
        unsigned int enabled;
  
        /* WM1 */
        if (!ironlake_compute_srwm(dev, 1, enabled,
 -                                 SNB_READ_WM1_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[1] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM1_LP_ILK,
                   WM1_LP_SR_EN |
 -                 (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  
        /* WM2 */
        if (!ironlake_compute_srwm(dev, 2, enabled,
 -                                 SNB_READ_WM2_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[2] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM2_LP_ILK,
                   WM2_LP_EN |
 -                 (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  
        /* WM3 */
        if (!ironlake_compute_srwm(dev, 3, enabled,
 -                                 SNB_READ_WM3_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[3] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM3_LP_ILK,
                   WM3_LP_EN |
 -                 (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[3] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  static void ivybridge_update_wm(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      int latency = SNB_READ_WM0_LATENCY() * 100;     /* In unit 0.1us */
 +      int latency = dev_priv->wm.pri_latency[0] * 100;        /* In unit 0.1us */
        u32 val;
        int fbc_wm, plane_wm, cursor_wm;
        int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
  
        /* WM1 */
        if (!ironlake_compute_srwm(dev, 1, enabled,
 -                                 SNB_READ_WM1_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[1] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM1_LP_ILK,
                   WM1_LP_SR_EN |
 -                 (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  
        /* WM2 */
        if (!ironlake_compute_srwm(dev, 2, enabled,
 -                                 SNB_READ_WM2_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[2] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &cursor_wm))
  
        I915_WRITE(WM2_LP_ILK,
                   WM2_LP_EN |
 -                 (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  
        /* WM3, note we have to correct the cursor latency */
        if (!ironlake_compute_srwm(dev, 3, enabled,
 -                                 SNB_READ_WM3_LATENCY() * 500,
 +                                 dev_priv->wm.pri_latency[3] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
            !ironlake_compute_srwm(dev, 3, enabled,
 -                                 2 * SNB_READ_WM3_LATENCY() * 500,
 +                                 dev_priv->wm.cur_latency[3] * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
                                   &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
  
        I915_WRITE(WM3_LP_ILK,
                   WM3_LP_EN |
 -                 (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
 +                 (dev_priv->wm.pri_latency[3] << WM1_LP_LATENCY_SHIFT) |
                   (fbc_wm << WM1_LP_FBC_SHIFT) |
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
  }
  
 -static uint32_t hsw_wm_get_pixel_rate(struct drm_device *dev,
 -                                    struct drm_crtc *crtc)
 +static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
 +                                  struct drm_crtc *crtc)
  {
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pixel_rate, pfit_size;
        return pixel_rate;
  }
  
 -static uint32_t hsw_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
 +/* latency must be in 0.1us units. */
 +static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
                               uint32_t latency)
  {
        uint64_t ret;
  
 +      if (WARN(latency == 0, "Latency value missing\n"))
 +              return UINT_MAX;
 +
        ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
        ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
  
        return ret;
  }
  
 -static uint32_t hsw_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
 +/* latency must be in 0.1us units. */
 +static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
                               uint32_t horiz_pixels, uint8_t bytes_per_pixel,
                               uint32_t latency)
  {
        uint32_t ret;
  
 +      if (WARN(latency == 0, "Latency value missing\n"))
 +              return UINT_MAX;
 +
        ret = (latency * pixel_rate) / (pipe_htotal * 10000);
        ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
        ret = DIV_ROUND_UP(ret, 64) + 2;
        return ret;
  }
  
 -static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
 +static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
                           uint8_t bytes_per_pixel)
  {
        return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
  
  struct hsw_pipe_wm_parameters {
        bool active;
 -      bool sprite_enabled;
 -      uint8_t pri_bytes_per_pixel;
 -      uint8_t spr_bytes_per_pixel;
 -      uint8_t cur_bytes_per_pixel;
 -      uint32_t pri_horiz_pixels;
 -      uint32_t spr_horiz_pixels;
 -      uint32_t cur_horiz_pixels;
        uint32_t pipe_htotal;
        uint32_t pixel_rate;
 +      struct intel_plane_wm_parameters pri;
 +      struct intel_plane_wm_parameters spr;
 +      struct intel_plane_wm_parameters cur;
  };
  
  struct hsw_wm_maximums {
        uint16_t fbc;
  };
  
 -struct hsw_lp_wm_result {
 -      bool enable;
 -      bool fbc_enable;
 -      uint32_t pri_val;
 -      uint32_t spr_val;
 -      uint32_t cur_val;
 -      uint32_t fbc_val;
 -};
 -
  struct hsw_wm_values {
        uint32_t wm_pipe[3];
        uint32_t wm_lp[3];
        bool enable_fbc_wm;
  };
  
 -enum hsw_data_buf_partitioning {
 -      HSW_DATA_BUF_PART_1_2,
 -      HSW_DATA_BUF_PART_5_6,
 +/* used in computing the new watermarks state */
 +struct intel_wm_config {
 +      unsigned int num_pipes_active;
 +      bool sprites_enabled;
 +      bool sprites_scaled;
 +      bool fbc_wm_enabled;
  };
  
 -/* For both WM_PIPE and WM_LP. */
 -static uint32_t hsw_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
 +/*
 + * For both WM_PIPE and WM_LP.
 + * mem_value must be in 0.1us units.
 + */
 +static uint32_t ilk_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
                                   uint32_t mem_value,
                                   bool is_lp)
  {
        uint32_t method1, method2;
  
 -      /* TODO: for now, assume the primary plane is always enabled. */
 -      if (!params->active)
 +      if (!params->active || !params->pri.enabled)
                return 0;
  
 -      method1 = hsw_wm_method1(params->pixel_rate,
 -                               params->pri_bytes_per_pixel,
 +      method1 = ilk_wm_method1(params->pixel_rate,
 +                               params->pri.bytes_per_pixel,
                                 mem_value);
  
        if (!is_lp)
                return method1;
  
 -      method2 = hsw_wm_method2(params->pixel_rate,
 +      method2 = ilk_wm_method2(params->pixel_rate,
                                 params->pipe_htotal,
 -                               params->pri_horiz_pixels,
 -                               params->pri_bytes_per_pixel,
 +                               params->pri.horiz_pixels,
 +                               params->pri.bytes_per_pixel,
                                 mem_value);
  
        return min(method1, method2);
  }
  
 -/* For both WM_PIPE and WM_LP. */
 -static uint32_t hsw_compute_spr_wm(struct hsw_pipe_wm_parameters *params,
 +/*
 + * For both WM_PIPE and WM_LP.
 + * mem_value must be in 0.1us units.
 + */
 +static uint32_t ilk_compute_spr_wm(struct hsw_pipe_wm_parameters *params,
                                   uint32_t mem_value)
  {
        uint32_t method1, method2;
  
 -      if (!params->active || !params->sprite_enabled)
 +      if (!params->active || !params->spr.enabled)
                return 0;
  
 -      method1 = hsw_wm_method1(params->pixel_rate,
 -                               params->spr_bytes_per_pixel,
 +      method1 = ilk_wm_method1(params->pixel_rate,
 +                               params->spr.bytes_per_pixel,
                                 mem_value);
 -      method2 = hsw_wm_method2(params->pixel_rate,
 +      method2 = ilk_wm_method2(params->pixel_rate,
                                 params->pipe_htotal,
 -                               params->spr_horiz_pixels,
 -                               params->spr_bytes_per_pixel,
 +                               params->spr.horiz_pixels,
 +                               params->spr.bytes_per_pixel,
                                 mem_value);
        return min(method1, method2);
  }
  
 -/* For both WM_PIPE and WM_LP. */
 -static uint32_t hsw_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
 +/*
 + * For both WM_PIPE and WM_LP.
 + * mem_value must be in 0.1us units.
 + */
 +static uint32_t ilk_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
                                   uint32_t mem_value)
  {
 -      if (!params->active)
 +      if (!params->active || !params->cur.enabled)
                return 0;
  
 -      return hsw_wm_method2(params->pixel_rate,
 +      return ilk_wm_method2(params->pixel_rate,
                              params->pipe_htotal,
 -                            params->cur_horiz_pixels,
 -                            params->cur_bytes_per_pixel,
 +                            params->cur.horiz_pixels,
 +                            params->cur.bytes_per_pixel,
                              mem_value);
  }
  
  /* Only for WM_LP. */
 -static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
 -                                 uint32_t pri_val,
 -                                 uint32_t mem_value)
 +static uint32_t ilk_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
 +                                 uint32_t pri_val)
  {
 -      if (!params->active)
 +      if (!params->active || !params->pri.enabled)
                return 0;
  
 -      return hsw_wm_fbc(pri_val,
 -                        params->pri_horiz_pixels,
 -                        params->pri_bytes_per_pixel);
 +      return ilk_wm_fbc(pri_val,
 +                        params->pri.horiz_pixels,
 +                        params->pri.bytes_per_pixel);
  }
  
 -static bool hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximums *max,
 -                            struct hsw_pipe_wm_parameters *params,
 -                            struct hsw_lp_wm_result *result)
 +static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
  {
 -      enum pipe pipe;
 -      uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
 +      if (INTEL_INFO(dev)->gen >= 7)
 +              return 768;
 +      else
 +              return 512;
 +}
  
 -      for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
 -              struct hsw_pipe_wm_parameters *p = &params[pipe];
 +/* Calculate the maximum primary/sprite plane watermark */
 +static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
 +                                   int level,
 +                                   const struct intel_wm_config *config,
 +                                   enum intel_ddb_partitioning ddb_partitioning,
 +                                   bool is_sprite)
 +{
 +      unsigned int fifo_size = ilk_display_fifo_size(dev);
 +      unsigned int max;
  
 -              pri_val[pipe] = hsw_compute_pri_wm(p, mem_value, true);
 -              spr_val[pipe] = hsw_compute_spr_wm(p, mem_value);
 -              cur_val[pipe] = hsw_compute_cur_wm(p, mem_value);
 -              fbc_val[pipe] = hsw_compute_fbc_wm(p, pri_val[pipe], mem_value);
 -      }
 +      /* if sprites aren't enabled, sprites get nothing */
 +      if (is_sprite && !config->sprites_enabled)
 +              return 0;
  
 -      result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
 -      result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
 -      result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
 -      result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
 +      /* HSW allows LP1+ watermarks even with multiple pipes */
 +      if (level == 0 || config->num_pipes_active > 1) {
 +              fifo_size /= INTEL_INFO(dev)->num_pipes;
  
 -      if (result->fbc_val > max->fbc) {
 -              result->fbc_enable = false;
 -              result->fbc_val = 0;
 -      } else {
 -              result->fbc_enable = true;
 +              /*
 +               * For some reason the non self refresh
 +               * FIFO size is only half of the self
 +               * refresh FIFO size on ILK/SNB.
 +               */
 +              if (INTEL_INFO(dev)->gen <= 6)
 +                      fifo_size /= 2;
 +      }
 +
 +      if (config->sprites_enabled) {
 +              /* level 0 is always calculated with 1:1 split */
 +              if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
 +                      if (is_sprite)
 +                              fifo_size *= 5;
 +                      fifo_size /= 6;
 +              } else {
 +                      fifo_size /= 2;
 +              }
        }
  
 +      /* clamp to max that the registers can hold */
 +      if (INTEL_INFO(dev)->gen >= 7)
 +              /* IVB/HSW primary/sprite plane watermarks */
 +              max = level == 0 ? 127 : 1023;
 +      else if (!is_sprite)
 +              /* ILK/SNB primary plane watermarks */
 +              max = level == 0 ? 127 : 511;
 +      else
 +              /* ILK/SNB sprite plane watermarks */
 +              max = level == 0 ? 63 : 255;
 +
 +      return min(fifo_size, max);
 +}
 +
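As a worked example of the 5/6 split above (numbers follow only the code, not hardware documentation): on a gen7 part with one active pipe and sprites enabled, an LP level under INTEL_DDB_PART_5_6 leaves the 768-entry FIFO undivided, so the primary plane gets 768 / 6 = 128 entries and the sprite gets 768 * 5 / 6 = 640, both below the 1023 register limit so no clamping occurs. A tiny sketch of that arithmetic:

#include <stdio.h>

/* 5/6 DDB split at an LP level on gen7: one active pipe, sprites enabled. */
int main(void)
{
	unsigned int fifo_size = 768;	/* ilk_display_fifo_size() for gen >= 7 */

	printf("primary plane max: %u\n", fifo_size / 6);	/* 128 */
	printf("sprite plane max:  %u\n", fifo_size * 5 / 6);	/* 640 */
	return 0;
}
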
 +/* Calculate the maximum cursor plane watermark */
 +static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
 +                                    int level,
 +                                    const struct intel_wm_config *config)
 +{
 +      /* HSW LP1+ watermarks w/ multiple pipes */
 +      if (level > 0 && config->num_pipes_active > 1)
 +              return 64;
 +
 +      /* otherwise just report max that registers can hold */
 +      if (INTEL_INFO(dev)->gen >= 7)
 +              return level == 0 ? 63 : 255;
 +      else
 +              return level == 0 ? 31 : 63;
 +}
 +
 +/* Calculate the maximum FBC watermark */
 +static unsigned int ilk_fbc_wm_max(void)
 +{
 +      /* max that registers can hold */
 +      return 15;
 +}
 +
 +static void ilk_wm_max(struct drm_device *dev,
 +                     int level,
 +                     const struct intel_wm_config *config,
 +                     enum intel_ddb_partitioning ddb_partitioning,
 +                     struct hsw_wm_maximums *max)
 +{
 +      max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
 +      max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
 +      max->cur = ilk_cursor_wm_max(dev, level, config);
 +      max->fbc = ilk_fbc_wm_max();
 +}
 +
 +static bool ilk_check_wm(int level,
 +                       const struct hsw_wm_maximums *max,
 +                       struct intel_wm_level *result)
 +{
 +      bool ret;
 +
 +      /* already determined to be invalid? */
 +      if (!result->enable)
 +              return false;
 +
        result->enable = result->pri_val <= max->pri &&
                         result->spr_val <= max->spr &&
                         result->cur_val <= max->cur;
 -      return result->enable;
 +
 +      ret = result->enable;
 +
 +      /*
 +       * HACK until we can pre-compute everything,
 +       * and thus fail gracefully if LP0 watermarks
 +       * are exceeded...
 +       */
 +      if (level == 0 && !result->enable) {
 +              if (result->pri_val > max->pri)
 +                      DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
 +                                    level, result->pri_val, max->pri);
 +              if (result->spr_val > max->spr)
 +                      DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
 +                                    level, result->spr_val, max->spr);
 +              if (result->cur_val > max->cur)
 +                      DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
 +                                    level, result->cur_val, max->cur);
 +
 +              result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
 +              result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
 +              result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
 +              result->enable = true;
 +      }
 +
 +      DRM_DEBUG_KMS("WM%d: %sabled\n", level, result->enable ? "en" : "dis");
 +
 +      return ret;
 +}
 +
 +static void ilk_compute_wm_level(struct drm_i915_private *dev_priv,
 +                               int level,
 +                               struct hsw_pipe_wm_parameters *p,
 +                               struct intel_wm_level *result)
 +{
 +      uint16_t pri_latency = dev_priv->wm.pri_latency[level];
 +      uint16_t spr_latency = dev_priv->wm.spr_latency[level];
 +      uint16_t cur_latency = dev_priv->wm.cur_latency[level];
 +
 +      /* WM1+ latency values stored in 0.5us units */
 +      if (level > 0) {
 +              pri_latency *= 5;
 +              spr_latency *= 5;
 +              cur_latency *= 5;
 +      }
 +
 +      result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
 +      result->spr_val = ilk_compute_spr_wm(p, spr_latency);
 +      result->cur_val = ilk_compute_cur_wm(p, cur_latency);
 +      result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
 +      result->enable = true;
 +}
 +
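The multiply-by-5 above converts the WM1+ latencies, which are stored in 0.5us units, into the 0.1us units that ilk_wm_method1()/ilk_wm_method2() expect; WM0 latencies are already kept in 0.1us units. A small standalone sketch of that conversion (the helper name is made up for illustration):

#include <stdio.h>

/* Convert a raw watermark latency to 0.1us units: WM0 values are
 * already in 0.1us, WM1+ values are stored in 0.5us units. */
static unsigned int wm_latency_tenths_of_us(int level, unsigned int raw)
{
	return level > 0 ? raw * 5 : raw;
}

int main(void)
{
	printf("%u\n", wm_latency_tenths_of_us(0, 7));	/* WM0: 7  -> 0.7us */
	printf("%u\n", wm_latency_tenths_of_us(1, 4));	/* WM1: 20 -> 2.0us */
	return 0;
}
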
 +static bool hsw_compute_lp_wm(struct drm_i915_private *dev_priv,
 +                            int level, struct hsw_wm_maximums *max,
 +                            struct hsw_pipe_wm_parameters *params,
 +                            struct intel_wm_level *result)
 +{
 +      enum pipe pipe;
 +      struct intel_wm_level res[3];
 +
 +      for (pipe = PIPE_A; pipe <= PIPE_C; pipe++)
 +              ilk_compute_wm_level(dev_priv, level, &params[pipe], &res[pipe]);
 +
 +      result->pri_val = max3(res[0].pri_val, res[1].pri_val, res[2].pri_val);
 +      result->spr_val = max3(res[0].spr_val, res[1].spr_val, res[2].spr_val);
 +      result->cur_val = max3(res[0].cur_val, res[1].cur_val, res[2].cur_val);
 +      result->fbc_val = max3(res[0].fbc_val, res[1].fbc_val, res[2].fbc_val);
 +      result->enable = true;
 +
 +      return ilk_check_wm(level, max, result);
  }
  
  static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
 -                                  uint32_t mem_value, enum pipe pipe,
 +                                  enum pipe pipe,
                                    struct hsw_pipe_wm_parameters *params)
  {
        uint32_t pri_val, cur_val, spr_val;
 +      /* WM0 latency values stored in 0.1us units */
 +      uint16_t pri_latency = dev_priv->wm.pri_latency[0];
 +      uint16_t spr_latency = dev_priv->wm.spr_latency[0];
 +      uint16_t cur_latency = dev_priv->wm.cur_latency[0];
  
 -      pri_val = hsw_compute_pri_wm(params, mem_value, false);
 -      spr_val = hsw_compute_spr_wm(params, mem_value);
 -      cur_val = hsw_compute_cur_wm(params, mem_value);
 +      pri_val = ilk_compute_pri_wm(params, pri_latency, false);
 +      spr_val = ilk_compute_spr_wm(params, spr_latency);
 +      cur_val = ilk_compute_cur_wm(params, cur_latency);
  
        WARN(pri_val > 127,
             "Primary WM error, mode not supported for pipe %c\n",
@@@ -2505,116 -2338,27 +2505,116 @@@ hsw_compute_linetime_wm(struct drm_devi
               PIPE_WM_LINETIME_TIME(linetime);
  }
  
 +static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5])
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +
 +      if (IS_HASWELL(dev)) {
 +              uint64_t sskpd = I915_READ64(MCH_SSKPD);
 +
 +              wm[0] = (sskpd >> 56) & 0xFF;
 +              if (wm[0] == 0)
 +                      wm[0] = sskpd & 0xF;
 +              wm[1] = (sskpd >> 4) & 0xFF;
 +              wm[2] = (sskpd >> 12) & 0xFF;
 +              wm[3] = (sskpd >> 20) & 0x1FF;
 +              wm[4] = (sskpd >> 32) & 0x1FF;
 +      } else if (INTEL_INFO(dev)->gen >= 6) {
 +              uint32_t sskpd = I915_READ(MCH_SSKPD);
 +
 +              wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
 +              wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
 +              wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
 +              wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
 +      } else if (INTEL_INFO(dev)->gen >= 5) {
 +              uint32_t mltr = I915_READ(MLTR_ILK);
 +
 +              /* ILK primary LP0 latency is 700 ns */
 +              wm[0] = 7;
 +              wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
 +              wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
 +      }
 +}
 +
 +static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
 +{
 +      /* ILK sprite LP0 latency is 1300 ns */
 +      if (INTEL_INFO(dev)->gen == 5)
 +              wm[0] = 13;
 +}
 +
 +static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
 +{
 +      /* ILK cursor LP0 latency is 1300 ns */
 +      if (INTEL_INFO(dev)->gen == 5)
 +              wm[0] = 13;
 +
 +      /* WaDoubleCursorLP3Latency:ivb */
 +      if (IS_IVYBRIDGE(dev))
 +              wm[3] *= 2;
 +}
 +
 +static void intel_print_wm_latency(struct drm_device *dev,
 +                                 const char *name,
 +                                 const uint16_t wm[5])
 +{
 +      int level, max_level;
 +
 +      /* how many WM levels are we expecting */
 +      if (IS_HASWELL(dev))
 +              max_level = 4;
 +      else if (INTEL_INFO(dev)->gen >= 6)
 +              max_level = 3;
 +      else
 +              max_level = 2;
 +
 +      for (level = 0; level <= max_level; level++) {
 +              unsigned int latency = wm[level];
 +
 +              if (latency == 0) {
 +                      DRM_ERROR("%s WM%d latency not provided\n",
 +                                name, level);
 +                      continue;
 +              }
 +
 +              /* WM1+ latency values in 0.5us units */
 +              if (level > 0)
 +                      latency *= 5;
 +
 +              DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
 +                            name, level, wm[level],
 +                            latency / 10, latency % 10);
 +      }
 +}
 +
 +static void intel_setup_wm_latency(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +
 +      intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
 +
 +      memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
 +             sizeof(dev_priv->wm.pri_latency));
 +      memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
 +             sizeof(dev_priv->wm.pri_latency));
 +
 +      intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
 +      intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
 +
 +      intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
 +      intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
 +      intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
 +}
 +
  static void hsw_compute_wm_parameters(struct drm_device *dev,
                                      struct hsw_pipe_wm_parameters *params,
 -                                    uint32_t *wm,
                                      struct hsw_wm_maximums *lp_max_1_2,
                                      struct hsw_wm_maximums *lp_max_5_6)
  {
 -      struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        struct drm_plane *plane;
 -      uint64_t sskpd = I915_READ64(MCH_SSKPD);
        enum pipe pipe;
 -      int pipes_active = 0, sprites_enabled = 0;
 -
 -      if ((sskpd >> 56) & 0xFF)
 -              wm[0] = (sskpd >> 56) & 0xFF;
 -      else
 -              wm[0] = sskpd & 0xF;
 -      wm[1] = ((sskpd >> 4) & 0xFF) * 5;
 -      wm[2] = ((sskpd >> 12) & 0xFF) * 5;
 -      wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
 -      wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
 +      struct intel_wm_config config = {};
  
        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
                if (!p->active)
                        continue;
  
 -              pipes_active++;
 +              config.num_pipes_active++;
  
                p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
 -              p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
 -              p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
 -              p->cur_bytes_per_pixel = 4;
 -              p->pri_horiz_pixels =
 +              p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
 +              p->pri.bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
 +              p->cur.bytes_per_pixel = 4;
 +              p->pri.horiz_pixels =
                        intel_crtc->config.requested_mode.hdisplay;
 -              p->cur_horiz_pixels = 64;
 +              p->cur.horiz_pixels = 64;
 +              /* TODO: for now, assume primary and cursor planes are always enabled. */
 +              p->pri.enabled = true;
 +              p->cur.enabled = true;
        }
  
        list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
                pipe = intel_plane->pipe;
                p = &params[pipe];
  
 -              p->sprite_enabled = intel_plane->wm.enable;
 -              p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel;
 -              p->spr_horiz_pixels = intel_plane->wm.horiz_pixels;
 +              p->spr = intel_plane->wm;
  
 -              if (p->sprite_enabled)
 -                      sprites_enabled++;
 +              config.sprites_enabled |= p->spr.enabled;
 +              config.sprites_scaled |= p->spr.scaled;
        }
  
 -      if (pipes_active > 1) {
 -              lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
 -              lp_max_1_2->spr = lp_max_5_6->spr = 128;
 -              lp_max_1_2->cur = lp_max_5_6->cur = 64;
 -      } else {
 -              lp_max_1_2->pri = sprites_enabled ? 384 : 768;
 -              lp_max_5_6->pri = sprites_enabled ? 128 : 768;
 -              lp_max_1_2->spr = 384;
 -              lp_max_5_6->spr = 640;
 -              lp_max_1_2->cur = lp_max_5_6->cur = 255;
 -      }
 -      lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
 +      ilk_wm_max(dev, 1, &config, INTEL_DDB_PART_1_2, lp_max_1_2);
 +
 +      /* 5/6 split only in single pipe config on IVB+ */
 +      if (INTEL_INFO(dev)->gen >= 7 && config.num_pipes_active <= 1)
 +              ilk_wm_max(dev, 1, &config, INTEL_DDB_PART_5_6, lp_max_5_6);
 +      else
 +              *lp_max_5_6 = *lp_max_1_2;
  }
  
  static void hsw_compute_wm_results(struct drm_device *dev,
                                   struct hsw_pipe_wm_parameters *params,
 -                                 uint32_t *wm,
                                   struct hsw_wm_maximums *lp_maximums,
                                   struct hsw_wm_values *results)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
 -      struct hsw_lp_wm_result lp_results[4] = {};
 +      struct intel_wm_level lp_results[4] = {};
        enum pipe pipe;
        int level, max_level, wm_lp;
  
        for (level = 1; level <= 4; level++)
 -              if (!hsw_compute_lp_wm(wm[level], lp_maximums, params,
 +              if (!hsw_compute_lp_wm(dev_priv, level,
 +                                     lp_maximums, params,
                                       &lp_results[level - 1]))
                        break;
        max_level = level - 1;
  
 +      memset(results, 0, sizeof(*results));
 +
        /* The spec says it is preferred to disable FBC WMs instead of disabling
         * a WM level. */
        results->enable_fbc_wm = true;
        for (level = 1; level <= max_level; level++) {
 -              if (!lp_results[level - 1].fbc_enable) {
 +              if (lp_results[level - 1].fbc_val > lp_maximums->fbc) {
                        results->enable_fbc_wm = false;
 -                      break;
 +                      lp_results[level - 1].fbc_val = 0;
                }
        }
  
 -      memset(results, 0, sizeof(*results));
        for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
 -              const struct hsw_lp_wm_result *r;
 +              const struct intel_wm_level *r;
  
                level = (max_level == 4 && wm_lp > 1) ? wm_lp + 1 : wm_lp;
                if (level > max_level)
        }
  
        for_each_pipe(pipe)
 -              results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
 -                                                           pipe,
 +              results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, pipe,
                                                             &params[pipe]);
  
        for_each_pipe(pipe) {
  
  /* Find the result with the highest level enabled. Check for enable_fbc_wm in
   * case both are at the same level. Prefer r1 in case they're the same. */
 -struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
 -                                         struct hsw_wm_values *r2)
 +static struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
 +                                                struct hsw_wm_values *r2)
  {
        int i, val_r1 = 0, val_r2 = 0;
  
   */
  static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
                                struct hsw_wm_values *results,
 -                              enum hsw_data_buf_partitioning partitioning)
 +                              enum intel_ddb_partitioning partitioning)
  {
        struct hsw_wm_values previous;
        uint32_t val;
 -      enum hsw_data_buf_partitioning prev_partitioning;
 +      enum intel_ddb_partitioning prev_partitioning;
        bool prev_enable_fbc_wm;
  
        previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
        previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
  
        prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
 -                          HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2;
 +                              INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
  
        prev_enable_fbc_wm = !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
  
  
        if (prev_partitioning != partitioning) {
                val = I915_READ(WM_MISC);
 -              if (partitioning == HSW_DATA_BUF_PART_1_2)
 +              if (partitioning == INTEL_DDB_PART_1_2)
                        val &= ~WM_MISC_DATA_PARTITION_5_6;
                else
                        val |= WM_MISC_DATA_PARTITION_5_6;
@@@ -2847,39 -2595,44 +2847,39 @@@ static void haswell_update_wm(struct dr
        struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
        struct hsw_pipe_wm_parameters params[3];
        struct hsw_wm_values results_1_2, results_5_6, *best_results;
 -      uint32_t wm[5];
 -      enum hsw_data_buf_partitioning partitioning;
 +      enum intel_ddb_partitioning partitioning;
  
 -      hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
 +      hsw_compute_wm_parameters(dev, params, &lp_max_1_2, &lp_max_5_6);
  
 -      hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
 +      hsw_compute_wm_results(dev, params,
 +                             &lp_max_1_2, &results_1_2);
        if (lp_max_1_2.pri != lp_max_5_6.pri) {
 -              hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
 -                                     &results_5_6);
 +              hsw_compute_wm_results(dev, params,
 +                                     &lp_max_5_6, &results_5_6);
                best_results = hsw_find_best_result(&results_1_2, &results_5_6);
        } else {
                best_results = &results_1_2;
        }
  
        partitioning = (best_results == &results_1_2) ?
 -                     HSW_DATA_BUF_PART_1_2 : HSW_DATA_BUF_PART_5_6;
 +                     INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
  
        hsw_write_wm_values(dev_priv, best_results, partitioning);
  }
  
 -static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
 +static void haswell_update_sprite_wm(struct drm_plane *plane,
 +                                   struct drm_crtc *crtc,
                                     uint32_t sprite_width, int pixel_size,
 -                                   bool enable)
 +                                   bool enabled, bool scaled)
  {
 -      struct drm_plane *plane;
 +      struct intel_plane *intel_plane = to_intel_plane(plane);
  
 -      list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
 -              struct intel_plane *intel_plane = to_intel_plane(plane);
 -
 -              if (intel_plane->pipe == pipe) {
 -                      intel_plane->wm.enable = enable;
 -                      intel_plane->wm.horiz_pixels = sprite_width + 1;
 -                      intel_plane->wm.bytes_per_pixel = pixel_size;
 -                      break;
 -              }
 -      }
 +      intel_plane->wm.enabled = enabled;
 +      intel_plane->wm.scaled = scaled;
 +      intel_plane->wm.horiz_pixels = sprite_width;
 +      intel_plane->wm.bytes_per_pixel = pixel_size;
  
 -      haswell_update_wm(dev);
 +      haswell_update_wm(plane->dev);
  }
  
  static bool
@@@ -2958,20 -2711,17 +2958,20 @@@ sandybridge_compute_sprite_srwm(struct 
        return *sprite_wm > 0x3ff ? false : true;
  }
  
 -static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
 +static void sandybridge_update_sprite_wm(struct drm_plane *plane,
 +                                       struct drm_crtc *crtc,
                                         uint32_t sprite_width, int pixel_size,
 -                                       bool enable)
 +                                       bool enabled, bool scaled)
  {
 +      struct drm_device *dev = plane->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 -      int latency = SNB_READ_WM0_LATENCY() * 100;     /* In unit 0.1us */
 +      int pipe = to_intel_plane(plane)->pipe;
 +      int latency = dev_priv->wm.spr_latency[0] * 100;        /* In unit 0.1us */
        u32 val;
        int sprite_wm, reg;
        int ret;
  
 -      if (!enable)
 +      if (!enabled)
                return;
  
        switch (pipe) {
        ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
                                              pixel_size,
                                              &sandybridge_display_srwm_info,
 -                                            SNB_READ_WM1_LATENCY() * 500,
 +                                            dev_priv->wm.spr_latency[1] * 500,
                                              &sprite_wm);
        if (!ret) {
                DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %c\n",
        ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
                                              pixel_size,
                                              &sandybridge_display_srwm_info,
 -                                            SNB_READ_WM2_LATENCY() * 500,
 +                                            dev_priv->wm.spr_latency[2] * 500,
                                              &sprite_wm);
        if (!ret) {
                DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %c\n",
        ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
                                              pixel_size,
                                              &sandybridge_display_srwm_info,
 -                                            SNB_READ_WM3_LATENCY() * 500,
 +                                            dev_priv->wm.spr_latency[3] * 500,
                                              &sprite_wm);
        if (!ret) {
                DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %c\n",
@@@ -3084,16 -2834,15 +3084,16 @@@ void intel_update_watermarks(struct drm
                dev_priv->display.update_wm(dev);
  }
  
 -void intel_update_sprite_watermarks(struct drm_device *dev, int pipe,
 +void intel_update_sprite_watermarks(struct drm_plane *plane,
 +                                  struct drm_crtc *crtc,
                                    uint32_t sprite_width, int pixel_size,
 -                                  bool enable)
 +                                  bool enabled, bool scaled)
  {
 -      struct drm_i915_private *dev_priv = dev->dev_private;
 +      struct drm_i915_private *dev_priv = plane->dev->dev_private;
  
        if (dev_priv->display.update_sprite_wm)
 -              dev_priv->display.update_sprite_wm(dev, pipe, sprite_width,
 -                                                 pixel_size, enable);
 +              dev_priv->display.update_sprite_wm(plane, crtc, sprite_width,
 +                                                 pixel_size, enabled, scaled);
  }
  
  static struct drm_i915_gem_object *
@@@ -3110,7 -2859,7 +3110,7 @@@ intel_alloc_context_page(struct drm_dev
                return NULL;
        }
  
 -      ret = i915_gem_object_pin(ctx, 4096, true, false);
 +      ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
        if (ret) {
                DRM_ERROR("failed to pin power context: %d\n", ret);
                goto err_unref;
@@@ -3327,12 -3076,19 +3327,12 @@@ void gen6_set_rps(struct drm_device *de
   */
  static void vlv_update_rps_cur_delay(struct drm_i915_private *dev_priv)
  {
 -      unsigned long timeout = jiffies + msecs_to_jiffies(10);
        u32 pval;
  
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
  
 -      do {
 -              pval = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 -              if (time_after(jiffies, timeout)) {
 -                      DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
 -                      break;
 -              }
 -              udelay(10);
 -      } while (pval & 1);
 +      if (wait_for(((pval = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) & GENFREQSTATUS) == 0, 10))
 +              DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
  
        pval >>= 8;
  
@@@ -3373,10 -3129,13 +3373,10 @@@ void valleyview_set_rps(struct drm_devi
        trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv->mem_freq, val));
  }
  
 -
 -static void gen6_disable_rps(struct drm_device *dev)
 +static void gen6_disable_rps_interrupts(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
  
 -      I915_WRITE(GEN6_RC_CONTROL, 0);
 -      I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
        I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
        I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
        /* Complete PM interrupt masking here doesn't race with the rps work
         * item again unmasking PM interrupts because that is using a different
         * register (PMIMR) to mask PM interrupts. The only risk is in leaving
         * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
  
 -      spin_lock_irq(&dev_priv->rps.lock);
 +      spin_lock_irq(&dev_priv->irq_lock);
        dev_priv->rps.pm_iir = 0;
 -      spin_unlock_irq(&dev_priv->rps.lock);
 +      spin_unlock_irq(&dev_priv->irq_lock);
  
        I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
  }
  
 -static void valleyview_disable_rps(struct drm_device *dev)
 +static void gen6_disable_rps(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
  
        I915_WRITE(GEN6_RC_CONTROL, 0);
 -      I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 -      I915_WRITE(GEN6_PMIER, 0);
 -      /* Complete PM interrupt masking here doesn't race with the rps work
 -       * item again unmasking PM interrupts because that is using a different
 -       * register (PMIMR) to mask PM interrupts. The only risk is in leaving
 -       * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
 +      I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
  
 -      spin_lock_irq(&dev_priv->rps.lock);
 -      dev_priv->rps.pm_iir = 0;
 -      spin_unlock_irq(&dev_priv->rps.lock);
 +      gen6_disable_rps_interrupts(dev);
 +}
  
 -      I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 +static void valleyview_disable_rps(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +
 +      I915_WRITE(GEN6_RC_CONTROL, 0);
 +
 +      gen6_disable_rps_interrupts(dev);
  
        if (dev_priv->vlv_pctx) {
                drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
  
  int intel_enable_rc6(const struct drm_device *dev)
  {
 +      /* No RC6 before Ironlake */
 +      if (INTEL_INFO(dev)->gen < 5)
 +              return 0;
 +
        /* Respect the kernel parameter if it is set */
        if (i915_enable_rc6 >= 0)
                return i915_enable_rc6;
        return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
  }
  
 +static void gen6_enable_rps_interrupts(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +
 +      spin_lock_irq(&dev_priv->irq_lock);
 +      WARN_ON(dev_priv->rps.pm_iir);
 +      snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
 +      I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
 +      spin_unlock_irq(&dev_priv->irq_lock);
 +      /* only unmask PM interrupts we need. Mask all others. */
 +      I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RPS_EVENTS);
 +}
 +
  static void gen6_enable_rps(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
  
        I915_WRITE(GEN6_RC_SLEEP, 0);
        I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
 -      I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
 +      if (INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev))
 +              I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
 +      else
 +              I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
        I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
        I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
  
  
        gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
  
 -      /* requires MSI enabled */
 -      I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) | GEN6_PM_RPS_EVENTS);
 -      spin_lock_irq(&dev_priv->rps.lock);
 -      /* FIXME: Our interrupt enabling sequence is bonghits.
 -       * dev_priv->rps.pm_iir really should be 0 here. */
 -      dev_priv->rps.pm_iir = 0;
 -      I915_WRITE(GEN6_PMIMR, I915_READ(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS);
 -      I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
 -      spin_unlock_irq(&dev_priv->rps.lock);
 -      /* unmask all PM interrupts */
 -      I915_WRITE(GEN6_PMINTRMSK, 0);
 +      gen6_enable_rps_interrupts(dev);
  
        rc6vids = 0;
        ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
        gen6_gt_force_wake_put(dev_priv);
  }
  
 -static void gen6_update_ring_freq(struct drm_device *dev)
 +void gen6_update_ring_freq(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int min_freq = 15;
@@@ -3733,7 -3482,7 +3733,7 @@@ static void valleyview_setup_pctx(struc
                pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
                pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
                                                                      pcbr_offset,
 -                                                                    -1,
 +                                                                    I915_GTT_OFFSET_NONE,
                                                                      pctx_size);
                goto out;
        }
@@@ -3858,7 -3607,14 +3858,7 @@@ static void valleyview_enable_rps(struc
  
        valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
  
 -      /* requires MSI enabled */
 -      I915_WRITE(GEN6_PMIER, GEN6_PM_RPS_EVENTS);
 -      spin_lock_irq(&dev_priv->rps.lock);
 -      WARN_ON(dev_priv->rps.pm_iir != 0);
 -      I915_WRITE(GEN6_PMIMR, 0);
 -      spin_unlock_irq(&dev_priv->rps.lock);
 -      /* enable all PM interrupts */
 -      I915_WRITE(GEN6_PMINTRMSK, 0);
 +      gen6_enable_rps_interrupts(dev);
  
        gen6_gt_force_wake_put(dev_priv);
  }
@@@ -3952,7 -3708,7 +3952,7 @@@ static void ironlake_enable_rc6(struct 
  
        intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
        intel_ring_emit(ring, MI_SET_CONTEXT);
 -      intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset |
 +      intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
                        MI_MM_SPACE_GTT |
                        MI_SAVE_EXT_STATE_EN |
                        MI_RESTORE_EXT_STATE_EN |
                return;
        }
  
 -      I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN);
 +      I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
        I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
  }
  
@@@ -4673,10 -4429,7 +4673,10 @@@ static void ironlake_init_clock_gating(
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
  
 -      /* Required for FBC */
 +      /*
 +       * Required for FBC
 +       * WaFbcDisableDpfcClockGating:ilk
 +       */
        dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
                   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
                   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
         * The bit 7,8,9 of 0x42020.
         */
        if (IS_IRONLAKE_M(dev)) {
 +              /* WaFbcAsynchFlipDisableFbcQueue:ilk */
                I915_WRITE(ILK_DISPLAY_CHICKEN1,
                           I915_READ(ILK_DISPLAY_CHICKEN1) |
                           ILK_FBCQ_DIS);
@@@ -4850,8 -4602,6 +4850,8 @@@ static void gen6_init_clock_gating(stru
         * The bit5 and bit7 of 0x42020
         * The bit14 of 0x70180
         * The bit14 of 0x71180
 +       *
 +       * WaFbcAsynchFlipDisableFbcQueue:snb
         */
        I915_WRITE(ILK_DISPLAY_CHICKEN1,
                   I915_READ(ILK_DISPLAY_CHICKEN1) |
                   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
                   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
  
 -      /* WaMbcDriverBootEnable:snb */
 -      I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 -                 GEN6_MBCTL_ENABLE_BOOT_FETCH);
 -
        g4x_disable_trickle_feed(dev);
  
        /* The default value should be 0x200 according to docs, but the two
@@@ -4959,6 -4713,10 +4959,6 @@@ static void haswell_init_clock_gating(s
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
  
 -      /* WaMbcDriverBootEnable:hsw */
 -      I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 -                 GEN6_MBCTL_ENABLE_BOOT_FETCH);
 -
        /* WaSwitchSolVfFArbitrationPriority:hsw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
  
@@@ -5042,6 -4800,10 +5042,6 @@@ static void ivybridge_init_clock_gating
  
        g4x_disable_trickle_feed(dev);
  
 -      /* WaMbcDriverBootEnable:ivb */
 -      I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 -                 GEN6_MBCTL_ENABLE_BOOT_FETCH);
 -
        /* WaVSRefCountFullforceMissDisable:ivb */
        gen7_setup_fixed_func_scheduler(dev_priv);
  
@@@ -5101,6 -4863,11 +5101,6 @@@ static void valleyview_init_clock_gatin
                   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
                   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
  
 -      /* WaMbcDriverBootEnable:vlv */
 -      I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
 -                 GEN6_MBCTL_ENABLE_BOOT_FETCH);
 -
 -
        /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
         * gating disable must be set.  Failure to set it results in
         * flickering pixels due to Z write ordering failures after
@@@ -5268,7 -5035,7 +5268,7 @@@ bool intel_display_power_enabled(struc
        case POWER_DOMAIN_TRANSCODER_B:
        case POWER_DOMAIN_TRANSCODER_C:
                return I915_READ(HSW_PWR_WELL_DRIVER) ==
 -                     (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
 +                   (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED);
        default:
                BUG();
        }
@@@ -5281,24 -5048,41 +5281,42 @@@ static void __intel_set_power_well(stru
        uint32_t tmp;
  
        tmp = I915_READ(HSW_PWR_WELL_DRIVER);
 -      is_enabled = tmp & HSW_PWR_WELL_STATE;
 -      enable_requested = tmp & HSW_PWR_WELL_ENABLE;
 +      is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;
 +      enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;
  
        if (enable) {
                if (!enable_requested)
 -                      I915_WRITE(HSW_PWR_WELL_DRIVER, HSW_PWR_WELL_ENABLE);
 +                      I915_WRITE(HSW_PWR_WELL_DRIVER,
 +                                 HSW_PWR_WELL_ENABLE_REQUEST);
  
                if (!is_enabled) {
                        DRM_DEBUG_KMS("Enabling power well\n");
                        if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
 -                                    HSW_PWR_WELL_STATE), 20))
 +                                    HSW_PWR_WELL_STATE_ENABLED), 20))
                                DRM_ERROR("Timeout enabling power well\n");
                }
        } else {
                if (enable_requested) {
+                       unsigned long irqflags;
+                       enum pipe p;
                        I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
+                       POSTING_READ(HSW_PWR_WELL_DRIVER);
                        DRM_DEBUG_KMS("Requesting to disable the power well\n");
+                       /*
+                        * After this, the registers on the pipes that are part
+                        * of the power well will become zero, so we have to
+                        * adjust our counters according to that.
+                        *
+                        * FIXME: Should we do this in general in
+                        * drm_vblank_post_modeset?
+                        */
+                       spin_lock_irqsave(&dev->vbl_lock, irqflags);
+                       for_each_pipe(p)
+                               if (p != PIPE_A)
+                                       dev->last_vblank[p] = 0;
+                       spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
                }
        }
  }
@@@ -5394,21 -5178,10 +5412,21 @@@ void intel_init_power_well(struct drm_d
  
        /* We're taking over the BIOS, so clear any requests made by it since
         * the driver is in charge now. */
 -      if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE)
 +      if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
                I915_WRITE(HSW_PWR_WELL_BIOS, 0);
  }
  
 +/* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */
 +void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
 +{
 +      hsw_disable_package_c8(dev_priv);
 +}
 +
 +void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
 +{
 +      hsw_enable_package_c8(dev_priv);
 +}
 +
  /* Set up chip specific power management-related functions */
  void intel_init_pm(struct drm_device *dev)
  {
  
        /* For FIFO watermark updates */
        if (HAS_PCH_SPLIT(dev)) {
 +              intel_setup_wm_latency(dev);
 +
                if (IS_GEN5(dev)) {
 -                      if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
 +                      if (dev_priv->wm.pri_latency[1] &&
 +                          dev_priv->wm.spr_latency[1] &&
 +                          dev_priv->wm.cur_latency[1])
                                dev_priv->display.update_wm = ironlake_update_wm;
                        else {
                                DRM_DEBUG_KMS("Failed to get proper latency. "
                        }
                        dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
                } else if (IS_GEN6(dev)) {
 -                      if (SNB_READ_WM0_LATENCY()) {
 +                      if (dev_priv->wm.pri_latency[0] &&
 +                          dev_priv->wm.spr_latency[0] &&
 +                          dev_priv->wm.cur_latency[0]) {
                                dev_priv->display.update_wm = sandybridge_update_wm;
                                dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
                        } else {
                        }
                        dev_priv->display.init_clock_gating = gen6_init_clock_gating;
                } else if (IS_IVYBRIDGE(dev)) {
 -                      if (SNB_READ_WM0_LATENCY()) {
 +                      if (dev_priv->wm.pri_latency[0] &&
 +                          dev_priv->wm.spr_latency[0] &&
 +                          dev_priv->wm.cur_latency[0]) {
                                dev_priv->display.update_wm = ivybridge_update_wm;
                                dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
                        } else {
                        }
                        dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
                } else if (IS_HASWELL(dev)) {
 -                      if (I915_READ64(MCH_SSKPD)) {
 +                      if (dev_priv->wm.pri_latency[0] &&
 +                          dev_priv->wm.spr_latency[0] &&
 +                          dev_priv->wm.cur_latency[0]) {
                                dev_priv->display.update_wm = haswell_update_wm;
                                dev_priv->display.update_sprite_wm =
                                        haswell_update_sprite_wm;
        }
  }
  
 -static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
 -{
 -      u32 gt_thread_status_mask;
 -
 -      if (IS_HASWELL(dev_priv->dev))
 -              gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW;
 -      else
 -              gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK;
 -
 -      /* w/a for a sporadic read returning 0 by waiting for the GT
 -       * thread to wake up.
 -       */
 -      if (wait_for_atomic_us((I915_READ_NOTRACE(GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500))
 -              DRM_ERROR("GT thread status wait timed out\n");
 -}
 -
 -static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
 -{
 -      I915_WRITE_NOTRACE(FORCEWAKE, 0);
 -      POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 -}
 -
 -static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 -{
 -      if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0,
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 -
 -      I915_WRITE_NOTRACE(FORCEWAKE, 1);
 -      POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 -
 -      if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1),
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
 -
 -      /* WaRsForcewakeWaitTC0:snb */
 -      __gen6_gt_wait_for_thread_c0(dev_priv);
 -}
 -
 -static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
 -{
 -      I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
 -      /* something from same cacheline, but !FORCEWAKE_MT */
 -      POSTING_READ(ECOBUS);
 -}
 -
 -static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
 -{
 -      u32 forcewake_ack;
 -
 -      if (IS_HASWELL(dev_priv->dev))
 -              forcewake_ack = FORCEWAKE_ACK_HSW;
 -      else
 -              forcewake_ack = FORCEWAKE_MT_ACK;
 -
 -      if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL) == 0,
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 -
 -      I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 -      /* something from same cacheline, but !FORCEWAKE_MT */
 -      POSTING_READ(ECOBUS);
 -
 -      if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL),
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
 -
 -      /* WaRsForcewakeWaitTC0:ivb,hsw */
 -      __gen6_gt_wait_for_thread_c0(dev_priv);
 -}
 -
 -/*
 - * Generally this is called implicitly by the register read function. However,
 - * if some sequence requires the GT to not power down then this function should
 - * be called at the beginning of the sequence followed by a call to
 - * gen6_gt_force_wake_put() at the end of the sequence.
 - */
 -void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 -{
 -      unsigned long irqflags;
 -
 -      spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
 -      if (dev_priv->forcewake_count++ == 0)
 -              dev_priv->gt.force_wake_get(dev_priv);
 -      spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
 -}
 -
 -void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
 -{
 -      u32 gtfifodbg;
 -      gtfifodbg = I915_READ_NOTRACE(GTFIFODBG);
 -      if (WARN(gtfifodbg & GT_FIFO_CPU_ERROR_MASK,
 -           "MMIO read or write has been dropped %x\n", gtfifodbg))
 -              I915_WRITE_NOTRACE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK);
 -}
 -
 -static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 -{
 -      I915_WRITE_NOTRACE(FORCEWAKE, 0);
 -      /* something from same cacheline, but !FORCEWAKE */
 -      POSTING_READ(ECOBUS);
 -      gen6_gt_check_fifodbg(dev_priv);
 -}
 -
 -static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
 -{
 -      I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
 -      /* something from same cacheline, but !FORCEWAKE_MT */
 -      POSTING_READ(ECOBUS);
 -      gen6_gt_check_fifodbg(dev_priv);
 -}
 -
 -/*
 - * see gen6_gt_force_wake_get()
 - */
 -void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 -{
 -      unsigned long irqflags;
 -
 -      spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
 -      if (--dev_priv->forcewake_count == 0)
 -              dev_priv->gt.force_wake_put(dev_priv);
 -      spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
 -}
 -
 -int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 -{
 -      int ret = 0;
 -
 -      if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
 -              int loop = 500;
 -              u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
 -              while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
 -                      udelay(10);
 -                      fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
 -              }
 -              if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
 -                      ++ret;
 -              dev_priv->gt_fifo_count = fifo;
 -      }
 -      dev_priv->gt_fifo_count--;
 -
 -      return ret;
 -}
 -
 -static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
 -{
 -      I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff));
 -      /* something from same cacheline, but !FORCEWAKE_VLV */
 -      POSTING_READ(FORCEWAKE_ACK_VLV);
 -}
 -
 -static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 -{
 -      if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 -
 -      I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 -      I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
 -                         _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 -
 -      if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
 -
 -      if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_MEDIA_VLV) &
 -                           FORCEWAKE_KERNEL),
 -                          FORCEWAKE_ACK_TIMEOUT_MS))
 -              DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
 -
 -      /* WaRsForcewakeWaitTC0:vlv */
 -      __gen6_gt_wait_for_thread_c0(dev_priv);
 -}
 -
 -static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 -{
 -      I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
 -      I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
 -                         _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
 -      /* The below doubles as a POSTING_READ */
 -      gen6_gt_check_fifodbg(dev_priv);
 -}
 -
 -void intel_gt_sanitize(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = dev->dev_private;
 -
 -      if (IS_VALLEYVIEW(dev)) {
 -              vlv_force_wake_reset(dev_priv);
 -      } else if (INTEL_INFO(dev)->gen >= 6) {
 -              __gen6_gt_force_wake_reset(dev_priv);
 -              if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
 -                      __gen6_gt_force_wake_mt_reset(dev_priv);
 -      }
 -
 -      /* BIOS often leaves RC6 enabled, but disable it for hw init */
 -      if (INTEL_INFO(dev)->gen >= 6)
 -              intel_disable_gt_powersave(dev);
 -}
 -
 -void intel_gt_init(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = dev->dev_private;
 -
 -      if (IS_VALLEYVIEW(dev)) {
 -              dev_priv->gt.force_wake_get = vlv_force_wake_get;
 -              dev_priv->gt.force_wake_put = vlv_force_wake_put;
 -      } else if (IS_HASWELL(dev)) {
 -              dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
 -              dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
 -      } else if (IS_IVYBRIDGE(dev)) {
 -              u32 ecobus;
 -
 -              /* IVB configs may use multi-threaded forcewake */
 -
 -              /* A small trick here - if the bios hasn't configured
 -               * MT forcewake, and if the device is in RC6, then
 -               * force_wake_mt_get will not wake the device and the
 -               * ECOBUS read will return zero. Which will be
 -               * (correctly) interpreted by the test below as MT
 -               * forcewake being disabled.
 -               */
 -              mutex_lock(&dev->struct_mutex);
 -              __gen6_gt_force_wake_mt_get(dev_priv);
 -              ecobus = I915_READ_NOTRACE(ECOBUS);
 -              __gen6_gt_force_wake_mt_put(dev_priv);
 -              mutex_unlock(&dev->struct_mutex);
 -
 -              if (ecobus & FORCEWAKE_MT_ENABLE) {
 -                      dev_priv->gt.force_wake_get =
 -                                              __gen6_gt_force_wake_mt_get;
 -                      dev_priv->gt.force_wake_put =
 -                                              __gen6_gt_force_wake_mt_put;
 -              } else {
 -                      DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
 -                      DRM_INFO("when using vblank-synced partial screen updates.\n");
 -                      dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
 -                      dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
 -              }
 -      } else if (IS_GEN6(dev)) {
 -              dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
 -              dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
 -      }
 -}
 -
 -void intel_pm_init(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = dev->dev_private;
 -
 -      INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
 -                        intel_gen6_powersave_work);
 -}
 -
  int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
  {
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@@ -5649,11 -5666,3 +5667,11 @@@ int vlv_freq_opcode(int ddr_freq, int v
        return val;
  }
  
 +void intel_pm_init(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +
 +      INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
 +                        intel_gen6_powersave_work);
 +}
 +
index 7de29d40d1ad175b6b9a48f25ac532e11e427029,079ef0129e7416aabcf46e3c96ee409b65702808..f05cceac5a52326ad203bb234816cbd2f6c4bb04
@@@ -440,14 -440,14 +440,14 @@@ static int init_ring_common(struct inte
         * registers with the above sequence (the readback of the HEAD registers
         * also enforces ordering), otherwise the hw might lose the new ring
         * register values. */
 -      I915_WRITE_START(ring, obj->gtt_offset);
 +      I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
        I915_WRITE_CTL(ring,
                        ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
                        | RING_VALID);
  
        /* If the head is still not zero, the ring is dead */
        if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
 -                   I915_READ_START(ring) == obj->gtt_offset &&
 +                   I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
                     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
                DRM_ERROR("%s initialization failed "
                                "ctl %08x head %08x tail %08x start %08x\n",
@@@ -501,11 -501,11 +501,11 @@@ init_pipe_control(struct intel_ring_buf
  
        i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
  
 -      ret = i915_gem_object_pin(obj, 4096, true, false);
 +      ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
        if (ret)
                goto err_unref;
  
 -      pc->gtt_offset = obj->gtt_offset;
 +      pc->gtt_offset = i915_gem_obj_ggtt_offset(obj);
        pc->cpu_page = kmap(sg_page(obj->pages->sgl));
        if (pc->cpu_page == NULL) {
                ret = -ENOMEM;
@@@ -836,8 -836,11 +836,8 @@@ gen5_ring_get_irq(struct intel_ring_buf
                return false;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (ring->irq_refcount.gt++ == 0) {
 -              dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
 -              I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 -              POSTING_READ(GTIMR);
 -      }
 +      if (ring->irq_refcount++ == 0)
 +              ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  
        return true;
@@@ -851,8 -854,11 +851,8 @@@ gen5_ring_put_irq(struct intel_ring_buf
        unsigned long flags;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (--ring->irq_refcount.gt == 0) {
 -              dev_priv->gt_irq_mask |= ring->irq_enable_mask;
 -              I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 -              POSTING_READ(GTIMR);
 -      }
 +      if (--ring->irq_refcount == 0)
 +              ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  }
  
@@@ -867,7 -873,7 +867,7 @@@ i9xx_ring_get_irq(struct intel_ring_buf
                return false;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (ring->irq_refcount.gt++ == 0) {
 +      if (ring->irq_refcount++ == 0) {
                dev_priv->irq_mask &= ~ring->irq_enable_mask;
                I915_WRITE(IMR, dev_priv->irq_mask);
                POSTING_READ(IMR);
@@@ -885,7 -891,7 +885,7 @@@ i9xx_ring_put_irq(struct intel_ring_buf
        unsigned long flags;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (--ring->irq_refcount.gt == 0) {
 +      if (--ring->irq_refcount == 0) {
                dev_priv->irq_mask |= ring->irq_enable_mask;
                I915_WRITE(IMR, dev_priv->irq_mask);
                POSTING_READ(IMR);
@@@ -904,7 -910,7 +904,7 @@@ i8xx_ring_get_irq(struct intel_ring_buf
                return false;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (ring->irq_refcount.gt++ == 0) {
 +      if (ring->irq_refcount++ == 0) {
                dev_priv->irq_mask &= ~ring->irq_enable_mask;
                I915_WRITE16(IMR, dev_priv->irq_mask);
                POSTING_READ16(IMR);
@@@ -922,7 -928,7 +922,7 @@@ i8xx_ring_put_irq(struct intel_ring_buf
        unsigned long flags;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (--ring->irq_refcount.gt == 0) {
 +      if (--ring->irq_refcount == 0) {
                dev_priv->irq_mask |= ring->irq_enable_mask;
                I915_WRITE16(IMR, dev_priv->irq_mask);
                POSTING_READ16(IMR);
@@@ -962,6 -968,18 +962,18 @@@ void intel_ring_setup_status_page(struc
  
        I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
        POSTING_READ(mmio);
+       /* Flush the TLB for this page */
+       if (INTEL_INFO(dev)->gen >= 6) {
+               u32 reg = RING_INSTPM(ring->mmio_base);
+               I915_WRITE(reg,
+                          _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+                                             INSTPM_SYNC_FLUSH));
+               if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
+                            1000))
+                       DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+                                 ring->name);
+       }
  }
  
  static int
@@@ -1015,14 -1033,16 +1027,14 @@@ gen6_ring_get_irq(struct intel_ring_buf
        gen6_gt_force_wake_get(dev_priv);
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (ring->irq_refcount.gt++ == 0) {
 +      if (ring->irq_refcount++ == 0) {
                if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
                        I915_WRITE_IMR(ring,
                                       ~(ring->irq_enable_mask |
                                         GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
                else
                        I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
 -              dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
 -              I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 -              POSTING_READ(GTIMR);
 +              ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
        }
        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  
@@@ -1037,13 -1057,15 +1049,13 @@@ gen6_ring_put_irq(struct intel_ring_buf
        unsigned long flags;
  
        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 -      if (--ring->irq_refcount.gt == 0) {
 +      if (--ring->irq_refcount == 0) {
                if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
                        I915_WRITE_IMR(ring,
                                       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
                else
                        I915_WRITE_IMR(ring, ~0);
 -              dev_priv->gt_irq_mask |= ring->irq_enable_mask;
 -              I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 -              POSTING_READ(GTIMR);
 +              ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
        }
        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  
@@@ -1060,12 -1082,14 +1072,12 @@@ hsw_vebox_get_irq(struct intel_ring_buf
        if (!dev->irq_enabled)
                return false;
  
 -      spin_lock_irqsave(&dev_priv->rps.lock, flags);
 -      if (ring->irq_refcount.pm++ == 0) {
 -              u32 pm_imr = I915_READ(GEN6_PMIMR);
 +      spin_lock_irqsave(&dev_priv->irq_lock, flags);
 +      if (ring->irq_refcount++ == 0) {
                I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
 -              I915_WRITE(GEN6_PMIMR, pm_imr & ~ring->irq_enable_mask);
 -              POSTING_READ(GEN6_PMIMR);
 +              snb_enable_pm_irq(dev_priv, ring->irq_enable_mask);
        }
 -      spin_unlock_irqrestore(&dev_priv->rps.lock, flags);
 +      spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  
        return true;
  }
@@@ -1080,12 -1104,14 +1092,12 @@@ hsw_vebox_put_irq(struct intel_ring_buf
        if (!dev->irq_enabled)
                return;
  
 -      spin_lock_irqsave(&dev_priv->rps.lock, flags);
 -      if (--ring->irq_refcount.pm == 0) {
 -              u32 pm_imr = I915_READ(GEN6_PMIMR);
 +      spin_lock_irqsave(&dev_priv->irq_lock, flags);
 +      if (--ring->irq_refcount == 0) {
                I915_WRITE_IMR(ring, ~0);
 -              I915_WRITE(GEN6_PMIMR, pm_imr | ring->irq_enable_mask);
 -              POSTING_READ(GEN6_PMIMR);
 +              snb_disable_pm_irq(dev_priv, ring->irq_enable_mask);
        }
 -      spin_unlock_irqrestore(&dev_priv->rps.lock, flags);
 +      spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  }
  
  static int
@@@ -1130,7 -1156,7 +1142,7 @@@ i830_dispatch_execbuffer(struct intel_r
                intel_ring_advance(ring);
        } else {
                struct drm_i915_gem_object *obj = ring->private;
 -              u32 cs_offset = obj->gtt_offset;
 +              u32 cs_offset = i915_gem_obj_ggtt_offset(obj);
  
                if (len > I830_BATCH_LIMIT)
                        return -ENOSPC;
@@@ -1210,12 -1236,12 +1222,12 @@@ static int init_status_page(struct inte
  
        i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
  
 -      ret = i915_gem_object_pin(obj, 4096, true, false);
 +      ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
        if (ret != 0) {
                goto err_unref;
        }
  
 -      ring->status_page.gfx_addr = obj->gtt_offset;
 +      ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
        ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
        if (ring->status_page.page_addr == NULL) {
                ret = -ENOMEM;
@@@ -1293,7 -1319,7 +1305,7 @@@ static int intel_init_ring_buffer(struc
  
        ring->obj = obj;
  
 -      ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
 +      ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
        if (ret)
                goto err_unref;
  
                goto err_unpin;
  
        ring->virtual_start =
 -              ioremap_wc(dev_priv->gtt.mappable_base + obj->gtt_offset,
 +              ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
                           ring->size);
        if (ring->virtual_start == NULL) {
                DRM_ERROR("Failed to map ringbuffer.\n");
@@@ -1580,8 -1606,6 +1592,8 @@@ void intel_ring_init_seqno(struct intel
        if (INTEL_INFO(ring->dev)->gen >= 6) {
                I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
                I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
 +              if (HAS_VEBOX(ring->dev))
 +                      I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
        }
  
        ring->set_seqno(ring, seqno);
@@@ -1816,7 -1840,7 +1828,7 @@@ int intel_init_render_ring_buffer(struc
                        return -ENOMEM;
                }
  
 -              ret = i915_gem_object_pin(obj, 0, true, false);
 +              ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
                if (ret != 0) {
                        drm_gem_object_unreference(&obj->base);
                        DRM_ERROR("Failed to ping batch bo\n");
@@@ -1996,7 -2020,8 +2008,7 @@@ int intel_init_vebox_ring_buffer(struc
        ring->add_request = gen6_add_request;
        ring->get_seqno = gen6_ring_get_seqno;
        ring->set_seqno = ring_set_seqno;
 -      ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT |
 -              PM_VEBOX_CS_ERROR_INTERRUPT;
 +      ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
        ring->irq_get = hsw_vebox_get_irq;
        ring->irq_put = hsw_vebox_put_irq;
        ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
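
The hsw_vebox_get_irq()/hsw_vebox_put_irq() hunks above fold the PM interrupt bookkeeping into the common irq_refcount, take dev_priv->irq_lock instead of rps.lock, and only touch the hardware mask on the 0->1 and 1->0 transitions via snb_enable_pm_irq()/snb_disable_pm_irq(). A minimal userspace sketch of that refcounted enable/disable pattern, with pthread mutexes standing in for the spinlock and all names invented:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified userspace analogue of the refcounted interrupt enable above:
 * the hardware mask is only touched on the 0->1 and 1->0 transitions, and
 * both the counter and the transition happen under one lock.  All names
 * (fake_ring, hw_irq_*) are invented for this sketch. */
struct fake_ring {
        pthread_mutex_t lock;
        unsigned int irq_refcount;
};

static void hw_irq_enable(void)  { puts("unmask user interrupt"); }
static void hw_irq_disable(void) { puts("mask user interrupt"); }

static bool ring_irq_get(struct fake_ring *ring)
{
        pthread_mutex_lock(&ring->lock);
        if (ring->irq_refcount++ == 0)
                hw_irq_enable();
        pthread_mutex_unlock(&ring->lock);
        return true;
}

static void ring_irq_put(struct fake_ring *ring)
{
        pthread_mutex_lock(&ring->lock);
        if (--ring->irq_refcount == 0)
                hw_irq_disable();
        pthread_mutex_unlock(&ring->lock);
}

int main(void)
{
        struct fake_ring ring = { PTHREAD_MUTEX_INITIALIZER, 0 };

        ring_irq_get(&ring);    /* 0 -> 1: unmask */
        ring_irq_get(&ring);    /* 1 -> 2: no hardware access */
        ring_irq_put(&ring);    /* 2 -> 1: no hardware access */
        ring_irq_put(&ring);    /* 1 -> 0: mask */
        return 0;
}
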
index fd4539d9ad2c6a00f38830be80d9a7fabe9fa604,d70e4a92773bfda96079b1406ffffa9a2f65a0e7..07b192fe15c6c6f9d0c23434280972616693ca48
@@@ -148,9 -148,7 +148,9 @@@ mgag200_bo_evict_flags(struct ttm_buffe
  
  static int mgag200_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
  {
 -      return 0;
 +      struct mgag200_bo *mgabo = mgag200_bo(bo);
 +
 +      return drm_vma_node_verify_access(&mgabo->gem.vma_node, filp);
  }
  
  static int mgag200_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
@@@ -323,7 -321,9 +323,8 @@@ int mgag200_bo_create(struct drm_devic
                return ret;
        }
  
 -      mgabo->gem.driver_private = NULL;
        mgabo->bo.bdev = &mdev->ttm.bdev;
+       mgabo->bo.bdev->dev_mapping = dev->dev_mapping;
  
        mgag200_ttm_placement(mgabo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
  
@@@ -354,6 -354,7 +355,7 @@@ int mgag200_bo_pin(struct mgag200_bo *b
                bo->pin_count++;
                if (gpu_addr)
                        *gpu_addr = mgag200_bo_gpu_offset(bo);
+               return 0;
        }
  
        mgag200_ttm_placement(bo, pl_flag);
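
The extra return 0 added to mgag200_bo_pin() above makes the already-pinned case bump the count, hand back the existing GPU offset and bail out instead of falling through to re-validate the buffer. A small userspace sketch of that early-return shape, with hypothetical names and a faked placement step:

#include <stdint.h>
#include <stdio.h>

/* Userspace sketch of the already-pinned path: bump the count, report the
 * existing GPU offset and return success without re-validating.  The
 * structure and the fixed VRAM offset are made up for the example. */
struct demo_bo {
        int pin_count;
        uint64_t gpu_offset;
};

static int demo_bo_pin(struct demo_bo *bo, uint64_t *gpu_addr)
{
        if (bo->pin_count) {
                bo->pin_count++;
                if (gpu_addr)
                        *gpu_addr = bo->gpu_offset;
                return 0;               /* already resident, nothing to do */
        }

        /* first pin: placement and validation would happen here */
        bo->gpu_offset = 0x100000;
        bo->pin_count = 1;
        if (gpu_addr)
                *gpu_addr = bo->gpu_offset;
        return 0;
}

int main(void)
{
        struct demo_bo bo = { 0, 0 };
        uint64_t addr;

        demo_bo_pin(&bo, &addr);
        demo_bo_pin(&bo, &addr);        /* takes the early-return path */
        printf("pin_count=%d addr=0x%llx\n", bo.pin_count,
               (unsigned long long)addr);
        return 0;
}
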
index 2e7c5fd3de3d5752780d017cf5751d48c19b1fb2,ec9cd6f10f910aac9f56c4acbc4d9e09d393c85a..20f9a538746eeba0164bc73c69f9a67a6330e840
   */
  
  #include <subdev/mc.h>
 +#include <linux/pm_runtime.h>
  
  static irqreturn_t
  nouveau_mc_intr(int irq, void *arg)
  {
        struct nouveau_mc *pmc = arg;
        const struct nouveau_mc_intr *map = pmc->intr_map;
 +      struct nouveau_device *device = nv_device(pmc);
        struct nouveau_subdev *unit;
        u32 stat, intr;
  
        intr = stat = nv_rd32(pmc, 0x000100);
 +      if (intr == 0xffffffff)
 +              return IRQ_NONE;
        while (stat && map->stat) {
                if (stat & map->stat) {
                        unit = nouveau_subdev(pmc, map->unit);
@@@ -51,8 -47,6 +51,8 @@@
                nv_error(pmc, "unknown intr 0x%08x\n", stat);
        }
  
 +      if (stat == IRQ_HANDLED)
 +              pm_runtime_mark_last_busy(&device->pdev->dev);
        return stat ? IRQ_HANDLED : IRQ_NONE;
  }
  
@@@ -86,7 -80,9 +86,9 @@@ _nouveau_mc_dtor(struct nouveau_object 
  
  int
  nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine,
-                  struct nouveau_oclass *oclass, int length, void **pobject)
+                  struct nouveau_oclass *oclass,
+                  const struct nouveau_mc_intr *intr_map,
+                  int length, void **pobject)
  {
        struct nouveau_device *device = nv_device(parent);
        struct nouveau_mc *pmc;
        if (ret)
                return ret;
  
+       pmc->intr_map = intr_map;
        ret = request_irq(device->pdev->irq, nouveau_mc_intr,
                          IRQF_SHARED, "nouveau", pmc);
        if (ret < 0)
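
The new guard in nouveau_mc_intr() above returns IRQ_NONE when the status register reads back 0xffffffff, which is what a powered-down or unplugged PCI device returns, so a shared interrupt line is not claimed on behalf of a dead GPU. A toy sketch of that check, with the register read simulated and the names invented:

#include <stdint.h>
#include <stdio.h>

/* Toy version of the all-ones guard: on PCI, reading a register of a
 * powered-down or removed device returns 0xffffffff, so the handler should
 * report "not mine" instead of decoding garbage. */
enum demo_irqreturn { DEMO_IRQ_NONE, DEMO_IRQ_HANDLED };

static uint32_t demo_read_intr(int device_present)
{
        return device_present ? 0x00000100 : 0xffffffffu;
}

static enum demo_irqreturn demo_mc_intr(int device_present)
{
        uint32_t stat = demo_read_intr(device_present);

        if (stat == 0xffffffffu)        /* device absent or powered off */
                return DEMO_IRQ_NONE;

        /* per-unit dispatch based on the status bits would go here */
        return stat ? DEMO_IRQ_HANDLED : DEMO_IRQ_NONE;
}

int main(void)
{
        printf("device present: %d\n", demo_mc_intr(1));
        printf("device absent:  %d\n", demo_mc_intr(0));
        return 0;
}
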
index 6413552df21cd3296a16f78907230dd16d0827b3,6a13ffb53bdb642e989cfdc75a7bc5f8341d08d3..d4fbf11360febee34d2774c08233944c6b546fc4
@@@ -22,7 -22,6 +22,7 @@@
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   * DEALINGS IN THE SOFTWARE.
   */
 +#include <linux/pm_runtime.h>
  
  #include <drm/drmP.h>
  #include <drm/drm_crtc_helper.h>
@@@ -607,6 -606,24 +607,24 @@@ nv_crtc_mode_set_regs(struct drm_crtc *
        regp->ramdac_a34 = 0x1;
  }
  
+ static int
+ nv_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
+ {
+       struct nv04_display *disp = nv04_display(crtc->dev);
+       struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+       struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+       int ret;
+       ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
+       if (ret == 0) {
+               if (disp->image[nv_crtc->index])
+                       nouveau_bo_unpin(disp->image[nv_crtc->index]);
+               nouveau_bo_ref(nvfb->nvbo, &disp->image[nv_crtc->index]);
+       }
+       return ret;
+ }
  /**
   * Sets up registers for the given mode/adjusted_mode pair.
   *
@@@ -623,10 -640,15 +641,15 @@@ nv_crtc_mode_set(struct drm_crtc *crtc
        struct drm_device *dev = crtc->dev;
        struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
        struct nouveau_drm *drm = nouveau_drm(dev);
+       int ret;
  
        NV_DEBUG(drm, "CTRC mode on CRTC %d:\n", nv_crtc->index);
        drm_mode_debug_printmodeline(adjusted_mode);
  
+       ret = nv_crtc_swap_fbs(crtc, old_fb);
+       if (ret)
+               return ret;
        /* unlock must come after turning off FP_TG_CONTROL in output_prepare */
        nv_lock_vga_crtc_shadow(dev, nv_crtc->index, -1);
  
@@@ -723,6 -745,7 +746,7 @@@ static void nv_crtc_commit(struct drm_c
  
  static void nv_crtc_destroy(struct drm_crtc *crtc)
  {
+       struct nv04_display *disp = nv04_display(crtc->dev);
        struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
  
        if (!nv_crtc)
  
        drm_crtc_cleanup(crtc);
  
+       if (disp->image[nv_crtc->index])
+               nouveau_bo_unpin(disp->image[nv_crtc->index]);
+       nouveau_bo_ref(NULL, &disp->image[nv_crtc->index]);
        nouveau_bo_unmap(nv_crtc->cursor.nvbo);
        nouveau_bo_unpin(nv_crtc->cursor.nvbo);
        nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
@@@ -754,6 -781,16 +782,16 @@@ nv_crtc_gamma_load(struct drm_crtc *crt
        nouveau_hw_load_state_palette(dev, nv_crtc->index, &nv04_display(dev)->mode_reg);
  }
  
+ static void
+ nv_crtc_disable(struct drm_crtc *crtc)
+ {
+       struct nv04_display *disp = nv04_display(crtc->dev);
+       struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+       if (disp->image[nv_crtc->index])
+               nouveau_bo_unpin(disp->image[nv_crtc->index]);
+       nouveau_bo_ref(NULL, &disp->image[nv_crtc->index]);
+ }
  static void
  nv_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, uint32_t start,
                  uint32_t size)
@@@ -792,7 -829,6 +830,6 @@@ nv04_crtc_do_mode_set_base(struct drm_c
        struct drm_framebuffer *drm_fb;
        struct nouveau_framebuffer *fb;
        int arb_burst, arb_lwm;
-       int ret;
  
        NV_DEBUG(drm, "index %d\n", nv_crtc->index);
  
                return 0;
        }
  
        /* If atomic, we want to switch to the fb we were passed, so
-        * now we update pointers to do that.  (We don't pin; just
-        * assume we're already pinned and update the base address.)
+        * now we update pointers to do that.
         */
        if (atomic) {
                drm_fb = passed_fb;
        } else {
                drm_fb = crtc->fb;
                fb = nouveau_framebuffer(crtc->fb);
-               /* If not atomic, we can go ahead and pin, and unpin the
-                * old fb we were passed.
-                */
-               ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM);
-               if (ret)
-                       return ret;
-               if (passed_fb) {
-                       struct nouveau_framebuffer *ofb = nouveau_framebuffer(passed_fb);
-                       nouveau_bo_unpin(ofb->nvbo);
-               }
        }
  
        nv_crtc->fb.offset = fb->nvbo->bo.offset;
@@@ -878,6 -901,9 +902,9 @@@ static in
  nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
                        struct drm_framebuffer *old_fb)
  {
+       int ret = nv_crtc_swap_fbs(crtc, old_fb);
+       if (ret)
+               return ret;
        return nv04_crtc_do_mode_set_base(crtc, old_fb, x, y, false);
  }
  
@@@ -1008,59 -1034,13 +1035,59 @@@ nv04_crtc_cursor_move(struct drm_crtc *
        return 0;
  }
  
 +int
 +nouveau_crtc_set_config(struct drm_mode_set *set)
 +{
 +      struct drm_device *dev;
 +      struct nouveau_drm *drm;
 +      int ret;
 +      struct drm_crtc *crtc;
 +      bool active = false;
 +      if (!set || !set->crtc)
 +              return -EINVAL;
 +
 +      dev = set->crtc->dev;
 +
 +      /* get a pm reference here */
 +      ret = pm_runtime_get_sync(dev->dev);
 +      if (ret < 0)
 +              return ret;
 +
 +      ret = drm_crtc_helper_set_config(set);
 +
 +      drm = nouveau_drm(dev);
 +
 +      /* if we get here with no crtcs active then we can drop a reference */
 +      list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 +              if (crtc->enabled)
 +                      active = true;
 +      }
 +
 +      pm_runtime_mark_last_busy(dev->dev);
 +      /* if we have active crtcs and we don't have a power ref,
 +         take the current one */
 +      if (active && !drm->have_disp_power_ref) {
 +              drm->have_disp_power_ref = true;
 +              return ret;
 +      }
 +      /* if we have no active crtcs, then drop the power ref
 +         we got before */
 +      if (!active && drm->have_disp_power_ref) {
 +              pm_runtime_put_autosuspend(dev->dev);
 +              drm->have_disp_power_ref = false;
 +      }
 +      /* drop the power reference we got coming in here */
 +      pm_runtime_put_autosuspend(dev->dev);
 +      return ret;
 +}
 +
  static const struct drm_crtc_funcs nv04_crtc_funcs = {
        .save = nv_crtc_save,
        .restore = nv_crtc_restore,
        .cursor_set = nv04_crtc_cursor_set,
        .cursor_move = nv04_crtc_cursor_move,
        .gamma_set = nv_crtc_gamma_set,
 -      .set_config = drm_crtc_helper_set_config,
 +      .set_config = nouveau_crtc_set_config,
        .page_flip = nouveau_crtc_page_flip,
        .destroy = nv_crtc_destroy,
  };
@@@ -1074,6 -1054,7 +1101,7 @@@ static const struct drm_crtc_helper_fun
        .mode_set_base = nv04_crtc_mode_set_base,
        .mode_set_base_atomic = nv04_crtc_mode_set_base_atomic,
        .load_lut = nv_crtc_gamma_load,
+       .disable = nv_crtc_disable,
  };
  
  int
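
nouveau_crtc_set_config() above brackets drm_crtc_helper_set_config() with a runtime-PM reference and converts it into one long-lived reference (have_disp_power_ref) that is held exactly while any CRTC stays enabled. A compact sketch of that reference accounting, where pm_get()/pm_put() stand in for pm_runtime_get_sync()/pm_runtime_put_autosuspend() and the rest of the names are made up:

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the display power-reference bookkeeping: one reference is held
 * for the duration of the modeset, and it is promoted to a long-lived
 * reference while at least one CRTC remains enabled. */
static int power_refs;
static void pm_get(void) { power_refs++; }
static void pm_put(void) { power_refs--; }

static bool have_disp_power_ref;

static void demo_set_config(bool any_crtc_active_after)
{
        pm_get();                       /* keep the device awake for the modeset */

        /* ... the drm_crtc_helper_set_config() equivalent would run here ... */

        if (any_crtc_active_after && !have_disp_power_ref) {
                have_disp_power_ref = true;     /* keep the modeset reference */
                return;
        }
        if (!any_crtc_active_after && have_disp_power_ref) {
                pm_put();                       /* drop the long-lived reference */
                have_disp_power_ref = false;
        }
        pm_put();                               /* drop the modeset reference */
}

int main(void)
{
        demo_set_config(true);          /* light up a CRTC: one ref stays held */
        printf("refs after enable:  %d\n", power_refs);
        demo_set_config(false);         /* all CRTCs off: everything dropped */
        printf("refs after disable: %d\n", power_refs);
        return 0;
}
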
index e4444bacd0b2b2da14084c895eddbdd1cf9823b4,af20fba3a1a47ae441f8c587d79670d445ed6374..755c38d0627171e985ad87abe703b3407081648f
@@@ -198,7 -198,12 +198,12 @@@ nouveau_bo_new(struct drm_device *dev, 
        size_t acc_size;
        int ret;
        int type = ttm_bo_type_device;
-       int max_size = INT_MAX & ~((1 << drm->client.base.vm->vmm->lpg_shift) - 1);
+       int lpg_shift = 12;
+       int max_size;
+       if (drm->client.base.vm)
+               lpg_shift = drm->client.base.vm->vmm->lpg_shift;
+       max_size = INT_MAX & ~((1 << lpg_shift) - 1);
  
        if (size <= 0 || size > max_size) {
                nv_warn(drm, "skipped size %x\n", (u32)size);
@@@ -1260,9 -1265,7 +1265,9 @@@ out
  static int
  nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
  {
 -      return 0;
 +      struct nouveau_bo *nvbo = nouveau_bo(bo);
 +
 +      return drm_vma_node_verify_access(&nvbo->gem->vma_node, filp);
  }
  
  static int
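
The nouveau_bo_new() hunk above avoids dereferencing a missing per-client VM by falling back to a 4 KiB page shift; max_size is then simply INT_MAX rounded down to a multiple of that (large) page size. A two-case worked example of the expression, where the 128 KiB value is only illustrative:

#include <limits.h>
#include <stdio.h>

/* Worked example of the max_size expression: INT_MAX rounded down to a
 * multiple of 1 << lpg_shift.  The 12 matches the fallback in the hunk;
 * the 17 (128 KiB pages) is an illustrative second value. */
int main(void)
{
        int lpg_shift = 12;
        int max_size = INT_MAX & ~((1 << lpg_shift) - 1);

        printf("lpg_shift=12: max_size = 0x%08x\n", max_size);

        lpg_shift = 17;
        max_size = INT_MAX & ~((1 << lpg_shift) - 1);
        printf("lpg_shift=17: max_size = 0x%08x\n", max_size);
        return 0;
}
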
index 44202bf7b81993dd3b2195d5659e1d47865c1fe9,a03e75deacafc23e05ff2c9982bd9e44e2380d76..77ffded688374f4e78518ed677d1dc059ecc3809
@@@ -394,7 -394,7 +394,7 @@@ nouveau_display_suspend(struct drm_devi
  
        nouveau_display_fini(dev);
  
 -      NV_INFO(drm, "unpinning framebuffer(s)...\n");
 +      NV_SUSPEND(drm, "unpinning framebuffer(s)...\n");
        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                struct nouveau_framebuffer *nouveau_fb;
  
  }
  
  void
 -nouveau_display_resume(struct drm_device *dev)
 +nouveau_display_repin(struct drm_device *dev)
  {
        struct nouveau_drm *drm = nouveau_drm(dev);
        struct drm_crtc *crtc;
                if (ret)
                        NV_ERROR(drm, "Could not pin/map cursor.\n");
        }
 +}
  
 -      nouveau_fbcon_set_suspend(dev, 0);
 -      nouveau_fbcon_zfill_all(dev);
 -
 +void
 +nouveau_display_resume(struct drm_device *dev)
 +{
 +      struct drm_crtc *crtc;
        nouveau_display_init(dev);
  
        /* Force CLUT to get re-loaded during modeset */
@@@ -521,8 -519,7 +521,8 @@@ fail
  
  int
  nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 -                     struct drm_pending_vblank_event *event)
 +                     struct drm_pending_vblank_event *event,
 +                     uint32_t page_flip_flags)
  {
        struct drm_device *dev = crtc->dev;
        struct nouveau_drm *drm = nouveau_drm(dev);
                ret = nv50_display_flip_next(crtc, fb, chan, 0);
                if (ret)
                        goto fail_unreserve;
+       } else {
+               struct nv04_display *dispnv04 = nv04_display(dev);
+               nouveau_bo_ref(new_bo, &dispnv04->image[nouveau_crtc(crtc)->index]);
        }
  
        ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
@@@ -676,6 -676,13 +679,6 @@@ nouveau_display_dumb_create(struct drm_
        return ret;
  }
  
 -int
 -nouveau_display_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
 -                           uint32_t handle)
 -{
 -      return drm_gem_handle_delete(file_priv, handle);
 -}
 -
  int
  nouveau_display_dumb_map_offset(struct drm_file *file_priv,
                                struct drm_device *dev,
        gem = drm_gem_object_lookup(dev, file_priv, handle);
        if (gem) {
                struct nouveau_bo *bo = gem->driver_private;
 -              *poffset = bo->bo.addr_space_offset;
 +              *poffset = drm_vma_node_offset_addr(&bo->bo.vma_node);
                drm_gem_object_unreference_unlocked(gem);
                return 0;
        }
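
The nouveau_display.c changes above split the old resume path into nouveau_display_repin(), which re-pins framebuffers and cursors, and nouveau_display_resume(), which re-initialises the display, so the two steps can be driven separately (for example under runtime PM). A trivial sketch of that split, with all function names invented:

#include <stdio.h>

/* Trivial sketch of the suspend/repin/resume split: unpinning happens on
 * suspend, re-pinning is its own step, and resume only re-initialises the
 * display state. */
static void demo_display_fini(void) { puts("fini display"); }
static void demo_unpin_fbs(void)    { puts("unpin framebuffers and cursors"); }
static void demo_repin_fbs(void)    { puts("re-pin framebuffers and cursors"); }
static void demo_display_init(void) { puts("init display, reload CLUT"); }

static void demo_suspend(void)
{
        demo_display_fini();
        demo_unpin_fbs();
}

static void demo_resume(void)
{
        demo_repin_fbs();       /* formerly inside resume, now a separate step */
        demo_display_init();
}

int main(void)
{
        demo_suspend();
        demo_resume();
        return 0;
}
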
index 3569d89b9e41c9c69250bffc8b75fce591b69653,16023986d30149e7264bc84947f8573686a93333..00885417ffffd2e8ad695a1a01059d40df68c011
@@@ -50,7 -50,7 +50,7 @@@ static char *pre_emph_names[] = 
   * or from atom. Note that atom operates on
   * dw units.
   */
- static void radeon_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
  {
  #ifdef __BIG_ENDIAN
        u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
@@@ -100,7 -100,7 +100,7 @@@ static int radeon_process_aux_ch(struc
  
        base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1);
  
-       radeon_copy_swap(base, send, send_bytes, true);
+       radeon_atom_copy_swap(base, send, send_bytes, true);
  
        args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4));
        args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4));
                recv_bytes = recv_size;
  
        if (recv && recv_size)
-               radeon_copy_swap(recv, base + 16, recv_bytes, false);
+               radeon_atom_copy_swap(recv, base + 16, recv_bytes, false);
  
        return recv_bytes;
  }
@@@ -585,7 -585,7 +585,7 @@@ static bool radeon_dp_get_link_status(s
                return false;
        }
  
 -      DRM_DEBUG_KMS("link status %*ph\n", 6, link_status);
 +      DRM_DEBUG_KMS("link status %6ph\n", link_status);
        return true;
  }
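
radeon_copy_swap() becomes the shared radeon_atom_copy_swap() above; as the comment notes, atom operates on dword units, so a big-endian host has to swap the bytes of each 32-bit word when staging data through the little-endian scratch area. A userspace sketch of that per-dword swap that always swaps (the kernel version is a plain copy on little-endian builds, and the buffer sizes here are chosen for the demo only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Userspace sketch of a per-dword byte swap: the input is padded to a
 * 4-byte multiple and each 32-bit word is reversed. */
static void demo_atom_copy_swap(uint8_t *dst, const uint8_t *src,
                                unsigned int num_bytes)
{
        unsigned int aligned = (num_bytes + 3) & ~3u;
        uint8_t tmp[32] = { 0 };
        unsigned int i;

        memcpy(tmp, src, num_bytes);
        for (i = 0; i < aligned; i += 4) {
                dst[i + 0] = tmp[i + 3];
                dst[i + 1] = tmp[i + 2];
                dst[i + 2] = tmp[i + 1];
                dst[i + 3] = tmp[i + 0];
        }
}

int main(void)
{
        uint8_t in[6]  = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 };
        uint8_t out[8] = { 0 };
        unsigned int i;

        demo_atom_copy_swap(out, in, sizeof(in));
        for (i = 0; i < sizeof(out); i++)
                printf("%02x ", out[i]);
        printf("\n");   /* prints: 44 33 22 11 00 00 66 55 */
        return 0;
}
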
  
index 6adbc998349e152770d3b29e478448d12c2a24f5,a77b593185fb983a0304b10cfadddf9278e9c842..a3bba05872769fa77d1561681de9639aae95acf9
  #include "cikd.h"
  #include "atom.h"
  #include "cik_blit_shaders.h"
- /* GFX */
- #define CIK_PFP_UCODE_SIZE 2144
- #define CIK_ME_UCODE_SIZE 2144
- #define CIK_CE_UCODE_SIZE 2144
- /* compute */
- #define CIK_MEC_UCODE_SIZE 4192
- /* interrupts */
- #define BONAIRE_RLC_UCODE_SIZE 2048
- #define KB_RLC_UCODE_SIZE 2560
- #define KV_RLC_UCODE_SIZE 2560
- /* gddr controller */
- #define CIK_MC_UCODE_SIZE 7866
- /* sdma */
- #define CIK_SDMA_UCODE_SIZE 1050
- #define CIK_SDMA_UCODE_VERSION 64
+ #include "radeon_ucode.h"
+ #include "clearstate_ci.h"
  
  MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
  MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
@@@ -54,6 -40,7 +40,7 @@@ MODULE_FIRMWARE("radeon/BONAIRE_mec.bin
  MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
  MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
  MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
+ MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
  MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
  MODULE_FIRMWARE("radeon/KAVERI_me.bin");
  MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
@@@ -72,10 -59,61 +59,61 @@@ extern void r600_ih_ring_fini(struct ra
  extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
  extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
  extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+ extern void sumo_rlc_fini(struct radeon_device *rdev);
+ extern int sumo_rlc_init(struct radeon_device *rdev);
  extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
- extern void si_rlc_fini(struct radeon_device *rdev);
- extern int si_rlc_init(struct radeon_device *rdev);
+ extern void si_rlc_reset(struct radeon_device *rdev);
+ extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+ extern int cik_sdma_resume(struct radeon_device *rdev);
+ extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
+ extern void cik_sdma_fini(struct radeon_device *rdev);
+ extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
+                                struct radeon_ib *ib,
+                                uint64_t pe,
+                                uint64_t addr, unsigned count,
+                                uint32_t incr, uint32_t flags);
  static void cik_rlc_stop(struct radeon_device *rdev);
+ static void cik_pcie_gen3_enable(struct radeon_device *rdev);
+ static void cik_program_aspm(struct radeon_device *rdev);
+ static void cik_init_pg(struct radeon_device *rdev);
+ static void cik_init_cg(struct radeon_device *rdev);
+ /* get temperature in millidegrees */
+ int ci_get_temp(struct radeon_device *rdev)
+ {
+       u32 temp;
+       int actual_temp = 0;
+       temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
+               CTF_TEMP_SHIFT;
+       if (temp & 0x200)
+               actual_temp = 255;
+       else
+               actual_temp = temp & 0x1ff;
+       actual_temp = actual_temp * 1000;
+       return actual_temp;
+ }
+ /* get temperature in millidegrees */
+ int kv_get_temp(struct radeon_device *rdev)
+ {
+       u32 temp;
+       int actual_temp = 0;
+       temp = RREG32_SMC(0xC0300E0C);
+       if (temp)
+               actual_temp = (temp / 8) - 49;
+       else
+               actual_temp = 0;
+       actual_temp = actual_temp * 1000;
+       return actual_temp;
+ }
  
  /*
   * Indirect registers accessor
@@@ -98,6 -136,778 +136,778 @@@ void cik_pciep_wreg(struct radeon_devic
        (void)RREG32(PCIE_DATA);
  }
  
+ static const u32 spectre_rlc_save_restore_register_list[] =
+ {
+       (0x0e00 << 16) | (0xc12c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc140 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc150 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc15c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc168 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc170 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc178 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc204 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8228 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x829c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x869c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x98f4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x98f8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc260 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x90e8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c000 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c00c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c1c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8900 >> 2),
+       0x00000000,
+       0x3,
+       (0x0e00 << 16) | (0xc130 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc134 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc1fc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc208 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc264 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc268 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc26c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc270 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc274 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc278 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc27c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc280 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc284 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc288 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc28c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc290 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc294 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc298 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc29c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2ac  >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x301d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30238 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30250 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30254 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30258 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3025c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc99c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9834 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f00 >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f00 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f04 >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f04 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f08 >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f08 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f0c >> 2),
+       0x00000000,
+       (0x0001 << 16) | (0x30f0c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x9b7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bf0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bcc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8b24 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30a04 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a10 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a14 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a2c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc704 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc708 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc768 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc770 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc774 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc778 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc77c >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc780 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc784 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc788 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc78c >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc798 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc79c >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7a0 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7a4 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7a8 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7ac >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7b0 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc7b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9100 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c010 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92a8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92ac >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92c4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92c8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92cc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x92d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c04 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c38 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c3c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xae00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9604 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac08 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac0c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac58 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac68 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac6c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac70 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac74 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac78 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac80 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac84 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac88 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac8c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x970c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9714 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9718 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x971c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x8e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x9e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0xae00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0xbe00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88bc >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0x89c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8980 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30938 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3093c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30940 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30904 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c210 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c214 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c218 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8904 >> 2),
+       0x00000000,
+       0x5,
+       (0x0e00 << 16) | (0x8c28 >> 2),
+       (0x0e00 << 16) | (0x8c2c >> 2),
+       (0x0e00 << 16) | (0x8c30 >> 2),
+       (0x0e00 << 16) | (0x8c34 >> 2),
+       (0x0e00 << 16) | (0x9600 >> 2),
+ };
+ static const u32 kalindi_rlc_save_restore_register_list[] =
+ {
+       (0x0e00 << 16) | (0xc12c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc140 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc150 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc15c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc168 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc170 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc204 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8228 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x829c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x869c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x98f4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x98f8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc260 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x90e8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c000 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c00c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c1c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xcd20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89bc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8900 >> 2),
+       0x00000000,
+       0x3,
+       (0x0e00 << 16) | (0xc130 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc134 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc1fc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc208 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc264 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc268 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc26c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc270 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc274 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc28c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc290 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc294 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc298 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2a8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc2ac >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x301d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30238 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30250 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30254 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30258 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3025c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc900 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc904 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc908 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc90c >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0xc910 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc99c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9834 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f00 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f04 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f08 >> 2),
+       0x00000000,
+       (0x0000 << 16) | (0x30f0c >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x9b7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bf0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8bcc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8b24 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30a04 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a10 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a14 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a18 >> 2),
+       0x00000000,
+       (0x0600 << 16) | (0x30a2c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc700 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc704 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc708 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xc768 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc770 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc774 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc798 >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0xc79c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9100 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c010 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c04 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c20 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c38 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8c3c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xae00 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9604 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac08 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac0c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac58 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac68 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac6c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac70 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac74 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac78 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac7c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac80 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac84 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac88 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xac8c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x970c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9714 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x9718 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x971c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x4e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x5e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x6e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x7e00 << 16) | (0x31068 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd10 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0xcd14 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88b8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88bc >> 2),
+       0x00000000,
+       (0x0400 << 16) | (0x89c0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88c8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x88d8 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8980 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30938 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3093c >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30940 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89a0 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30900 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x30904 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x89b4 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3e1fc >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c210 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c214 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x3c218 >> 2),
+       0x00000000,
+       (0x0e00 << 16) | (0x8904 >> 2),
+       0x00000000,
+       0x5,
+       (0x0e00 << 16) | (0x8c28 >> 2),
+       (0x0e00 << 16) | (0x8c2c >> 2),
+       (0x0e00 << 16) | (0x8c30 >> 2),
+       (0x0e00 << 16) | (0x8c34 >> 2),
+       (0x0e00 << 16) | (0x9600 >> 2),
+ };
  static const u32 bonaire_golden_spm_registers[] =
  {
        0x30800, 0xe0ffffff, 0xe0000000
@@@ -744,7 -1554,7 +1554,7 @@@ static int cik_init_microcode(struct ra
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
-               sdma_req_size;
+               sdma_req_size, smc_req_size;
        char fw_name[30];
        int err;
  
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = CIK_MC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
+               smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
                err = -EINVAL;
        }
  
-       /* No MC ucode on APUs */
+       /* No SMC, MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
                               rdev->mc_fw->size, fw_name);
                        err = -EINVAL;
                }
+               snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+               err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
+               if (err) {
+                       printk(KERN_ERR
+                              "smc: error loading firmware \"%s\"\n",
+                              fw_name);
+                       release_firmware(rdev->smc_fw);
+                       rdev->smc_fw = NULL;
+               } else if (rdev->smc_fw->size != smc_req_size) {
+                       printk(KERN_ERR
+                              "cik_smc: Bogus length %zu in firmware \"%s\"\n",
+                              rdev->smc_fw->size, fw_name);
+                       err = -EINVAL;
+               }
        }
  
  out:
                rdev->rlc_fw = NULL;
                release_firmware(rdev->mc_fw);
                rdev->mc_fw = NULL;
+               release_firmware(rdev->smc_fw);
+               rdev->smc_fw = NULL;
        }
        return err;
  }
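
The microcode hunk above requests an additional SMC image on discrete boards and validates its size, releasing the reference and clearing rdev->smc_fw on failure like the other blobs. A userspace sketch of that load-and-size-check pattern, with file I/O standing in for request_firmware() and an invented expected size:

#include <stdio.h>
#include <stdlib.h>

/* Sketch of the firmware handling pattern: each blob is requested by name,
 * its size is checked against the expected value for the chip, and on any
 * failure the pointer is left NULL so later code can test for presence.
 * DEMO_EXPECTED_SIZE is invented, not a real ucode size. */
#define DEMO_EXPECTED_SIZE 16384L

static int load_and_validate(const char *name, long expected,
                             unsigned char **blob)
{
        FILE *f = fopen(name, "rb");
        long size;

        *blob = NULL;
        if (!f) {
                fprintf(stderr, "error loading firmware \"%s\"\n", name);
                return -1;
        }
        fseek(f, 0, SEEK_END);
        size = ftell(f);
        if (size != expected) {
                fprintf(stderr, "bogus length %ld in firmware \"%s\"\n",
                        size, name);
                fclose(f);
                return -1;
        }
        rewind(f);
        *blob = malloc((size_t)size);
        if (*blob && fread(*blob, 1, (size_t)size, f) != (size_t)size) {
                free(*blob);
                *blob = NULL;
        }
        fclose(f);
        return *blob ? 0 : -1;
}

int main(void)
{
        unsigned char *smc;

        if (load_and_validate("BONAIRE_smc.bin", DEMO_EXPECTED_SIZE, &smc))
                return 1;
        free(smc);
        return 0;
}
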
@@@ -1880,7 -2708,46 +2708,46 @@@ static void cik_gpu_init(struct radeon_
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
-               /* TODO */
+               rdev->config.cik.max_shader_engines = 1;
+               rdev->config.cik.max_tile_pipes = 4;
+               if ((rdev->pdev->device == 0x1304) ||
+                   (rdev->pdev->device == 0x1305) ||
+                   (rdev->pdev->device == 0x130C) ||
+                   (rdev->pdev->device == 0x130F) ||
+                   (rdev->pdev->device == 0x1310) ||
+                   (rdev->pdev->device == 0x1311) ||
+                   (rdev->pdev->device == 0x131C)) {
+                       rdev->config.cik.max_cu_per_sh = 8;
+                       rdev->config.cik.max_backends_per_se = 2;
+               } else if ((rdev->pdev->device == 0x1309) ||
+                          (rdev->pdev->device == 0x130A) ||
+                          (rdev->pdev->device == 0x130D) ||
+                          (rdev->pdev->device == 0x1313)) {
+                       rdev->config.cik.max_cu_per_sh = 6;
+                       rdev->config.cik.max_backends_per_se = 2;
+               } else if ((rdev->pdev->device == 0x1306) ||
+                          (rdev->pdev->device == 0x1307) ||
+                          (rdev->pdev->device == 0x130B) ||
+                          (rdev->pdev->device == 0x130E) ||
+                          (rdev->pdev->device == 0x1315) ||
+                          (rdev->pdev->device == 0x131B)) {
+                       rdev->config.cik.max_cu_per_sh = 4;
+                       rdev->config.cik.max_backends_per_se = 1;
+               } else {
+                       rdev->config.cik.max_cu_per_sh = 3;
+                       rdev->config.cik.max_backends_per_se = 1;
+               }
+               rdev->config.cik.max_sh_per_se = 1;
+               rdev->config.cik.max_texture_channel_caches = 4;
+               rdev->config.cik.max_gprs = 256;
+               rdev->config.cik.max_gs_threads = 16;
+               rdev->config.cik.max_hw_contexts = 8;
+               rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
+               rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
+               rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
+               rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
+               gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KABINI:
        default:
@@@ -2535,8 -3402,8 +3402,8 @@@ static int cik_cp_gfx_resume(struct rad
        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 -      rb_bufsz = drm_order(ring->ring_size / 8);
 -      tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 +      rb_bufsz = order_base_2(ring->ring_size / 8);
 +      tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
  #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
  #endif
@@@ -2587,11 -3454,12 +3454,12 @@@ u32 cik_compute_ring_get_rptr(struct ra
        if (rdev->wb.enabled) {
                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
        } else {
+               mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                rptr = RREG32(CP_HQD_PQ_RPTR);
                cik_srbm_select(rdev, 0, 0, 0, 0);
+               mutex_unlock(&rdev->srbm_mutex);
        }
-       rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
  
        return rptr;
  }
@@@ -2604,11 -3472,12 +3472,12 @@@ u32 cik_compute_ring_get_wptr(struct ra
        if (rdev->wb.enabled) {
                wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
        } else {
+               mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                wptr = RREG32(CP_HQD_PQ_WPTR);
                cik_srbm_select(rdev, 0, 0, 0, 0);
+               mutex_unlock(&rdev->srbm_mutex);
        }
-       wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
  
        return wptr;
  }
  void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
  {
-       u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
-       rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
-       WDOORBELL32(ring->doorbell_offset, wptr);
+       rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
+       WDOORBELL32(ring->doorbell_offset, ring->wptr);
  }
  
  /**
@@@ -2897,6 -3764,7 +3764,7 @@@ static int cik_cp_compute_resume(struc
        WREG32(CP_CPF_DEBUG, tmp);
  
        /* init the pipes */
+       mutex_lock(&rdev->srbm_mutex);
        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
                int me = (i < 4) ? 1 : 2;
                int pipe = (i < 4) ? i : (i - 4);
                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
 -              tmp |= drm_order(MEC_HPD_SIZE / 8);
 +              tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);
        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
+       mutex_unlock(&rdev->srbm_mutex);
  
        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;
  
+               mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
  
                mqd->queue_state.cp_hqd_pq_control |=
 -                      drm_order(rdev->ring[idx].ring_size / 8);
 +                      order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
 -                      (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
 +                      (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
  #ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
  #endif
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
  
                cik_srbm_select(rdev, 0, 0, 0, 0);
+               mutex_unlock(&rdev->srbm_mutex);
  
                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
@@@ -3142,13 -4013,6 +4013,6 @@@ static int cik_cp_resume(struct radeon_
  {
        int r;
  
-       /* Reset all cp blocks */
-       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
-       RREG32(GRBM_SOFT_RESET);
-       mdelay(15);
-       WREG32(GRBM_SOFT_RESET, 0);
-       RREG32(GRBM_SOFT_RESET);
        r = cik_cp_load_microcode(rdev);
        if (r)
                return r;
        return 0;
  }
  
- /*
-  * sDMA - System DMA
-  * Starting with CIK, the GPU has new asynchronous
-  * DMA engines.  These engines are used for compute
-  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
-  * and each one supports 1 ring buffer used for gfx
-  * and 2 queues used for compute.
-  *
-  * The programming model is very similar to the CP
-  * (ring buffer, IBs, etc.), but sDMA has it's own
-  * packet format that is different from the PM4 format
-  * used by the CP. sDMA supports copying data, writing
-  * embedded data, solid fills, and a number of other
-  * things.  It also has support for tiling/detiling of
-  * buffers.
-  */
+ static void cik_print_gpu_status_regs(struct radeon_device *rdev)
+ {
+       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
+               RREG32(GRBM_STATUS));
+       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
+               RREG32(GRBM_STATUS2));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
+               RREG32(GRBM_STATUS_SE0));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
+               RREG32(GRBM_STATUS_SE1));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
+               RREG32(GRBM_STATUS_SE2));
+       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
+               RREG32(GRBM_STATUS_SE3));
+       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
+               RREG32(SRBM_STATUS));
+       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
+               RREG32(SRBM_STATUS2));
+       dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
+               RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
+       dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
+                RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
+       dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
+       dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT2));
+       dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT3));
+       dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
+                RREG32(CP_CPF_BUSY_STAT));
+       dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_CPF_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
+       dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
+       dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_CPC_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
+ }
  /**
-  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+  * cik_gpu_check_soft_reset - check which blocks are busy
   *
   * @rdev: radeon_device pointer
-  * @ib: IB object to schedule
   *
-  * Schedule an IB in the DMA ring (CIK).
+  * Check which blocks are busy and return the relevant reset
+  * mask to be used by cik_gpu_soft_reset().
+  * Returns a mask of the blocks to be reset.
   */
- void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
-                             struct radeon_ib *ib)
+ u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
  {
-       struct radeon_ring *ring = &rdev->ring[ib->ring];
-       u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+       u32 reset_mask = 0;
+       u32 tmp;
  
-       if (rdev->wb.enabled) {
-               u32 next_rptr = ring->wptr + 5;
-               while ((next_rptr & 7) != 4)
-                       next_rptr++;
-               next_rptr += 4;
-               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-               radeon_ring_write(ring, 1); /* number of DWs to follow */
-               radeon_ring_write(ring, next_rptr);
-       }
+       /* GRBM_STATUS */
+       tmp = RREG32(GRBM_STATUS);
+       if (tmp & (PA_BUSY | SC_BUSY |
+                  BCI_BUSY | SX_BUSY |
+                  TA_BUSY | VGT_BUSY |
+                  DB_BUSY | CB_BUSY |
+                  GDS_BUSY | SPI_BUSY |
+                  IA_BUSY | IA_BUSY_NO_DMA))
+               reset_mask |= RADEON_RESET_GFX;
  
-       /* IB packet must end on a 8 DW boundary */
-       while ((ring->wptr & 7) != 4)
-               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
-       radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
-       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
-       radeon_ring_write(ring, ib->length_dw);
+       if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
+               reset_mask |= RADEON_RESET_CP;
  
- }
+       /* GRBM_STATUS2 */
+       tmp = RREG32(GRBM_STATUS2);
+       if (tmp & RLC_BUSY)
+               reset_mask |= RADEON_RESET_RLC;
  
- /**
-  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
-  *
-  * @rdev: radeon_device pointer
-  * @fence: radeon fence object
-  *
-  * Add a DMA fence packet to the ring to write
-  * the fence seq number and DMA trap packet to generate
-  * an interrupt if needed (CIK).
-  */
- void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
-                             struct radeon_fence *fence)
- {
-       struct radeon_ring *ring = &rdev->ring[fence->ring];
-       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
-                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
-       u32 ref_and_mask;
+       /* SDMA0_STATUS_REG */
+       tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
+       if (!(tmp & SDMA_IDLE))
+               reset_mask |= RADEON_RESET_DMA;
  
-       if (fence->ring == R600_RING_TYPE_DMA_INDEX)
-               ref_and_mask = SDMA0;
-       else
-               ref_and_mask = SDMA1;
+       /* SDMA1_STATUS_REG */
+       tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
+       if (!(tmp & SDMA_IDLE))
+               reset_mask |= RADEON_RESET_DMA1;
  
-       /* write the fence */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
-       radeon_ring_write(ring, addr & 0xffffffff);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-       radeon_ring_write(ring, fence->seq);
-       /* generate an interrupt */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
-       /* flush HDP */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
-       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
-       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
-       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
-       radeon_ring_write(ring, ref_and_mask); /* MASK */
-       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
- }
+       /* SRBM_STATUS2 */
+       tmp = RREG32(SRBM_STATUS2);
+       if (tmp & SDMA_BUSY)
+               reset_mask |= RADEON_RESET_DMA;
  
- /**
-  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  * @semaphore: radeon semaphore object
-  * @emit_wait: wait or signal semaphore
-  *
-  * Add a DMA semaphore packet to the ring wait on or signal
-  * other rings (CIK).
-  */
- void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
-                                 struct radeon_ring *ring,
-                                 struct radeon_semaphore *semaphore,
-                                 bool emit_wait)
- {
-       u64 addr = semaphore->gpu_addr;
-       u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
+       if (tmp & SDMA1_BUSY)
+               reset_mask |= RADEON_RESET_DMA1;
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
-       radeon_ring_write(ring, addr & 0xfffffff8);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
- }
+       /* SRBM_STATUS */
+       tmp = RREG32(SRBM_STATUS);
  
- /**
-  * cik_sdma_gfx_stop - stop the gfx async dma engines
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Stop the gfx async dma ring buffers (CIK).
-  */
- static void cik_sdma_gfx_stop(struct radeon_device *rdev)
- {
-       u32 rb_cntl, reg_offset;
-       int i;
+       if (tmp & IH_BUSY)
+               reset_mask |= RADEON_RESET_IH;
  
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       if (tmp & SEM_BUSY)
+               reset_mask |= RADEON_RESET_SEM;
  
-       for (i = 0; i < 2; i++) {
-               if (i == 0)
-                       reg_offset = SDMA0_REGISTER_OFFSET;
-               else
-                       reg_offset = SDMA1_REGISTER_OFFSET;
-               rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
-               rb_cntl &= ~SDMA_RB_ENABLE;
-               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
-               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
-       }
- }
+       if (tmp & GRBM_RQ_PENDING)
+               reset_mask |= RADEON_RESET_GRBM;
  
- /**
-  * cik_sdma_rlc_stop - stop the compute async dma engines
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Stop the compute async dma queues (CIK).
-  */
- static void cik_sdma_rlc_stop(struct radeon_device *rdev)
- {
-       /* XXX todo */
- }
+       if (tmp & VMC_BUSY)
+               reset_mask |= RADEON_RESET_VMC;
  
- /**
-  * cik_sdma_enable - stop the async dma engines
-  *
-  * @rdev: radeon_device pointer
-  * @enable: enable/disable the DMA MEs.
-  *
-  * Halt or unhalt the async dma engines (CIK).
-  */
- static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
- {
-       u32 me_cntl, reg_offset;
-       int i;
+       if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
+                  MCC_BUSY | MCD_BUSY))
+               reset_mask |= RADEON_RESET_MC;
  
-       for (i = 0; i < 2; i++) {
-               if (i == 0)
-                       reg_offset = SDMA0_REGISTER_OFFSET;
-               else
-                       reg_offset = SDMA1_REGISTER_OFFSET;
-               me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
-               if (enable)
-                       me_cntl &= ~SDMA_HALT;
-               else
-                       me_cntl |= SDMA_HALT;
-               WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
+       if (evergreen_is_display_hung(rdev))
+               reset_mask |= RADEON_RESET_DISPLAY;
+       /* Skip MC reset as it's most likely not hung, just busy */
+       if (reset_mask & RADEON_RESET_MC) {
+               DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
+               reset_mask &= ~RADEON_RESET_MC;
        }
+       return reset_mask;
  }
  
  /**
-  * cik_sdma_gfx_resume - setup and start the async dma engines
+  * cik_gpu_soft_reset - soft reset GPU
   *
   * @rdev: radeon_device pointer
+  * @reset_mask: mask of which blocks to reset
   *
-  * Set up the gfx DMA ring buffers and enable them (CIK).
-  * Returns 0 for success, error for failure.
+  * Soft reset the blocks specified in @reset_mask.
   */
- static int cik_sdma_gfx_resume(struct radeon_device *rdev)
+ static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
  {
-       struct radeon_ring *ring;
-       u32 rb_cntl, ib_cntl;
-       u32 rb_bufsz;
-       u32 reg_offset, wb_offset;
-       int i, r;
+       struct evergreen_mc_save save;
+       u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+       u32 tmp;
  
-       for (i = 0; i < 2; i++) {
-               if (i == 0) {
-                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-                       reg_offset = SDMA0_REGISTER_OFFSET;
-                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
-               } else {
-                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
-                       reg_offset = SDMA1_REGISTER_OFFSET;
-                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
-               }
+       if (reset_mask == 0)
+               return;
  
-               WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
-               WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+       dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
  
-               /* Set ring buffer size in dwords */
-               rb_bufsz = order_base_2(ring->ring_size / 4);
-               rb_cntl = rb_bufsz << 1;
- #ifdef __BIG_ENDIAN
-               rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
- #endif
-               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+       cik_print_gpu_status_regs(rdev);
+       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+                RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+                RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
  
-               /* Initialize the ring buffer's read and write pointers */
-               WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
-               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
+       /* stop the rlc */
+       cik_rlc_stop(rdev);
  
-               /* set the wb address whether it's enabled or not */
-               WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
-                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-               WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
-                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+       /* Disable GFX parsing/prefetching */
+       WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
  
-               if (rdev->wb.enabled)
-                       rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
+       /* Disable MEC parsing/prefetching */
+       WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
  
-               WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
-               WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
+       if (reset_mask & RADEON_RESET_DMA) {
+               /* sdma0 */
+               tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
+               tmp |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+       }
+       if (reset_mask & RADEON_RESET_DMA1) {
+               /* sdma1 */
+               tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
+               tmp |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+       }
  
-               ring->wptr = 0;
-               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
+       evergreen_mc_stop(rdev, &save);
+       if (evergreen_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
+       }
  
-               ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
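+       /* translate the block reset mask into GRBM/SRBM soft-reset bits */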
+       if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
+               grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
  
-               /* enable DMA RB */
-               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
+       if (reset_mask & RADEON_RESET_CP) {
+               grbm_soft_reset |= SOFT_RESET_CP;
  
-               ib_cntl = SDMA_IB_ENABLE;
- #ifdef __BIG_ENDIAN
-               ib_cntl |= SDMA_IB_SWAP_ENABLE;
- #endif
-               /* enable DMA IBs */
-               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
+               srbm_soft_reset |= SOFT_RESET_GRBM;
+       }
  
-               ring->ready = true;
+       if (reset_mask & RADEON_RESET_DMA)
+               srbm_soft_reset |= SOFT_RESET_SDMA;
  
-               r = radeon_ring_test(rdev, ring->idx, ring);
-               if (r) {
-                       ring->ready = false;
-                       return r;
-               }
+       if (reset_mask & RADEON_RESET_DMA1)
+               srbm_soft_reset |= SOFT_RESET_SDMA1;
+       if (reset_mask & RADEON_RESET_DISPLAY)
+               srbm_soft_reset |= SOFT_RESET_DC;
+       if (reset_mask & RADEON_RESET_RLC)
+               grbm_soft_reset |= SOFT_RESET_RLC;
+       if (reset_mask & RADEON_RESET_SEM)
+               srbm_soft_reset |= SOFT_RESET_SEM;
+       if (reset_mask & RADEON_RESET_IH)
+               srbm_soft_reset |= SOFT_RESET_IH;
+       if (reset_mask & RADEON_RESET_GRBM)
+               srbm_soft_reset |= SOFT_RESET_GRBM;
+       if (reset_mask & RADEON_RESET_VMC)
+               srbm_soft_reset |= SOFT_RESET_VMC;
+       if (!(rdev->flags & RADEON_IS_IGP)) {
+               if (reset_mask & RADEON_RESET_MC)
+                       srbm_soft_reset |= SOFT_RESET_MC;
        }
  
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       if (grbm_soft_reset) {
+               tmp = RREG32(GRBM_SOFT_RESET);
+               tmp |= grbm_soft_reset;
+               dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32(GRBM_SOFT_RESET, tmp);
+               tmp = RREG32(GRBM_SOFT_RESET);
  
-       return 0;
- }
+               udelay(50);
  
- /**
-  * cik_sdma_rlc_resume - setup and start the async dma engines
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Set up the compute DMA queues and enable them (CIK).
-  * Returns 0 for success, error for failure.
-  */
- static int cik_sdma_rlc_resume(struct radeon_device *rdev)
- {
-       /* XXX todo */
-       return 0;
- }
+               tmp &= ~grbm_soft_reset;
+               WREG32(GRBM_SOFT_RESET, tmp);
+               tmp = RREG32(GRBM_SOFT_RESET);
+       }
  
- /**
-  * cik_sdma_load_microcode - load the sDMA ME ucode
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Loads the sDMA0/1 ucode.
-  * Returns 0 for success, -EINVAL if the ucode is not available.
-  */
- static int cik_sdma_load_microcode(struct radeon_device *rdev)
- {
-       const __be32 *fw_data;
-       int i;
+       if (srbm_soft_reset) {
+               tmp = RREG32(SRBM_SOFT_RESET);
+               tmp |= srbm_soft_reset;
+               dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32(SRBM_SOFT_RESET, tmp);
+               tmp = RREG32(SRBM_SOFT_RESET);
  
-       if (!rdev->sdma_fw)
-               return -EINVAL;
+               udelay(50);
  
-       /* stop the gfx rings and rlc compute queues */
-       cik_sdma_gfx_stop(rdev);
-       cik_sdma_rlc_stop(rdev);
+               tmp &= ~srbm_soft_reset;
+               WREG32(SRBM_SOFT_RESET, tmp);
+               tmp = RREG32(SRBM_SOFT_RESET);
+       }
  
-       /* halt the MEs */
-       cik_sdma_enable(rdev, false);
+       /* Wait a little for things to settle down */
+       udelay(50);
  
-       /* sdma0 */
-       fw_data = (const __be32 *)rdev->sdma_fw->data;
-       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-               WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-       WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-       /* sdma1 */
-       fw_data = (const __be32 *)rdev->sdma_fw->data;
-       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-               WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-       WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-       return 0;
+       evergreen_mc_resume(rdev, &save);
+       udelay(50);
+       cik_print_gpu_status_regs(rdev);
  }
  
  /**
-  * cik_sdma_resume - setup and start the async dma engines
+  * cik_asic_reset - soft reset GPU
   *
   * @rdev: radeon_device pointer
   *
-  * Set up the DMA engines and enable them (CIK).
-  * Returns 0 for success, error for failure.
+  * Look up which blocks are hung and attempt
+  * to reset them.
+  * Returns 0 for success.
   */
- static int cik_sdma_resume(struct radeon_device *rdev)
+ int cik_asic_reset(struct radeon_device *rdev)
  {
-       int r;
+       u32 reset_mask;
  
-       /* Reset dma */
-       WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
-       RREG32(SRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(SRBM_SOFT_RESET, 0);
-       RREG32(SRBM_SOFT_RESET);
+       reset_mask = cik_gpu_check_soft_reset(rdev);
  
-       r = cik_sdma_load_microcode(rdev);
-       if (r)
-               return r;
+       if (reset_mask)
+               r600_set_bios_scratch_engine_hung(rdev, true);
  
-       /* unhalt the MEs */
-       cik_sdma_enable(rdev, true);
+       cik_gpu_soft_reset(rdev, reset_mask);
  
-       /* start the gfx rings and rlc compute queues */
-       r = cik_sdma_gfx_resume(rdev);
-       if (r)
-               return r;
-       r = cik_sdma_rlc_resume(rdev);
-       if (r)
-               return r;
+       reset_mask = cik_gpu_check_soft_reset(rdev);
+       if (!reset_mask)
+               r600_set_bios_scratch_engine_hung(rdev, false);
  
        return 0;
  }
  
  /**
-  * cik_sdma_fini - tear down the async dma engines
+  * cik_gfx_is_lockup - check if the 3D engine is locked up
   *
   * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
   *
-  * Stop the async dma engines and free the rings (CIK).
+  * Check if the 3D engine is locked up (CIK).
+  * Returns true if the engine is locked, false if not.
   */
- static void cik_sdma_fini(struct radeon_device *rdev)
+ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
  {
-       /* stop the gfx rings and rlc compute queues */
-       cik_sdma_gfx_stop(rdev);
-       cik_sdma_rlc_stop(rdev);
-       /* halt the MEs */
-       cik_sdma_enable(rdev, false);
-       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
-       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
-       /* XXX - compute dma queue tear down */
- }
- /**
-  * cik_copy_dma - copy pages using the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @src_offset: src GPU address
-  * @dst_offset: dst GPU address
-  * @num_gpu_pages: number of GPU pages to xfer
-  * @fence: radeon fence object
-  *
-  * Copy GPU paging using the DMA engine (CIK).
-  * Used by the radeon ttm implementation to move pages if
-  * registered as the asic copy callback.
-  */
- int cik_copy_dma(struct radeon_device *rdev,
-                uint64_t src_offset, uint64_t dst_offset,
-                unsigned num_gpu_pages,
-                struct radeon_fence **fence)
- {
-       struct radeon_semaphore *sem = NULL;
-       int ring_index = rdev->asic->copy.dma_ring_index;
-       struct radeon_ring *ring = &rdev->ring[ring_index];
-       u32 size_in_bytes, cur_size_in_bytes;
-       int i, num_loops;
-       int r = 0;
-       r = radeon_semaphore_create(rdev, &sem);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
-       }
-       size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
-       num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
-       r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
-       }
+       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
  
-       if (radeon_fence_need_sync(*fence, ring->idx)) {
-               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-                                           ring->idx);
-               radeon_fence_note_sync(*fence, ring->idx);
-       } else {
-               radeon_semaphore_free(rdev, &sem, NULL);
-       }
-       for (i = 0; i < num_loops; i++) {
-               cur_size_in_bytes = size_in_bytes;
-               if (cur_size_in_bytes > 0x1fffff)
-                       cur_size_in_bytes = 0x1fffff;
-               size_in_bytes -= cur_size_in_bytes;
-               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
-               radeon_ring_write(ring, cur_size_in_bytes);
-               radeon_ring_write(ring, 0); /* src/dst endian swap */
-               radeon_ring_write(ring, src_offset & 0xffffffff);
-               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
-               radeon_ring_write(ring, dst_offset & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
-               src_offset += cur_size_in_bytes;
-               dst_offset += cur_size_in_bytes;
-       }
-       r = radeon_fence_emit(rdev, fence, ring->idx);
-       if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
-               return r;
+       if (!(reset_mask & (RADEON_RESET_GFX |
+                           RADEON_RESET_COMPUTE |
+                           RADEON_RESET_CP))) {
+               radeon_ring_lockup_update(ring);
+               return false;
        }
-       radeon_ring_unlock_commit(rdev, ring);
-       radeon_semaphore_free(rdev, &sem, *fence);
-       return r;
+       /* force CP activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
  }
  
+ /* MC */
  /**
-  * cik_sdma_ring_test - simple async dma engine test
+  * cik_mc_program - program the GPU memory controller
   *
   * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
   *
-  * Test the DMA engine by writing using it to write an
-  * value to memory. (CIK).
-  * Returns 0 for success, error for failure.
+  * Set the location of vram, gart, and AGP in the GPU's
+  * physical address space (CIK).
   */
- int cik_sdma_ring_test(struct radeon_device *rdev,
-                      struct radeon_ring *ring)
+ static void cik_mc_program(struct radeon_device *rdev)
  {
-       unsigned i;
-       int r;
-       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       struct evergreen_mc_save save;
        u32 tmp;
+       int i, j;
  
-       if (!ptr) {
-               DRM_ERROR("invalid vram scratch pointer\n");
-               return -EINVAL;
-       }
-       tmp = 0xCAFEDEAD;
-       writel(tmp, ptr);
-       r = radeon_ring_lock(rdev, ring, 4);
-       if (r) {
-               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
-               return r;
+       /* Initialize HDP */
+       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+               WREG32((0x2c14 + j), 0x00000000);
+               WREG32((0x2c18 + j), 0x00000000);
+               WREG32((0x2c1c + j), 0x00000000);
+               WREG32((0x2c20 + j), 0x00000000);
+               WREG32((0x2c24 + j), 0x00000000);
        }
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
-       radeon_ring_write(ring, 1); /* number of DWs to follow */
-       radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
  
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = readl(ptr);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
+       evergreen_mc_stop(rdev, &save);
+       if (radeon_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
        }
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-       } else {
-               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-                         ring->idx, tmp);
-               r = -EINVAL;
+       /* Lockout access through VGA aperture*/
+       WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+       /* Update configuration */
+       WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+              rdev->mc.vram_start >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+              rdev->mc.vram_end >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+              rdev->vram_scratch.gpu_addr >> 12);
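+       /* FB location packs vram_end >> 24 into bits 31:16 and vram_start >> 24 into bits 15:0 */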
+       tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
+       tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+       WREG32(MC_VM_FB_LOCATION, tmp);
+       /* XXX double check these! */
+       WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+       WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+       WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+       WREG32(MC_VM_AGP_BASE, 0);
+       WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+       WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+       if (radeon_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
        }
-       return r;
+       evergreen_mc_resume(rdev, &save);
+       /* we need to own VRAM, so turn off the VGA renderer here
+        * to stop it overwriting our objects */
+       rv515_vga_render_disable(rdev);
  }
  
  /**
-  * cik_sdma_ib_test - test an IB on the DMA engine
+  * cik_mc_init - initialize the memory controller driver params
   *
   * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
   *
-  * Test a simple IB in the DMA ring (CIK).
-  * Returns 0 on success, error on failure.
+  * Look up the amount of vram, vram width, and decide how to place
+  * vram and gart within the GPU's physical address space (CIK).
+  * Returns 0 for success.
   */
- int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ static int cik_mc_init(struct radeon_device *rdev)
  {
-       struct radeon_ib ib;
-       unsigned i;
-       int r;
-       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-       u32 tmp = 0;
-       if (!ptr) {
-               DRM_ERROR("invalid vram scratch pointer\n");
-               return -EINVAL;
-       }
-       tmp = 0xCAFEDEAD;
-       writel(tmp, ptr);
-       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
-       if (r) {
-               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
-               return r;
-       }
-       ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
-       ib.ptr[3] = 1;
-       ib.ptr[4] = 0xDEADBEEF;
-       ib.length_dw = 5;
+       u32 tmp;
+       int chansize, numchan;
  
-       r = radeon_ib_schedule(rdev, &ib, NULL);
-       if (r) {
-               radeon_ib_free(rdev, &ib);
-               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
-               return r;
-       }
-       r = radeon_fence_wait(ib.fence, false);
-       if (r) {
-               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-               return r;
-       }
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = readl(ptr);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
-       }
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+       /* Get VRAM information */
+       rdev->mc.vram_is_ddr = true;
+       tmp = RREG32(MC_ARB_RAMCFG);
+       if (tmp & CHANSIZE_MASK) {
+               chansize = 64;
        } else {
-               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
-               r = -EINVAL;
+               chansize = 32;
        }
-       radeon_ib_free(rdev, &ib);
-       return r;
- }
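+       /* decode the number of memory channels from MC_SHARED_CHMAP */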
+       tmp = RREG32(MC_SHARED_CHMAP);
+       switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+       case 0:
+       default:
+               numchan = 1;
+               break;
+       case 1:
+               numchan = 2;
+               break;
+       case 2:
+               numchan = 4;
+               break;
+       case 3:
+               numchan = 8;
+               break;
+       case 4:
+               numchan = 3;
+               break;
+       case 5:
+               numchan = 6;
+               break;
+       case 6:
+               numchan = 10;
+               break;
+       case 7:
+               numchan = 12;
+               break;
+       case 8:
+               numchan = 16;
+               break;
+       }
+       rdev->mc.vram_width = numchan * chansize;
+       /* Could aper size report 0 ? */
+       rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+       rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
+       /* size in MB on si */
+       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.visible_vram_size = rdev->mc.aper_size;
+       si_vram_gtt_location(rdev, &rdev->mc);
+       radeon_update_bandwidth_info(rdev);
+       return 0;
+ }
+ /*
+  * GART
+  * VMID 0 is the physical GPU addresses as used by the kernel.
+  * VMIDs 1-15 are used for userspace clients and are handled
+  * by the radeon vm/hsa code.
+  */
+ /**
+  * cik_pcie_gart_tlb_flush - gart tlb flush callback
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Flush the TLB for the VMID 0 page table (CIK).
+  */
+ void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
+ {
+       /* flush hdp cache */
+       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+       /* bits 0-15 are the VM contexts0-15 */
+       WREG32(VM_INVALIDATE_REQUEST, 0x1);
+ }
+ /**
+  * cik_pcie_gart_enable - gart enable
+  *
+  * @rdev: radeon_device pointer
+  *
+  * This sets up the TLBs, programs the page tables for VMID0,
+  * sets up the hw for VMIDs 1-15 which are allocated on
+  * demand, and sets up the global locations for the LDS, GDS,
+  * and GPUVM for FSA64 clients (CIK).
+  * Returns 0 for success, errors for failure.
+  */
+ static int cik_pcie_gart_enable(struct radeon_device *rdev)
+ {
+       int r, i;
+       if (rdev->gart.robj == NULL) {
+               dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
+               return -EINVAL;
+       }
+       r = radeon_gart_table_vram_pin(rdev);
+       if (r)
+               return r;
+       radeon_gart_restore(rdev);
+       /* Setup TLB control */
+       WREG32(MC_VM_MX_L1_TLB_CNTL,
+              (0xA << 7) |
+              ENABLE_L1_TLB |
+              SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+              ENABLE_ADVANCED_DRIVER_MODEL |
+              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+       /* Setup L2 cache */
+       WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+              ENABLE_L2_FRAGMENT_PROCESSING |
+              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+              EFFECTIVE_L2_QUEUE_SIZE(7) |
+              CONTEXT1_IDENTITY_ACCESS_MODE(1));
+       WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
+       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+       /* setup context0 */
+       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+       WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+                       (u32)(rdev->dummy_page.addr >> 12));
+       WREG32(VM_CONTEXT0_CNTL2, 0);
+       WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+       WREG32(0x15D4, 0);
+       WREG32(0x15D8, 0);
+       WREG32(0x15DC, 0);
+       /* empty context1-15 */
+       /* FIXME start with 4G, once using 2 level pt switch to full
+        * vm size space
+        */
+       /* set vm size, must be a multiple of 4 */
+       WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
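+       /* contexts 1-7 use the VM_CONTEXT0_ register bank, 8-15 the VM_CONTEXT8_ bank */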
+       for (i = 1; i < 16; i++) {
+               if (i < 8)
+                       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
+                              rdev->gart.table_addr >> 12);
+               else
+                       WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
+                              rdev->gart.table_addr >> 12);
+       }
+       /* enable context1-15 */
+       WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+              (u32)(rdev->dummy_page.addr >> 12));
+       WREG32(VM_CONTEXT1_CNTL2, 4);
+       WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
+                               RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+                               WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+                               WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
+       /* TC cache setup ??? */
+       WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
+       WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
+       WREG32(TC_CFG_L1_STORE_POLICY, 0);
+       WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
+       WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
+       WREG32(TC_CFG_L2_STORE_POLICY0, 0);
+       WREG32(TC_CFG_L2_STORE_POLICY1, 0);
+       WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
+       WREG32(TC_CFG_L1_VOLATILE, 0);
+       WREG32(TC_CFG_L2_VOLATILE, 0);
+       if (rdev->family == CHIP_KAVERI) {
+               u32 tmp = RREG32(CHUB_CONTROL);
+               tmp &= ~BYPASS_VM;
+               WREG32(CHUB_CONTROL, tmp);
+       }
+       /* XXX SH_MEM regs */
+       /* where to put LDS, scratch, GPUVM in FSA64 space */
+       mutex_lock(&rdev->srbm_mutex);
+       for (i = 0; i < 16; i++) {
+               cik_srbm_select(rdev, 0, 0, 0, i);
+               /* CP and shaders */
+               WREG32(SH_MEM_CONFIG, 0);
+               WREG32(SH_MEM_APE1_BASE, 1);
+               WREG32(SH_MEM_APE1_LIMIT, 0);
+               WREG32(SH_MEM_BASES, 0);
+               /* SDMA GFX */
+               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
+               WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
+               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
+               WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
+               /* XXX SDMA RLC - todo */
+       }
+       cik_srbm_select(rdev, 0, 0, 0, 0);
+       mutex_unlock(&rdev->srbm_mutex);
+       cik_pcie_gart_tlb_flush(rdev);
+       DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+                (unsigned)(rdev->mc.gtt_size >> 20),
+                (unsigned long long)rdev->gart.table_addr);
+       rdev->gart.ready = true;
+       return 0;
+ }
+ /**
+  * cik_pcie_gart_disable - gart disable
+  *
+  * @rdev: radeon_device pointer
+  *
+  * This disables all VM page tables (CIK).
+  */
+ static void cik_pcie_gart_disable(struct radeon_device *rdev)
+ {
+       /* Disable all tables */
+       WREG32(VM_CONTEXT0_CNTL, 0);
+       WREG32(VM_CONTEXT1_CNTL, 0);
+       /* Setup TLB control */
+       WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+       /* Setup L2 cache */
+       WREG32(VM_L2_CNTL,
+              ENABLE_L2_FRAGMENT_PROCESSING |
+              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+              EFFECTIVE_L2_QUEUE_SIZE(7) |
+              CONTEXT1_IDENTITY_ACCESS_MODE(1));
+       WREG32(VM_L2_CNTL2, 0);
+       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+       radeon_gart_table_vram_unpin(rdev);
+ }
+ /**
+  * cik_pcie_gart_fini - vm fini callback
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Tears down the driver GART/VM setup (CIK).
+  */
+ static void cik_pcie_gart_fini(struct radeon_device *rdev)
+ {
+       cik_pcie_gart_disable(rdev);
+       radeon_gart_table_vram_free(rdev);
+       radeon_gart_fini(rdev);
+ }
+ /* vm parser */
+ /**
+  * cik_ib_parse - vm ib_parse callback
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer pointer
+  *
+  * CIK uses hw IB checking so this is a nop (CIK).
+  */
+ int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+ {
+       return 0;
+ }
+ /*
+  * vm
+  * VMID 0 is the physical GPU addresses as used by the kernel.
+  * VMIDs 1-15 are used for userspace clients and are handled
+  * by the radeon vm/hsa code.
+  */
+ /**
+  * cik_vm_init - cik vm init callback
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Inits cik specific vm parameters (number of VMs, base of vram for
+  * VMIDs 1-15) (CIK).
+  * Returns 0 for success.
+  */
+ int cik_vm_init(struct radeon_device *rdev)
+ {
+       /* number of VMs */
+       rdev->vm_manager.nvm = 16;
+       /* base offset of vram pages */
+       if (rdev->flags & RADEON_IS_IGP) {
+               u64 tmp = RREG32(MC_VM_FB_OFFSET);
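+               /* MC_VM_FB_OFFSET is in 4MB units, hence the shift left by 22 */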
+               tmp <<= 22;
+               rdev->vm_manager.vram_base_offset = tmp;
+       } else
+               rdev->vm_manager.vram_base_offset = 0;
+       return 0;
+ }
+ /**
+  * cik_vm_fini - cik vm fini callback
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Tear down any asic specific VM setup (CIK).
+  */
+ void cik_vm_fini(struct radeon_device *rdev)
+ {
+ }
+ /**
+  * cik_vm_decode_fault - print human readable fault info
+  *
+  * @rdev: radeon_device pointer
+  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
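+  * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value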
+  *
+  * Print human readable fault information (CIK).
+  */
+ static void cik_vm_decode_fault(struct radeon_device *rdev,
+                               u32 status, u32 addr, u32 mc_client)
+ {
+       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
+       char *block = (char *)&mc_client;
+       printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
+              protections, vmid, addr,
+              (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+              block, mc_id);
+ }
+ /**
+  * cik_vm_flush - cik vm flush using the CP
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Update the page table base and flush the VM TLB
+  * using the CP (CIK).
+  */
+ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ {
+       struct radeon_ring *ring = &rdev->ring[ridx];
+       if (vm == NULL)
+               return;
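+       /* write the new page directory base for this VMID */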
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       if (vm->id < 8) {
+               radeon_ring_write(ring,
+                                 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+       } else {
+               radeon_ring_write(ring,
+                                 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+       }
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+       /* update SH_MEM_* regs */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, VMID(vm->id));
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, SH_MEM_BASES >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0); /* SH_MEM_BASES */
+       radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
+       radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
+       radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
  
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, VMID(0));
  
- static void cik_print_gpu_status_regs(struct radeon_device *rdev)
- {
-       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
-               RREG32(GRBM_STATUS));
-       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
-               RREG32(GRBM_STATUS2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-               RREG32(GRBM_STATUS_SE0));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-               RREG32(GRBM_STATUS_SE1));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-               RREG32(GRBM_STATUS_SE2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-               RREG32(GRBM_STATUS_SE3));
-       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
-               RREG32(SRBM_STATUS));
-       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
-               RREG32(SRBM_STATUS2));
-       dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
-               RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
-       dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
-                RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
-       dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
-       dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
-                RREG32(CP_STALLED_STAT1));
-       dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
-                RREG32(CP_STALLED_STAT2));
-       dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
-                RREG32(CP_STALLED_STAT3));
-       dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
-                RREG32(CP_CPF_BUSY_STAT));
-       dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
-                RREG32(CP_CPF_STALLED_STAT1));
-       dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
-       dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
-       dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
-                RREG32(CP_CPC_STALLED_STAT1));
-       dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
+       /* HDP flush */
+       /* We should be using the WAIT_REG_MEM packet here like in
+        * cik_fence_ring_emit(), but it causes the CP to hang in this
+        * context...
+        */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0);
+       /* bits 0-15 are the VM contexts0-15 */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 1 << vm->id);
+       /* compute doesn't have PFP */
+       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
+               /* sync PFP to ME, otherwise we might get invalid PFP reads */
+               radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+               radeon_ring_write(ring, 0x0);
+       }
  }
  
  /**
-  * cik_gpu_check_soft_reset - check which blocks are busy
+  * cik_vm_set_page - update the page tables using CP or sDMA
   *
   * @rdev: radeon_device pointer
+  * @ib: indirect buffer to fill with commands
+  * @pe: addr of the page entry
+  * @addr: dst addr to write into pe
+  * @count: number of page entries to update
+  * @incr: increase next addr by incr bytes
+  * @flags: access flags
   *
-  * Check which blocks are busy and return the relevant reset
-  * mask to be used by cik_gpu_soft_reset().
-  * Returns a mask of the blocks to be reset.
+  * Update the page tables using CP or sDMA (CIK).
   */
- static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
+ void cik_vm_set_page(struct radeon_device *rdev,
+                    struct radeon_ib *ib,
+                    uint64_t pe,
+                    uint64_t addr, unsigned count,
+                    uint32_t incr, uint32_t flags)
+ {
+       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+       uint64_t value;
+       unsigned ndw;
+       if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+               /* CP */
+               while (count) {
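+                       /* ndw = 2 + 2 dwords per page entry, clamped to the max packet size */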
+                       ndw = 2 + count * 2;
+                       if (ndw > 0x3FFE)
+                               ndw = 0x3FFE;
+                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
+                       ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
+                                                   WRITE_DATA_DST_SEL(1));
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+                       for (; ndw > 2; ndw -= 2, --count, pe += 8) {
+                               if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                       value = radeon_vm_map_gart(rdev, addr);
+                                       value &= 0xFFFFFFFFFFFFF000ULL;
+                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                                       value = addr;
+                               } else {
+                                       value = 0;
+                               }
+                               addr += incr;
+                               value |= r600_flags;
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       }
+               }
+       } else {
+               /* DMA */
+               cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
+       }
+ }
+ /*
+  * RLC
+  * The RLC is a multi-purpose microengine that handles a
+  * variety of functions, the most important of which is
+  * the interrupt controller.
+  */
+ static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
+                                         bool enable)
+ {
+       u32 tmp = RREG32(CP_INT_CNTL_RING0);
+       if (enable)
+               tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+       else
+               tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+       WREG32(CP_INT_CNTL_RING0, tmp);
+ }
+ static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
  {
-       u32 reset_mask = 0;
        u32 tmp;
  
-       /* GRBM_STATUS */
-       tmp = RREG32(GRBM_STATUS);
-       if (tmp & (PA_BUSY | SC_BUSY |
-                  BCI_BUSY | SX_BUSY |
-                  TA_BUSY | VGT_BUSY |
-                  DB_BUSY | CB_BUSY |
-                  GDS_BUSY | SPI_BUSY |
-                  IA_BUSY | IA_BUSY_NO_DMA))
-               reset_mask |= RADEON_RESET_GFX;
+       tmp = RREG32(RLC_LB_CNTL);
+       if (enable)
+               tmp |= LOAD_BALANCE_ENABLE;
+       else
+               tmp &= ~LOAD_BALANCE_ENABLE;
+       WREG32(RLC_LB_CNTL, tmp);
+ }
  
-       if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
-               reset_mask |= RADEON_RESET_CP;
+ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
+ {
+       u32 i, j, k;
+       u32 mask;
  
-       /* GRBM_STATUS2 */
-       tmp = RREG32(GRBM_STATUS2);
-       if (tmp & RLC_BUSY)
-               reset_mask |= RADEON_RESET_RLC;
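+       /* wait for the per-CU serdes masters on each SE/SH, then the non-CU masters, to go idle */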
+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       cik_select_se_sh(rdev, i, j);
+                       for (k = 0; k < rdev->usec_timeout; k++) {
+                               if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
+                                       break;
+                               udelay(1);
+                       }
+               }
+       }
+       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
  
-       /* SDMA0_STATUS_REG */
-       tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
-       if (!(tmp & SDMA_IDLE))
-               reset_mask |= RADEON_RESET_DMA;
+       mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
+       for (k = 0; k < rdev->usec_timeout; k++) {
+               if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+                       break;
+               udelay(1);
+       }
+ }
  
-       /* SDMA1_STATUS_REG */
-       tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
-       if (!(tmp & SDMA_IDLE))
-               reset_mask |= RADEON_RESET_DMA1;
+ static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
+ {
+       u32 tmp;
  
-       /* SRBM_STATUS2 */
-       tmp = RREG32(SRBM_STATUS2);
-       if (tmp & SDMA_BUSY)
-               reset_mask |= RADEON_RESET_DMA;
+       tmp = RREG32(RLC_CNTL);
+       if (tmp != rlc)
+               WREG32(RLC_CNTL, rlc);
+ }
  
-       if (tmp & SDMA1_BUSY)
-               reset_mask |= RADEON_RESET_DMA1;
+ static u32 cik_halt_rlc(struct radeon_device *rdev)
+ {
+       u32 data, orig;
  
-       /* SRBM_STATUS */
-       tmp = RREG32(SRBM_STATUS);
+       orig = data = RREG32(RLC_CNTL);
  
-       if (tmp & IH_BUSY)
-               reset_mask |= RADEON_RESET_IH;
+       if (data & RLC_ENABLE) {
+               u32 i;
  
-       if (tmp & SEM_BUSY)
-               reset_mask |= RADEON_RESET_SEM;
+               data &= ~RLC_ENABLE;
+               WREG32(RLC_CNTL, data);
  
-       if (tmp & GRBM_RQ_PENDING)
-               reset_mask |= RADEON_RESET_GRBM;
+               for (i = 0; i < rdev->usec_timeout; i++) {
+                       if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
+                               break;
+                       udelay(1);
+               }
  
-       if (tmp & VMC_BUSY)
-               reset_mask |= RADEON_RESET_VMC;
+               cik_wait_for_rlc_serdes(rdev);
+       }
  
-       if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
-                  MCC_BUSY | MCD_BUSY))
-               reset_mask |= RADEON_RESET_MC;
+       return orig;
+ }
  
-       if (evergreen_is_display_hung(rdev))
-               reset_mask |= RADEON_RESET_DISPLAY;
+ void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
+ {
+       u32 tmp, i, mask;
+       tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
+       WREG32(RLC_GPR_REG2, tmp);
+       mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
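+       /* wait for the RLC to report gfx power/clock status, then for it to clear REQ */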
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if ((RREG32(RLC_GPM_STAT) & mask) == mask)
+                       break;
+               udelay(1);
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
+                       break;
+               udelay(1);
+       }
+ }
+ void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
+ {
+       u32 tmp;
+       tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
+       WREG32(RLC_GPR_REG2, tmp);
+ }
+ /**
+  * cik_rlc_stop - stop the RLC ME
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Halt the RLC ME (MicroEngine) (CIK).
+  */
+ static void cik_rlc_stop(struct radeon_device *rdev)
+ {
+       WREG32(RLC_CNTL, 0);
+       cik_enable_gui_idle_interrupt(rdev, false);
+       cik_wait_for_rlc_serdes(rdev);
+ }
+ /**
+  * cik_rlc_start - start the RLC ME
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Unhalt the RLC ME (MicroEngine) (CIK).
+  */
+ static void cik_rlc_start(struct radeon_device *rdev)
+ {
+       WREG32(RLC_CNTL, RLC_ENABLE);
  
-       /* Skip MC reset as it's mostly likely not hung, just busy */
-       if (reset_mask & RADEON_RESET_MC) {
-               DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
-               reset_mask &= ~RADEON_RESET_MC;
-       }
+       cik_enable_gui_idle_interrupt(rdev, true);
  
-       return reset_mask;
+       udelay(50);
  }
  
  /**
-  * cik_gpu_soft_reset - soft reset GPU
+  * cik_rlc_resume - setup the RLC hw
   *
   * @rdev: radeon_device pointer
-  * @reset_mask: mask of which blocks to reset
   *
-  * Soft reset the blocks specified in @reset_mask.
+  * Initialize the RLC registers, load the ucode,
+  * and start the RLC (CIK).
+  * Returns 0 for success, -EINVAL if the ucode is not available.
   */
- static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+ static int cik_rlc_resume(struct radeon_device *rdev)
  {
-       struct evergreen_mc_save save;
-       u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
-       u32 tmp;
-       if (reset_mask == 0)
-               return;
+       u32 i, size, tmp;
+       const __be32 *fw_data;
  
-       dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+       if (!rdev->rlc_fw)
+               return -EINVAL;
  
-       cik_print_gpu_status_regs(rdev);
-       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-                RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
-       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-                RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+       switch (rdev->family) {
+       case CHIP_BONAIRE:
+       default:
+               size = BONAIRE_RLC_UCODE_SIZE;
+               break;
+       case CHIP_KAVERI:
+               size = KV_RLC_UCODE_SIZE;
+               break;
+       case CHIP_KABINI:
+               size = KB_RLC_UCODE_SIZE;
+               break;
+       }
  
-       /* stop the rlc */
        cik_rlc_stop(rdev);
  
-       /* Disable GFX parsing/prefetching */
-       WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
-       /* Disable MEC parsing/prefetching */
-       WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
+       /* disable CG */
+       tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
+       WREG32(RLC_CGCG_CGLS_CTRL, tmp);
  
-       if (reset_mask & RADEON_RESET_DMA) {
-               /* sdma0 */
-               tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
-               tmp |= SDMA_HALT;
-               WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-       }
-       if (reset_mask & RADEON_RESET_DMA1) {
-               /* sdma1 */
-               tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
-               tmp |= SDMA_HALT;
-               WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-       }
+       si_rlc_reset(rdev);
  
-       evergreen_mc_stop(rdev, &save);
-       if (evergreen_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
-       }
+       cik_init_pg(rdev);
  
-       if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
-               grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
+       cik_init_cg(rdev);
  
-       if (reset_mask & RADEON_RESET_CP) {
-               grbm_soft_reset |= SOFT_RESET_CP;
+       WREG32(RLC_LB_CNTR_INIT, 0);
+       WREG32(RLC_LB_CNTR_MAX, 0x00008000);
  
-               srbm_soft_reset |= SOFT_RESET_GRBM;
-       }
+       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+       WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
+       WREG32(RLC_LB_PARAMS, 0x00600408);
+       WREG32(RLC_LB_CNTL, 0x80000004);
  
-       if (reset_mask & RADEON_RESET_DMA)
-               srbm_soft_reset |= SOFT_RESET_SDMA;
+       WREG32(RLC_MC_CNTL, 0);
+       WREG32(RLC_UCODE_CNTL, 0);
  
-       if (reset_mask & RADEON_RESET_DMA1)
-               srbm_soft_reset |= SOFT_RESET_SDMA1;
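+       /* stream the big-endian RLC ucode dwords into RLC_GPM_UCODE_DATA */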
+       fw_data = (const __be32 *)rdev->rlc_fw->data;
+       WREG32(RLC_GPM_UCODE_ADDR, 0);
+       for (i = 0; i < size; i++)
+               WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
+       WREG32(RLC_GPM_UCODE_ADDR, 0);
  
-       if (reset_mask & RADEON_RESET_DISPLAY)
-               srbm_soft_reset |= SOFT_RESET_DC;
+       /* XXX - find out what chips support lbpw */
+       cik_enable_lbpw(rdev, false);
  
-       if (reset_mask & RADEON_RESET_RLC)
-               grbm_soft_reset |= SOFT_RESET_RLC;
+       if (rdev->family == CHIP_BONAIRE)
+               WREG32(RLC_DRIVER_DMA_STATUS, 0);
  
-       if (reset_mask & RADEON_RESET_SEM)
-               srbm_soft_reset |= SOFT_RESET_SEM;
+       cik_rlc_start(rdev);
  
-       if (reset_mask & RADEON_RESET_IH)
-               srbm_soft_reset |= SOFT_RESET_IH;
+       return 0;
+ }
  
-       if (reset_mask & RADEON_RESET_GRBM)
-               srbm_soft_reset |= SOFT_RESET_GRBM;
+ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
+ {
+       u32 data, orig, tmp, tmp2;
  
-       if (reset_mask & RADEON_RESET_VMC)
-               srbm_soft_reset |= SOFT_RESET_VMC;
+       orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
  
-       if (!(rdev->flags & RADEON_IS_IGP)) {
-               if (reset_mask & RADEON_RESET_MC)
-                       srbm_soft_reset |= SOFT_RESET_MC;
-       }
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
+               cik_enable_gui_idle_interrupt(rdev, true);
  
-       if (grbm_soft_reset) {
-               tmp = RREG32(GRBM_SOFT_RESET);
-               tmp |= grbm_soft_reset;
-               dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
-               WREG32(GRBM_SOFT_RESET, tmp);
-               tmp = RREG32(GRBM_SOFT_RESET);
+               tmp = cik_halt_rlc(rdev);
  
-               udelay(50);
+               cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+               WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+               WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+               tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
+               WREG32(RLC_SERDES_WR_CTRL, tmp2);
  
-               tmp &= ~grbm_soft_reset;
-               WREG32(GRBM_SOFT_RESET, tmp);
-               tmp = RREG32(GRBM_SOFT_RESET);
-       }
+               cik_update_rlc(rdev, tmp);
  
-       if (srbm_soft_reset) {
-               tmp = RREG32(SRBM_SOFT_RESET);
-               tmp |= srbm_soft_reset;
-               dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
-               WREG32(SRBM_SOFT_RESET, tmp);
-               tmp = RREG32(SRBM_SOFT_RESET);
+               data |= CGCG_EN | CGLS_EN;
+       } else {
+               cik_enable_gui_idle_interrupt(rdev, false);
  
-               udelay(50);
+               RREG32(CB_CGTT_SCLK_CTRL);
+               RREG32(CB_CGTT_SCLK_CTRL);
+               RREG32(CB_CGTT_SCLK_CTRL);
+               RREG32(CB_CGTT_SCLK_CTRL);
  
-               tmp &= ~srbm_soft_reset;
-               WREG32(SRBM_SOFT_RESET, tmp);
-               tmp = RREG32(SRBM_SOFT_RESET);
+               data &= ~(CGCG_EN | CGLS_EN);
        }
  
-       /* Wait a little for things to settle down */
-       udelay(50);
-       evergreen_mc_resume(rdev, &save);
-       udelay(50);
+       if (orig != data)
+               WREG32(RLC_CGCG_CGLS_CTRL, data);
  
-       cik_print_gpu_status_regs(rdev);
  }
  
- /**
-  * cik_asic_reset - soft reset GPU
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Look up which blocks are hung and attempt
-  * to reset them.
-  * Returns 0 for success.
-  */
- int cik_asic_reset(struct radeon_device *rdev)
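+ /**
+  * cik_enable_mgcg - toggle GFX medium grain clock gating
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable or disable MGCG
+  *
+  * Enable or disable medium grain clock gating (and the related
+  * CP memory light sleep and CGTS settings) for the GFX block (CIK).
+  */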
+ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
  {
-       u32 reset_mask;
-       reset_mask = cik_gpu_check_soft_reset(rdev);
-       if (reset_mask)
-               r600_set_bios_scratch_engine_hung(rdev, true);
+       u32 data, orig, tmp = 0;
+ 
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
+               if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
+                       if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
+                               orig = data = RREG32(CP_MEM_SLP_CNTL);
+                               data |= CP_MEM_LS_EN;
+                               if (orig != data)
+                                       WREG32(CP_MEM_SLP_CNTL, data);
+                       }
+               }
  
-       cik_gpu_soft_reset(rdev, reset_mask);
+               orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+               data &= 0xfffffffd;
+               if (orig != data)
+                       WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+               tmp = cik_halt_rlc(rdev);
+               cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+               WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+               WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+               data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
+               WREG32(RLC_SERDES_WR_CTRL, data);
+               cik_update_rlc(rdev, tmp);
+               if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
+                       orig = data = RREG32(CGTS_SM_CTRL_REG);
+                       data &= ~SM_MODE_MASK;
+                       data |= SM_MODE(0x2);
+                       data |= SM_MODE_ENABLE;
+                       data &= ~CGTS_OVERRIDE;
+                       if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
+                           (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
+                               data &= ~CGTS_LS_OVERRIDE;
+                       data &= ~ON_MONITOR_ADD_MASK;
+                       data |= ON_MONITOR_ADD_EN;
+                       data |= ON_MONITOR_ADD(0x96);
+                       if (orig != data)
+                               WREG32(CGTS_SM_CTRL_REG, data);
+               }
+       } else {
+               orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+               data |= 0x00000002;
+               if (orig != data)
+                       WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+               data = RREG32(RLC_MEM_SLP_CNTL);
+               if (data & RLC_MEM_LS_EN) {
+                       data &= ~RLC_MEM_LS_EN;
+                       WREG32(RLC_MEM_SLP_CNTL, data);
+               }
  
-       reset_mask = cik_gpu_check_soft_reset(rdev);
+               data = RREG32(CP_MEM_SLP_CNTL);
+               if (data & CP_MEM_LS_EN) {
+                       data &= ~CP_MEM_LS_EN;
+                       WREG32(CP_MEM_SLP_CNTL, data);
+               }
  
-       if (!reset_mask)
-               r600_set_bios_scratch_engine_hung(rdev, false);
+               orig = data = RREG32(CGTS_SM_CTRL_REG);
+               data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
+               if (orig != data)
+                       WREG32(CGTS_SM_CTRL_REG, data);
  
-       return 0;
- }
+               tmp = cik_halt_rlc(rdev);
  
- /**
-  * cik_gfx_is_lockup - check if the 3D engine is locked up
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Check if the 3D engine is locked up (CIK).
-  * Returns true if the engine is locked, false if not.
-  */
- bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
+               cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+               WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+               WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+               data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
+               WREG32(RLC_SERDES_WR_CTRL, data);
  
-       if (!(reset_mask & (RADEON_RESET_GFX |
-                           RADEON_RESET_COMPUTE |
-                           RADEON_RESET_CP))) {
-               radeon_ring_lockup_update(ring);
-               return false;
+               cik_update_rlc(rdev, tmp);
        }
-       /* force CP activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
  }
  
- /**
-  * cik_sdma_is_lockup - Check if the DMA engine is locked up
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Check if the async DMA engine is locked up (CIK).
-  * Returns true if the engine appears to be locked up, false if not.
-  */
- bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ static const u32 mc_cg_registers[] =
  {
-       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
-       u32 mask;
-       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-               mask = RADEON_RESET_DMA;
-       else
-               mask = RADEON_RESET_DMA1;
-       if (!(reset_mask & mask)) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force ring activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
- }
+       MC_HUB_MISC_HUB_CG,
+       MC_HUB_MISC_SIP_CG,
+       MC_HUB_MISC_VM_CG,
+       MC_XPB_CLK_GAT,
+       ATC_MISC_CG,
+       MC_CITF_MISC_WR_CG,
+       MC_CITF_MISC_RD_CG,
+       MC_CITF_MISC_VM_CG,
+       VM_L2_CG,
+ };
  
- /* MC */
- /**
-  * cik_mc_program - program the GPU memory controller
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Set the location of vram, gart, and AGP in the GPU's
-  * physical address space (CIK).
-  */
- static void cik_mc_program(struct radeon_device *rdev)
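+ /**
+  * cik_enable_mc_ls - toggle MC light sleep
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable or disable light sleep
+  *
+  * Enable or disable memory controller light sleep (CIK).
+  */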
+ static void cik_enable_mc_ls(struct radeon_device *rdev,
+                            bool enable)
  {
-       struct evergreen_mc_save save;
-       u32 tmp;
-       int i, j;
+       int i;
+       u32 orig, data;
  
-       /* Initialize HDP */
-       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
-               WREG32((0x2c14 + j), 0x00000000);
-               WREG32((0x2c18 + j), 0x00000000);
-               WREG32((0x2c1c + j), 0x00000000);
-               WREG32((0x2c20 + j), 0x00000000);
-               WREG32((0x2c24 + j), 0x00000000);
+       for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+               orig = data = RREG32(mc_cg_registers[i]);
+               if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
+                       data |= MC_LS_ENABLE;
+               else
+                       data &= ~MC_LS_ENABLE;
+               if (data != orig)
+                       WREG32(mc_cg_registers[i], data);
        }
-       WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+ }
  
-       evergreen_mc_stop(rdev, &save);
-       if (radeon_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
-       }
-       /* Lockout access through VGA aperture*/
-       WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
-       /* Update configuration */
-       WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
-              rdev->mc.vram_start >> 12);
-       WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-              rdev->mc.vram_end >> 12);
-       WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
-              rdev->vram_scratch.gpu_addr >> 12);
-       tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
-       tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
-       WREG32(MC_VM_FB_LOCATION, tmp);
-       /* XXX double check these! */
-       WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
-       WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
-       WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
-       WREG32(MC_VM_AGP_BASE, 0);
-       WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
-       WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
-       if (radeon_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ static void cik_enable_mc_mgcg(struct radeon_device *rdev,
+                              bool enable)
+ {
+       int i;
+       u32 orig, data;
+ 
+       for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+               orig = data = RREG32(mc_cg_registers[i]);
+               if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
+                       data |= MC_CG_ENABLE;
+               else
+                       data &= ~MC_CG_ENABLE;
+               if (data != orig)
+                       WREG32(mc_cg_registers[i], data);
        }
-       evergreen_mc_resume(rdev, &save);
-       /* we need to own VRAM, so turn off the VGA renderer here
-        * to stop it overwriting our objects */
-       rv515_vga_render_disable(rdev);
  }
  
- /**
-  * cik_mc_init - initialize the memory controller driver params
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Look up the amount of vram, vram width, and decide how to place
-  * vram and gart within the GPU's physical address space (CIK).
-  * Returns 0 for success.
-  */
- static int cik_mc_init(struct radeon_device *rdev)
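+ /**
+  * cik_enable_sdma_mgcg - toggle SDMA medium grain clock gating
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable or disable MGCG
+  *
+  * Enable or disable medium grain clock gating for both SDMA engines (CIK).
+  */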
+ static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
+                                bool enable)
  {
-       u32 tmp;
-       int chansize, numchan;
+       u32 orig, data;
  
-       /* Get VRAM informations */
-       rdev->mc.vram_is_ddr = true;
-       tmp = RREG32(MC_ARB_RAMCFG);
-       if (tmp & CHANSIZE_MASK) {
-               chansize = 64;
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
+               WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
+               WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
        } else {
-               chansize = 32;
-       }
-       tmp = RREG32(MC_SHARED_CHMAP);
-       switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-       case 0:
-       default:
-               numchan = 1;
-               break;
-       case 1:
-               numchan = 2;
-               break;
-       case 2:
-               numchan = 4;
-               break;
-       case 3:
-               numchan = 8;
-               break;
-       case 4:
-               numchan = 3;
-               break;
-       case 5:
-               numchan = 6;
-               break;
-       case 6:
-               numchan = 10;
-               break;
-       case 7:
-               numchan = 12;
-               break;
-       case 8:
-               numchan = 16;
-               break;
-       }
-       rdev->mc.vram_width = numchan * chansize;
-       /* Could aper size report 0 ? */
-       rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
-       rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
-       /* size in MB on si */
-       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
-       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
-       rdev->mc.visible_vram_size = rdev->mc.aper_size;
-       si_vram_gtt_location(rdev, &rdev->mc);
-       radeon_update_bandwidth_info(rdev);
+               orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
+               data |= 0xff000000;
+               if (data != orig)
+                       WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
  
-       return 0;
+               orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
+               data |= 0xff000000;
+               if (data != orig)
+                       WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
+       }
  }
  
- /*
-  * GART
-  * VMID 0 is the physical GPU addresses as used by the kernel.
-  * VMIDs 1-15 are used for userspace clients and are handled
-  * by the radeon vm/hsa code.
-  */
- /**
-  * cik_pcie_gart_tlb_flush - gart tlb flush callback
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Flush the TLB for the VMID 0 page table (CIK).
-  */
- void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
+ static void cik_enable_sdma_mgls(struct radeon_device *rdev,
+                                bool enable)
  {
-       /* flush hdp cache */
-       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+       u32 orig, data;
+ 
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+               data |= 0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+               data |= 0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+       } else {
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+               data &= ~0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
  
-       /* bits 0-15 are the VM contexts0-15 */
-       WREG32(VM_INVALIDATE_REQUEST, 0x1);
+               orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+               data &= ~0x100;
+               if (orig != data)
+                       WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+       }
  }
  
- /**
-  * cik_pcie_gart_enable - gart enable
-  *
-  * @rdev: radeon_device pointer
-  *
-  * This sets up the TLBs, programs the page tables for VMID0,
-  * sets up the hw for VMIDs 1-15 which are allocated on
-  * demand, and sets up the global locations for the LDS, GDS,
-  * and GPUVM for FSA64 clients (CIK).
-  * Returns 0 for success, errors for failure.
-  */
- static int cik_pcie_gart_enable(struct radeon_device *rdev)
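+ /**
+  * cik_enable_uvd_mgcg - toggle UVD medium grain clock gating
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable or disable MGCG
+  *
+  * Enable or disable medium grain clock gating for the UVD block (CIK).
+  */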
+ static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
+                               bool enable)
  {
-       int r, i;
+       u32 orig, data;
  
-       if (rdev->gart.robj == NULL) {
-               dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
-               return -EINVAL;
-       }
-       r = radeon_gart_table_vram_pin(rdev);
-       if (r)
-               return r;
-       radeon_gart_restore(rdev);
-       /* Setup TLB control */
-       WREG32(MC_VM_MX_L1_TLB_CNTL,
-              (0xA << 7) |
-              ENABLE_L1_TLB |
-              SYSTEM_ACCESS_MODE_NOT_IN_SYS |
-              ENABLE_ADVANCED_DRIVER_MODEL |
-              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
-       /* Setup L2 cache */
-       WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
-              ENABLE_L2_FRAGMENT_PROCESSING |
-              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
-              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
-              EFFECTIVE_L2_QUEUE_SIZE(7) |
-              CONTEXT1_IDENTITY_ACCESS_MODE(1));
-       WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
-       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-       /* setup context0 */
-       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
-       WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
-                       (u32)(rdev->dummy_page.addr >> 12));
-       WREG32(VM_CONTEXT0_CNTL2, 0);
-       WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
-                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
+               data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+               data = 0xfff;
+               WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
  
-       WREG32(0x15D4, 0);
-       WREG32(0x15D8, 0);
-       WREG32(0x15DC, 0);
+               orig = data = RREG32(UVD_CGC_CTRL);
+               data |= DCM;
+               if (orig != data)
+                       WREG32(UVD_CGC_CTRL, data);
+       } else {
+               data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+               data &= ~0xfff;
+               WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
  
-       /* empty context1-15 */
-       /* FIXME start with 4G, once using 2 level pt switch to full
-        * vm size space
-        */
-       /* set vm size, must be a multiple of 4 */
-       WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
-       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
-       for (i = 1; i < 16; i++) {
-               if (i < 8)
-                       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
-                              rdev->gart.table_addr >> 12);
-               else
-                       WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
-                              rdev->gart.table_addr >> 12);
+               orig = data = RREG32(UVD_CGC_CTRL);
+               data &= ~DCM;
+               if (orig != data)
+                       WREG32(UVD_CGC_CTRL, data);
        }
+ }
  
-       /* enable context1-15 */
-       WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
-              (u32)(rdev->dummy_page.addr >> 12));
-       WREG32(VM_CONTEXT1_CNTL2, 4);
-       WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-                               RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               READ_PROTECTION_FAULT_ENABLE_DEFAULT |
-                               WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-                               WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ static void cik_enable_bif_mgls(struct radeon_device *rdev,
+                              bool enable)
+ {
+       u32 orig, data;
  
-       /* TC cache setup ??? */
-       WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
-       WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
-       WREG32(TC_CFG_L1_STORE_POLICY, 0);
+       orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
  
-       WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
-       WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
-       WREG32(TC_CFG_L2_STORE_POLICY0, 0);
-       WREG32(TC_CFG_L2_STORE_POLICY1, 0);
-       WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
+               data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
+                       REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
+       else
+               data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
+                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
  
-       WREG32(TC_CFG_L1_VOLATILE, 0);
-       WREG32(TC_CFG_L2_VOLATILE, 0);
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_CNTL2, data);
+ }
  
-       if (rdev->family == CHIP_KAVERI) {
-               u32 tmp = RREG32(CHUB_CONTROL);
-               tmp &= ~BYPASS_VM;
-               WREG32(CHUB_CONTROL, tmp);
-       }
+ static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
+                               bool enable)
+ {
+       u32 orig, data;
  
-       /* XXX SH_MEM regs */
-       /* where to put LDS, scratch, GPUVM in FSA64 space */
-       for (i = 0; i < 16; i++) {
-               cik_srbm_select(rdev, 0, 0, 0, i);
-               /* CP and shaders */
-               WREG32(SH_MEM_CONFIG, 0);
-               WREG32(SH_MEM_APE1_BASE, 1);
-               WREG32(SH_MEM_APE1_LIMIT, 0);
-               WREG32(SH_MEM_BASES, 0);
-               /* SDMA GFX */
-               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
-               WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
-               WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
-               WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
-               /* XXX SDMA RLC - todo */
-       }
-       cik_srbm_select(rdev, 0, 0, 0, 0);
+       orig = data = RREG32(HDP_HOST_PATH_CNTL);
  
-       cik_pcie_gart_tlb_flush(rdev);
-       DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-                (unsigned)(rdev->mc.gtt_size >> 20),
-                (unsigned long long)rdev->gart.table_addr);
-       rdev->gart.ready = true;
-       return 0;
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
+               data &= ~CLOCK_GATING_DIS;
+       else
+               data |= CLOCK_GATING_DIS;
+       if (orig != data)
+               WREG32(HDP_HOST_PATH_CNTL, data);
  }
  
- /**
-  * cik_pcie_gart_disable - gart disable
-  *
-  * @rdev: radeon_device pointer
-  *
-  * This disables all VM page table (CIK).
-  */
- static void cik_pcie_gart_disable(struct radeon_device *rdev)
+ static void cik_enable_hdp_ls(struct radeon_device *rdev,
+                             bool enable)
  {
-       /* Disable all tables */
-       WREG32(VM_CONTEXT0_CNTL, 0);
-       WREG32(VM_CONTEXT1_CNTL, 0);
-       /* Setup TLB control */
-       WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
-              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
-       /* Setup L2 cache */
-       WREG32(VM_L2_CNTL,
-              ENABLE_L2_FRAGMENT_PROCESSING |
-              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
-              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
-              EFFECTIVE_L2_QUEUE_SIZE(7) |
-              CONTEXT1_IDENTITY_ACCESS_MODE(1));
-       WREG32(VM_L2_CNTL2, 0);
-       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-       radeon_gart_table_vram_unpin(rdev);
+       u32 orig, data;
+ 
+       orig = data = RREG32(HDP_MEM_POWER_LS);
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
+               data |= HDP_LS_ENABLE;
+       else
+               data &= ~HDP_LS_ENABLE;
+       if (orig != data)
+               WREG32(HDP_MEM_POWER_LS, data);
+ }
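+ 
+ /**
+  * cik_update_cg - update clock gating for the requested blocks
+  *
+  * @rdev: radeon_device pointer
+  * @block: bitmask of blocks (GFX, MC, SDMA, BIF, UVD, HDP) to update
+  * @enable: enable or disable clock gating
+  *
+  * Enable or disable clock gating for the selected blocks (CIK).
+  */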
+ void cik_update_cg(struct radeon_device *rdev,
+                  u32 block, bool enable)
+ {
+       if (block & RADEON_CG_BLOCK_GFX) {
+               /* order matters! */
+               if (enable) {
+                       cik_enable_mgcg(rdev, true);
+                       cik_enable_cgcg(rdev, true);
+               } else {
+                       cik_enable_cgcg(rdev, false);
+                       cik_enable_mgcg(rdev, false);
+               }
+       }
+       if (block & RADEON_CG_BLOCK_MC) {
+               if (!(rdev->flags & RADEON_IS_IGP)) {
+                       cik_enable_mc_mgcg(rdev, enable);
+                       cik_enable_mc_ls(rdev, enable);
+               }
+       }
+       if (block & RADEON_CG_BLOCK_SDMA) {
+               cik_enable_sdma_mgcg(rdev, enable);
+               cik_enable_sdma_mgls(rdev, enable);
+       }
+       if (block & RADEON_CG_BLOCK_BIF) {
+               cik_enable_bif_mgls(rdev, enable);
+       }
+       if (block & RADEON_CG_BLOCK_UVD) {
+               if (rdev->has_uvd)
+                       cik_enable_uvd_mgcg(rdev, enable);
+       }
+       if (block & RADEON_CG_BLOCK_HDP) {
+               cik_enable_hdp_mgcg(rdev, enable);
+               cik_enable_hdp_ls(rdev, enable);
+       }
  }
  
- /**
-  * cik_pcie_gart_fini - vm fini callback
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Tears down the driver GART/VM setup (CIK).
-  */
- static void cik_pcie_gart_fini(struct radeon_device *rdev)
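+ /**
+  * cik_init_cg - enable clock gating
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Enable clock gating on all blocks supported by the asic (CIK).
+  */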
+ static void cik_init_cg(struct radeon_device *rdev)
  {
-       cik_pcie_gart_disable(rdev);
-       radeon_gart_table_vram_free(rdev);
-       radeon_gart_fini(rdev);
+       cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
+       if (rdev->has_uvd)
+               si_init_uvd_internal_cg(rdev);
+       cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+                            RADEON_CG_BLOCK_SDMA |
+                            RADEON_CG_BLOCK_BIF |
+                            RADEON_CG_BLOCK_UVD |
+                            RADEON_CG_BLOCK_HDP), true);
  }
  
- /* vm parser */
- /**
-  * cik_ib_parse - vm ib_parse callback
-  *
-  * @rdev: radeon_device pointer
-  * @ib: indirect buffer pointer
-  *
-  * CIK uses hw IB checking so this is a nop (CIK).
-  */
- int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+ static void cik_fini_cg(struct radeon_device *rdev)
  {
-       return 0;
+       cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+                            RADEON_CG_BLOCK_SDMA |
+                            RADEON_CG_BLOCK_BIF |
+                            RADEON_CG_BLOCK_UVD |
+                            RADEON_CG_BLOCK_HDP), false);
+       cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
  }
  
- /*
-  * vm
-  * VMID 0 is the physical GPU addresses as used by the kernel.
-  * VMIDs 1-15 are used for userspace clients and are handled
-  * by the radeon vm/hsa code.
-  */
- /**
-  * cik_vm_init - cik vm init callback
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Inits cik specific vm parameters (number of VMs, base of vram for
-  * VMIDs 1-15) (CIK).
-  * Returns 0 for success.
-  */
- int cik_vm_init(struct radeon_device *rdev)
+ static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
+                                         bool enable)
  {
-       /* number of VMs */
-       rdev->vm_manager.nvm = 16;
-       /* base offset of vram pages */
-       if (rdev->flags & RADEON_IS_IGP) {
-               u64 tmp = RREG32(MC_VM_FB_OFFSET);
-               tmp <<= 22;
-               rdev->vm_manager.vram_base_offset = tmp;
-       } else
-               rdev->vm_manager.vram_base_offset = 0;
+       u32 data, orig;
  
-       return 0;
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
+               data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+       else
+               data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
  }
  
- /**
-  * cik_vm_fini - cik vm fini callback
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Tear down any asic specific VM setup (CIK).
-  */
- void cik_vm_fini(struct radeon_device *rdev)
+ static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
+                                         bool enable)
  {
+       u32 data, orig;
+ 
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
+               data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+       else
+               data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
  }
  
- /**
-  * cik_vm_decode_fault - print human readable fault info
-  *
-  * @rdev: radeon_device pointer
-  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
-  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
-  *
-  * Print human readable fault information (CIK).
-  */
- static void cik_vm_decode_fault(struct radeon_device *rdev,
-                               u32 status, u32 addr, u32 mc_client)
+ static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
  {
-       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
-       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
-       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
-       char *block = (char *)&mc_client;
+       u32 data, orig;
  
-       printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
-              protections, vmid, addr,
-              (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
-              block, mc_id);
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
+               data &= ~DISABLE_CP_PG;
+       else
+               data |= DISABLE_CP_PG;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
  }
  
- /**
-  * cik_vm_flush - cik vm flush using the CP
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Update the page table base and flush the VM TLB
-  * using the CP (CIK).
-  */
- void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
  {
-       struct radeon_ring *ring = &rdev->ring[ridx];
+       u32 data, orig;
  
-       if (vm == NULL)
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
+               data &= ~DISABLE_GDS_PG;
+       else
+               data |= DISABLE_GDS_PG;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
+ }
+ 
+ #define CP_ME_TABLE_SIZE    96
+ #define CP_ME_TABLE_OFFSET  2048
+ #define CP_MEC_TABLE_OFFSET 4096
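+ 
+ /**
+  * cik_init_cp_pg_table - set up the CP power gating table
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Copy the CE, PFP, ME, and MEC ucode into the RLC CP table buffer (CIK).
+  */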
+ void cik_init_cp_pg_table(struct radeon_device *rdev)
+ {
+       const __be32 *fw_data;
+       volatile u32 *dst_ptr;
+       int me, i, max_me = 4;
+       u32 bo_offset = 0;
+       u32 table_offset;
+ 
+       if (rdev->family == CHIP_KAVERI)
+               max_me = 5;
+       if (rdev->rlc.cp_table_ptr == NULL)
                return;
  
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       if (vm->id < 8) {
-               radeon_ring_write(ring,
-                                 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
-       } else {
-               radeon_ring_write(ring,
-                                 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+       /* write the cp table buffer */
+       dst_ptr = rdev->rlc.cp_table_ptr;
+       for (me = 0; me < max_me; me++) {
+               if (me == 0) {
+                       fw_data = (const __be32 *)rdev->ce_fw->data;
+                       table_offset = CP_ME_TABLE_OFFSET;
+               } else if (me == 1) {
+                       fw_data = (const __be32 *)rdev->pfp_fw->data;
+                       table_offset = CP_ME_TABLE_OFFSET;
+               } else if (me == 2) {
+                       fw_data = (const __be32 *)rdev->me_fw->data;
+                       table_offset = CP_ME_TABLE_OFFSET;
+               } else {
+                       fw_data = (const __be32 *)rdev->mec_fw->data;
+                       table_offset = CP_MEC_TABLE_OFFSET;
+               }
+               for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
+                       dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
+               }
+               bo_offset += CP_ME_TABLE_SIZE;
        }
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ }
  
-       /* update SH_MEM_* regs */
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, VMID(vm->id));
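+ /**
+  * cik_enable_gfx_cgpg - toggle GFX power gating
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable or disable power gating
+  *
+  * Enable or disable coarse grain power gating for the GFX block
+  * via the RLC (CIK).
+  */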
+ static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
+                               bool enable)
+ {
+       u32 data, orig;
+ 
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
+               orig = data = RREG32(RLC_PG_CNTL);
+               data |= GFX_PG_ENABLE;
+               if (orig != data)
+                       WREG32(RLC_PG_CNTL, data);
+               orig = data = RREG32(RLC_AUTO_PG_CTRL);
+               data |= AUTO_PG_EN;
+               if (orig != data)
+                       WREG32(RLC_AUTO_PG_CTRL, data);
+       } else {
+               orig = data = RREG32(RLC_PG_CNTL);
+               data &= ~GFX_PG_ENABLE;
+               if (orig != data)
+                       WREG32(RLC_PG_CNTL, data);
  
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, SH_MEM_BASES >> 2);
-       radeon_ring_write(ring, 0);
+               orig = data = RREG32(RLC_AUTO_PG_CTRL);
+               data &= ~AUTO_PG_EN;
+               if (orig != data)
+                       WREG32(RLC_AUTO_PG_CTRL, data);
  
-       radeon_ring_write(ring, 0); /* SH_MEM_BASES */
-       radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
-       radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
-       radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
+               data = RREG32(DB_RENDER_CONTROL);
+       }
+ }
  
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, VMID(0));
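+ /**
+  * cik_get_cu_active_bitmap - get the active CU bitmap
+  *
+  * @rdev: radeon_device pointer
+  * @se: shader engine to check
+  * @sh: shader array to check
+  *
+  * Returns a bitmap of the active CUs for the selected SE/SH (CIK).
+  */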
+ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
+ {
+       u32 mask = 0, tmp, tmp1;
+       int i;
  
-       /* HDP flush */
-       /* We should be using the WAIT_REG_MEM packet here like in
-        * cik_fence_ring_emit(), but it causes the CP to hang in this
-        * context...
-        */
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, 0);
+       cik_select_se_sh(rdev, se, sh);
+       tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+       tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
  
-       /* bits 0-15 are the VM contexts0-15 */
-       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(0)));
-       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, 1 << vm->id);
+       tmp &= 0xffff0000;
  
-       /* compute doesn't have PFP */
-       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
-               /* sync PFP to ME, otherwise we might get invalid PFP reads */
-               radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-               radeon_ring_write(ring, 0x0);
+       tmp |= tmp1;
+       tmp >>= 16;
+       for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
+               mask <<= 1;
+               mask |= 1;
        }
+       return (~tmp) & mask;
  }
  
- /**
-  * cik_vm_set_page - update the page tables using sDMA
-  *
-  * @rdev: radeon_device pointer
-  * @ib: indirect buffer to fill with commands
-  * @pe: addr of the page entry
-  * @addr: dst addr to write into pe
-  * @count: number of page entries to update
-  * @incr: increase next addr by incr bytes
-  * @flags: access flags
-  *
-  * Update the page tables using CP or sDMA (CIK).
-  */
- void cik_vm_set_page(struct radeon_device *rdev,
-                    struct radeon_ib *ib,
-                    uint64_t pe,
-                    uint64_t addr, unsigned count,
-                    uint32_t incr, uint32_t flags)
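+ /**
+  * cik_init_ao_cu_mask - set up the always-on CU mask
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Program the RLC always-on CU bitmap and the active CU count
+  * used for power gating (CIK).
+  */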
+ static void cik_init_ao_cu_mask(struct radeon_device *rdev)
  {
-       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
-       uint64_t value;
-       unsigned ndw;
-       if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
-               /* CP */
-               while (count) {
-                       ndw = 2 + count * 2;
-                       if (ndw > 0x3FFE)
-                               ndw = 0x3FFE;
+       u32 i, j, k, active_cu_number = 0;
+       u32 mask, counter, cu_bitmap;
+       u32 tmp = 0;
  
-                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
-                       ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
-                                                   WRITE_DATA_DST_SEL(1));
-                       ib->ptr[ib->length_dw++] = pe;
-                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-                       for (; ndw > 2; ndw -= 2, --count, pe += 8) {
-                               if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                       value = radeon_vm_map_gart(rdev, addr);
-                                       value &= 0xFFFFFFFFFFFFF000ULL;
-                               } else if (flags & RADEON_VM_PAGE_VALID) {
-                                       value = addr;
-                               } else {
-                                       value = 0;
-                               }
-                               addr += incr;
-                               value |= r600_flags;
-                               ib->ptr[ib->length_dw++] = value;
-                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                       }
-               }
-       } else {
-               /* DMA */
-               if (flags & RADEON_VM_PAGE_SYSTEM) {
-                       while (count) {
-                               ndw = count * 2;
-                               if (ndw > 0xFFFFE)
-                                       ndw = 0xFFFFE;
-                               /* for non-physically contiguous pages (system) */
-                               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-                               ib->ptr[ib->length_dw++] = pe;
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-                               ib->ptr[ib->length_dw++] = ndw;
-                               for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-                                       if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                               value = radeon_vm_map_gart(rdev, addr);
-                                               value &= 0xFFFFFFFFFFFFF000ULL;
-                                       } else if (flags & RADEON_VM_PAGE_VALID) {
-                                               value = addr;
-                                       } else {
-                                               value = 0;
-                                       }
-                                       addr += incr;
-                                       value |= r600_flags;
-                                       ib->ptr[ib->length_dw++] = value;
-                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       mask = 1;
+                       cu_bitmap = 0;
+                       counter = 0;
+                               for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
+                               if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
+                                       if (counter < 2)
+                                               cu_bitmap |= mask;
+                                       counter++;
                                }
+                               mask <<= 1;
                        }
-               } else {
-                       while (count) {
-                               ndw = count;
-                               if (ndw > 0x7FFFF)
-                                       ndw = 0x7FFFF;
  
-                               if (flags & RADEON_VM_PAGE_VALID)
-                                       value = addr;
-                               else
-                                       value = 0;
-                               /* for physically contiguous pages (vram) */
-                               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-                               ib->ptr[ib->length_dw++] = 0;
-                               ib->ptr[ib->length_dw++] = value; /* value */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                               ib->ptr[ib->length_dw++] = incr; /* increment size */
-                               ib->ptr[ib->length_dw++] = 0;
-                               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-                               pe += ndw * 8;
-                               addr += ndw * incr;
-                               count -= ndw;
-                       }
+                       active_cu_number += counter;
+                       tmp |= (cu_bitmap << (i * 16 + j * 8));
                }
-               while (ib->length_dw & 0x7)
-                       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
        }
+       WREG32(RLC_PG_AO_CU_MASK, tmp);
+       tmp = RREG32(RLC_MAX_PG_CU);
+       tmp &= ~MAX_PU_CU_MASK;
+       tmp |= MAX_PU_CU(active_cu_number);
+       WREG32(RLC_MAX_PG_CU, tmp);
  }
  
- /**
-  * cik_dma_vm_flush - cik vm flush using sDMA
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Update the page table base and flush the VM TLB
-  * using sDMA (CIK).
-  */
- void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
+                                      bool enable)
  {
-       struct radeon_ring *ring = &rdev->ring[ridx];
-       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
-                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
-       u32 ref_and_mask;
+       u32 data, orig;
  
-       if (vm == NULL)
-               return;
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
+               data |= STATIC_PER_CU_PG_ENABLE;
+       else
+               data &= ~STATIC_PER_CU_PG_ENABLE;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
+ }
+ 
+ static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
+                                       bool enable)
+ {
+       u32 data, orig;
  
-       if (ridx == R600_RING_TYPE_DMA_INDEX)
-               ref_and_mask = SDMA0;
+       orig = data = RREG32(RLC_PG_CNTL);
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
+               data |= DYN_PER_CU_PG_ENABLE;
        else
-               ref_and_mask = SDMA1;
+               data &= ~DYN_PER_CU_PG_ENABLE;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
+ }
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       if (vm->id < 8) {
-               radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+ #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
+ #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
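+ 
+ /**
+  * cik_init_gfx_cgpg - set up the RLC for GFX power gating
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Program the RLC with the clear state descriptor, the register
+  * save/restore list, the save/restore and CP table addresses, and
+  * the power gating delay parameters (CIK).
+  */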
+ static void cik_init_gfx_cgpg(struct radeon_device *rdev)
+ {
+       u32 data, orig;
+       u32 i;
+ 
+       if (rdev->rlc.cs_data) {
+               WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+               WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
+               WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
+               WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
-               radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+               WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+               for (i = 0; i < 3; i++)
+                       WREG32(RLC_GPM_SCRATCH_DATA, 0);
+       }
+       if (rdev->rlc.reg_list) {
+               WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
+               for (i = 0; i < rdev->rlc.reg_list_size; i++)
+                       WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }
-       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-       /* update SH_MEM_* regs */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, VMID(vm->id));
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_BASES >> 2);
-       radeon_ring_write(ring, 0);
+       orig = data = RREG32(RLC_PG_CNTL);
+       data |= GFX_PG_SRC;
+       if (orig != data)
+               WREG32(RLC_PG_CNTL, data);
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
-       radeon_ring_write(ring, 0);
+       WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+       WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
-       radeon_ring_write(ring, 1);
+       data = RREG32(CP_RB_WPTR_POLL_CNTL);
+       data &= ~IDLE_POLL_COUNT_MASK;
+       data |= IDLE_POLL_COUNT(0x60);
+       WREG32(CP_RB_WPTR_POLL_CNTL, data);
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
-       radeon_ring_write(ring, 0);
+       data = 0x10101010;
+       WREG32(RLC_PG_DELAY, data);
  
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-       radeon_ring_write(ring, VMID(0));
+       data = RREG32(RLC_PG_DELAY_2);
+       data &= ~0xff;
+       data |= 0x3;
+       WREG32(RLC_PG_DELAY_2, data);
  
-       /* flush HDP */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
-       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
-       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
-       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
-       radeon_ring_write(ring, ref_and_mask); /* MASK */
-       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+       data = RREG32(RLC_AUTO_PG_CTRL);
+       data &= ~GRBM_REG_SGIT_MASK;
+       data |= GRBM_REG_SGIT(0x700);
+       WREG32(RLC_AUTO_PG_CTRL, data);
  
-       /* flush TLB */
-       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
-       radeon_ring_write(ring, 1 << vm->id);
  }
  
- /*
-  * RLC
-  * The RLC is a multi-purpose microengine that handles a
-  * variety of functions, the most important of which is
-  * the interrupt controller.
-  */
- /**
-  * cik_rlc_stop - stop the RLC ME
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Halt the RLC ME (MicroEngine) (CIK).
-  */
- static void cik_rlc_stop(struct radeon_device *rdev)
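+ /**
+  * cik_update_gfx_pg - update GFX power gating state
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable or disable power gating
+  *
+  * Enable or disable GFX coarse grain power gating and static/dynamic
+  * per-CU power gating (CIK).
+  */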
+ static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
  {
-       int i, j, k;
-       u32 mask, tmp;
-       tmp = RREG32(CP_INT_CNTL_RING0);
-       tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
-       WREG32(CP_INT_CNTL_RING0, tmp);
-       RREG32(CB_CGTT_SCLK_CTRL);
-       RREG32(CB_CGTT_SCLK_CTRL);
-       RREG32(CB_CGTT_SCLK_CTRL);
-       RREG32(CB_CGTT_SCLK_CTRL);
+       cik_enable_gfx_cgpg(rdev, enable);
+       cik_enable_gfx_static_mgpg(rdev, enable);
+       cik_enable_gfx_dynamic_mgpg(rdev, enable);
+ }
  
-       tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
-       WREG32(RLC_CGCG_CGLS_CTRL, tmp);
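+ /**
+  * cik_get_csb_size - get the size of the clear state buffer
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Returns the size of the clear state buffer in dwords (CIK).
+  */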
+ u32 cik_get_csb_size(struct radeon_device *rdev)
+ {
+       u32 count = 0;
+       const struct cs_section_def *sect = NULL;
+       const struct cs_extent_def *ext = NULL;
  
-       WREG32(RLC_CNTL, 0);
+       if (rdev->rlc.cs_data == NULL)
+               return 0;
  
-       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
-               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
-                       cik_select_se_sh(rdev, i, j);
-                       for (k = 0; k < rdev->usec_timeout; k++) {
-                               if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
-                                       break;
-                               udelay(1);
-                       }
+       /* begin clear state */
+       count += 2;
+       /* context control state */
+       count += 3;
+       for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+               for (ext = sect->section; ext->extent != NULL; ++ext) {
+                       if (sect->id == SECT_CONTEXT)
+                               count += 2 + ext->reg_count;
+                       else
+                               return 0;
                }
        }
-       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+       /* pa_sc_raster_config/pa_sc_raster_config1 */
+       count += 4;
+       /* end clear state */
+       count += 2;
+       /* clear state */
+       count += 2;
  
-       mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
-       for (k = 0; k < rdev->usec_timeout; k++) {
-               if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
-                       break;
-               udelay(1);
-       }
+       return count;
  }
  
- /**
-  * cik_rlc_start - start the RLC ME
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Unhalt the RLC ME (MicroEngine) (CIK).
-  */
- static void cik_rlc_start(struct radeon_device *rdev)
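+ /**
+  * cik_get_csb_buffer - fill the clear state buffer
+  *
+  * @rdev: radeon_device pointer
+  * @buffer: pointer to the buffer to fill
+  *
+  * Fill the clear state buffer with the default context register
+  * state for the asic (CIK).
+  */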
+ void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
  {
-       u32 tmp;
-       WREG32(RLC_CNTL, RLC_ENABLE);
-       tmp = RREG32(CP_INT_CNTL_RING0);
-       tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
-       WREG32(CP_INT_CNTL_RING0, tmp);
-       udelay(50);
- }
+       u32 count = 0, i;
+       const struct cs_section_def *sect = NULL;
+       const struct cs_extent_def *ext = NULL;
  
- /**
-  * cik_rlc_resume - setup the RLC hw
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Initialize the RLC registers, load the ucode,
-  * and start the RLC (CIK).
-  * Returns 0 for success, -EINVAL if the ucode is not available.
-  */
- static int cik_rlc_resume(struct radeon_device *rdev)
- {
-       u32 i, size;
-       u32 clear_state_info[3];
-       const __be32 *fw_data;
+       if (rdev->rlc.cs_data == NULL)
+               return;
+       if (buffer == NULL)
+               return;
  
-       if (!rdev->rlc_fw)
-               return -EINVAL;
+       buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+       buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
+       buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
+       buffer[count++] = 0x80000000;
+       buffer[count++] = 0x80000000;
+       for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+               for (ext = sect->section; ext->extent != NULL; ++ext) {
+                       if (sect->id == SECT_CONTEXT) {
+                               buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
+                               buffer[count++] = ext->reg_index - 0xa000;
+                               for (i = 0; i < ext->reg_count; i++)
+                                       buffer[count++] = ext->extent[i];
+                       } else {
+                               return;
+                       }
+               }
+       }
  
+       buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
+       buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
        switch (rdev->family) {
        case CHIP_BONAIRE:
-       default:
-               size = BONAIRE_RLC_UCODE_SIZE;
+               buffer[count++] = 0x16000012;
+               buffer[count++] = 0x00000000;
                break;
        case CHIP_KAVERI:
-               size = KV_RLC_UCODE_SIZE;
+               buffer[count++] = 0x00000000; /* XXX */
+               buffer[count++] = 0x00000000;
                break;
        case CHIP_KABINI:
-               size = KB_RLC_UCODE_SIZE;
+               buffer[count++] = 0x00000000; /* XXX */
+               buffer[count++] = 0x00000000;
+               break;
+       default:
+               buffer[count++] = 0x00000000;
+               buffer[count++] = 0x00000000;
                break;
        }
  
-       cik_rlc_stop(rdev);
-       WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
-       RREG32(GRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(GRBM_SOFT_RESET, 0);
-       RREG32(GRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(RLC_LB_CNTR_INIT, 0);
-       WREG32(RLC_LB_CNTR_MAX, 0x00008000);
-       cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
-       WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
-       WREG32(RLC_LB_PARAMS, 0x00600408);
-       WREG32(RLC_LB_CNTL, 0x80000004);
-       WREG32(RLC_MC_CNTL, 0);
-       WREG32(RLC_UCODE_CNTL, 0);
-       fw_data = (const __be32 *)rdev->rlc_fw->data;
-               WREG32(RLC_GPM_UCODE_ADDR, 0);
-       for (i = 0; i < size; i++)
-               WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
-       WREG32(RLC_GPM_UCODE_ADDR, 0);
+       buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+       buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
  
-       /* XXX */
-       clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
-       clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
-       clear_state_info[2] = 0;//cik_default_size;
-       WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
-       for (i = 0; i < 3; i++)
-               WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
-       WREG32(RLC_DRIVER_DMA_STATUS, 0);
+       buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
+       buffer[count++] = 0;
+ }
  
-       cik_rlc_start(rdev);
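+ /**
+  * cik_init_pg - initialize power gating
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Initialize power gating for the blocks enabled in pg_flags (CIK).
+  */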
+ static void cik_init_pg(struct radeon_device *rdev)
+ {
+       if (rdev->pg_flags) {
+               cik_enable_sck_slowdown_on_pu(rdev, true);
+               cik_enable_sck_slowdown_on_pd(rdev, true);
+               if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+                       cik_init_gfx_cgpg(rdev);
+                       cik_enable_cp_pg(rdev, true);
+                       cik_enable_gds_pg(rdev, true);
+               }
+               cik_init_ao_cu_mask(rdev);
+               cik_update_gfx_pg(rdev, true);
+       }
+ }
  
-       return 0;
+ static void cik_fini_pg(struct radeon_device *rdev)
+ {
+       if (rdev->pg_flags) {
+               cik_update_gfx_pg(rdev, false);
+               if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+                       cik_enable_cp_pg(rdev, false);
+                       cik_enable_gds_pg(rdev, false);
+               }
+       }
  }
  
  /*
@@@ -5030,7 -5988,7 +5988,7 @@@ static int cik_irq_init(struct radeon_d
        WREG32(INTERRUPT_CNTL, interrupt_cntl);
  
        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
 -      rb_bufsz = drm_order(rdev->ih.ring_size / 4);
 +      rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
  
        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
@@@ -5086,6 -6044,7 +6044,7 @@@ int cik_irq_set(struct radeon_device *r
        u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
        u32 grbm_int_cntl = 0;
        u32 dma_cntl, dma_cntl1;
+       u32 thermal_int;
  
        if (!rdev->irq.installed) {
                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
        cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
        cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
  
+       if (rdev->flags & RADEON_IS_IGP)
+               thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
+                       ~(THERM_INTH_MASK | THERM_INTL_MASK);
+       else
+               thermal_int = RREG32_SMC(CG_THERMAL_INT) &
+                       ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
        /* enable CP interrupts on all rings */
        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
                DRM_DEBUG("cik_irq_set: sw int gfx\n");
                hpd6 |= DC_HPDx_INT_EN;
        }
  
+       if (rdev->irq.dpm_thermal) {
+               DRM_DEBUG("dpm thermal\n");
+               if (rdev->flags & RADEON_IS_IGP)
+                       thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
+               else
+                       thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+       }
        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
  
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
        WREG32(DC_HPD5_INT_CONTROL, hpd5);
        WREG32(DC_HPD6_INT_CONTROL, hpd6);
  
+       if (rdev->flags & RADEON_IS_IGP)
+               WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
+       else
+               WREG32_SMC(CG_THERMAL_INT, thermal_int);
        return 0;
  }
  
@@@ -5520,6 -6499,7 +6499,7 @@@ int cik_irq_process(struct radeon_devic
        bool queue_hotplug = false;
        bool queue_reset = false;
        u32 addr, status, mc_client;
+       bool queue_thermal = false;
  
        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;
@@@ -5753,6 -6733,10 +6733,10 @@@ restart_ih
                                break;
                        }
                        break;
+               case 124: /* UVD */
+                       DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+                       radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
+                       break;
                case 146:
                case 147:
                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
                                break;
                        }
                        break;
+               case 230: /* thermal low to high */
+                       DRM_DEBUG("IH: thermal low to high\n");
+                       rdev->pm.dpm.thermal.high_to_low = false;
+                       queue_thermal = true;
+                       break;
+               case 231: /* thermal high to low */
+                       DRM_DEBUG("IH: thermal high to low\n");
+                       rdev->pm.dpm.thermal.high_to_low = true;
+                       queue_thermal = true;
+                       break;
+               case 233: /* GUI IDLE */
+                       DRM_DEBUG("IH: GUI idle\n");
+                       break;
                case 241: /* SDMA Privileged inst */
                case 247: /* SDMA Privileged inst */
                        DRM_ERROR("Illegal instruction in SDMA command stream\n");
                                break;
                        }
                        break;
-               case 233: /* GUI IDLE */
-                       DRM_DEBUG("IH: GUI idle\n");
-                       break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
                schedule_work(&rdev->hotplug_work);
        if (queue_reset)
                schedule_work(&rdev->reset_work);
+       if (queue_thermal)
+               schedule_work(&rdev->pm.dpm.thermal.work);
        rdev->ih.rptr = rptr;
        WREG32(IH_RB_RPTR, rdev->ih.rptr);
        atomic_set(&rdev->ih.lock, 0);
@@@ -5954,6 -6950,18 +6950,18 @@@ static int cik_startup(struct radeon_de
        struct radeon_ring *ring;
        int r;
  
+       /* enable pcie gen2/3 link */
+       cik_pcie_gen3_enable(rdev);
+       /* enable aspm */
+       cik_program_aspm(rdev);
+       /* scratch needs to be initialized before MC */
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+       cik_mc_program(rdev);
        if (rdev->flags & RADEON_IS_IGP) {
                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
                    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
                }
        }
  
-       r = r600_vram_scratch_init(rdev);
-       if (r)
-               return r;
-       cik_mc_program(rdev);
        r = cik_pcie_gart_enable(rdev);
        if (r)
                return r;
        cik_gpu_init(rdev);
  
        /* allocate rlc buffers */
-       r = si_rlc_init(rdev);
+       if (rdev->flags & RADEON_IS_IGP) {
+               if (rdev->family == CHIP_KAVERI) {
+                       rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
+                       rdev->rlc.reg_list_size =
+                               (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
+               } else {
+                       rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
+                       rdev->rlc.reg_list_size =
+                               (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
+               }
+       }
+       rdev->rlc.cs_data = ci_cs_data;
+       rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+       r = sumo_rlc_init(rdev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
                return r;
        }
  
-       r = cik_uvd_resume(rdev);
+       r = radeon_uvd_resume(rdev);
        if (!r) {
-               r = radeon_fence_driver_start_ring(rdev,
-                                                  R600_RING_TYPE_UVD_INDEX);
-               if (r)
-                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+               r = uvd_v4_2_resume(rdev);
+               if (!r) {
+                       r = radeon_fence_driver_start_ring(rdev,
+                                                          R600_RING_TYPE_UVD_INDEX);
+                       if (r)
+                               dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+               }
        }
        if (r)
                rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             CP_RB0_RPTR, CP_RB0_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
                             CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
-                            0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
+                            PACKET3(PACKET3_NOP, 0x3FFF));
        if (r)
                return r;
        ring->me = 1; /* first MEC */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
                             CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
-                            0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
+                            PACKET3(PACKET3_NOP, 0x3FFF));
        if (r)
                return r;
        /* dGPU only have 1 MEC */
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
                             SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
-                            2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+                            SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
        if (r)
                return r;
  
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
                             SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
-                            2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+                            SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
        if (r)
                return r;
  
  
        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
        if (ring->ring_size) {
-               r = radeon_ring_init(rdev, ring, ring->ring_size,
-                                    R600_WB_UVD_RPTR_OFFSET,
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
                                     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-                                    0, 0xfffff, RADEON_CP_PACKET2);
+                                    RADEON_CP_PACKET2);
                if (!r)
-                       r = r600_uvd_init(rdev);
+                       r = uvd_v1_0_init(rdev);
                if (r)
                        DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
        }
                return r;
        }
  
+       r = dce6_audio_init(rdev);
+       if (r)
+               return r;
        return 0;
  }
  
@@@ -6191,11 -7213,14 +7213,14 @@@ int cik_resume(struct radeon_device *rd
   */
  int cik_suspend(struct radeon_device *rdev)
  {
+       dce6_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        cik_cp_enable(rdev, false);
        cik_sdma_enable(rdev, false);
-       r600_uvd_rbc_stop(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_suspend(rdev);
+       cik_fini_pg(rdev);
+       cik_fini_cg(rdev);
        cik_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        cik_pcie_gart_disable(rdev);
@@@ -6316,7 -7341,7 +7341,7 @@@ int cik_init(struct radeon_device *rdev
                cik_cp_fini(rdev);
                cik_sdma_fini(rdev);
                cik_irq_fini(rdev);
-               si_rlc_fini(rdev);
+               sumo_rlc_fini(rdev);
                cik_mec_fini(rdev);
                radeon_wb_fini(rdev);
                radeon_ib_pool_fini(rdev);
@@@ -6351,13 -7376,16 +7376,16 @@@ void cik_fini(struct radeon_device *rde
  {
        cik_cp_fini(rdev);
        cik_sdma_fini(rdev);
+       cik_fini_pg(rdev);
+       cik_fini_cg(rdev);
        cik_irq_fini(rdev);
-       si_rlc_fini(rdev);
+       sumo_rlc_fini(rdev);
        cik_mec_fini(rdev);
        radeon_wb_fini(rdev);
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_fini(rdev);
        cik_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
@@@ -6386,8 -7414,8 +7414,8 @@@ static u32 dce8_line_buffer_adjust(stru
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
  {
-       u32 tmp;
+       u32 tmp, buffer_alloc, i;
+       u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 6 line buffers, one for each display controller.
         * them using the stereo blender.
         */
        if (radeon_crtc->base.enabled && mode) {
-               if (mode->crtc_hdisplay < 1920)
+               if (mode->crtc_hdisplay < 1920) {
                        tmp = 1;
-               else if (mode->crtc_hdisplay < 2560)
+                       buffer_alloc = 2;
+               } else if (mode->crtc_hdisplay < 2560) {
                        tmp = 2;
-               else if (mode->crtc_hdisplay < 4096)
+                       buffer_alloc = 2;
+               } else if (mode->crtc_hdisplay < 4096) {
                        tmp = 0;
-               else {
+                       buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
+               } else {
                        DRM_DEBUG_KMS("Mode too big for LB!\n");
                        tmp = 0;
+                       buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                }
-       } else
+       } else {
                tmp = 1;
+               buffer_alloc = 0;
+       }
  
        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
  
+       WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+              DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+                   DMIF_BUFFERS_ALLOCATED_COMPLETED)
+                       break;
+               udelay(1);
+       }
        if (radeon_crtc->base.enabled && mode) {
                switch (tmp) {
                case 0:
@@@ -6814,7 -7857,7 +7857,7 @@@ static void dce8_program_watermarks(str
                                    u32 lb_size, u32 num_heads)
  {
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
-       struct dce8_wm_params wm;
+       struct dce8_wm_params wm_low, wm_high;
        u32 pixel_period;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
                pixel_period = 1000000 / (u32)mode->clock;
                line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
  
-               wm.yclk = rdev->pm.current_mclk * 10;
-               wm.sclk = rdev->pm.current_sclk * 10;
-               wm.disp_clk = mode->clock;
-               wm.src_width = mode->crtc_hdisplay;
-               wm.active_time = mode->crtc_hdisplay * pixel_period;
-               wm.blank_time = line_time - wm.active_time;
-               wm.interlaced = false;
+               /* watermark for high clocks */
+               if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+                   rdev->pm.dpm_enabled) {
+                       wm_high.yclk =
+                               radeon_dpm_get_mclk(rdev, false) * 10;
+                       wm_high.sclk =
+                               radeon_dpm_get_sclk(rdev, false) * 10;
+               } else {
+                       wm_high.yclk = rdev->pm.current_mclk * 10;
+                       wm_high.sclk = rdev->pm.current_sclk * 10;
+               }
+               wm_high.disp_clk = mode->clock;
+               wm_high.src_width = mode->crtc_hdisplay;
+               wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+               wm_high.blank_time = line_time - wm_high.active_time;
+               wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-                       wm.interlaced = true;
-               wm.vsc = radeon_crtc->vsc;
-               wm.vtaps = 1;
+                       wm_high.interlaced = true;
+               wm_high.vsc = radeon_crtc->vsc;
+               wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
-                       wm.vtaps = 2;
-               wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
-               wm.lb_size = lb_size;
-               wm.dram_channels = cik_get_number_of_dram_channels(rdev);
-               wm.num_heads = num_heads;
+                       wm_high.vtaps = 2;
+               wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+               wm_high.lb_size = lb_size;
+               wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
+               wm_high.num_heads = num_heads;
  
                /* set for high clocks */
-               latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
+               latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
+               /* possibly force display priority to high */
+               /* should really do this at mode validation time... */
+               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+                   !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+                   !dce8_check_latency_hiding(&wm_high) ||
+                   (rdev->disp_priority == 2)) {
+                       DRM_DEBUG_KMS("force priority to high\n");
+               }
+               /* watermark for low clocks */
+               if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+                   rdev->pm.dpm_enabled) {
+                       wm_low.yclk =
+                               radeon_dpm_get_mclk(rdev, true) * 10;
+                       wm_low.sclk =
+                               radeon_dpm_get_sclk(rdev, true) * 10;
+               } else {
+                       wm_low.yclk = rdev->pm.current_mclk * 10;
+                       wm_low.sclk = rdev->pm.current_sclk * 10;
+               }
+               wm_low.disp_clk = mode->clock;
+               wm_low.src_width = mode->crtc_hdisplay;
+               wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+               wm_low.blank_time = line_time - wm_low.active_time;
+               wm_low.interlaced = false;
+               if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+                       wm_low.interlaced = true;
+               wm_low.vsc = radeon_crtc->vsc;
+               wm_low.vtaps = 1;
+               if (radeon_crtc->rmx_type != RMX_OFF)
+                       wm_low.vtaps = 2;
+               wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+               wm_low.lb_size = lb_size;
+               wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
+               wm_low.num_heads = num_heads;
                /* set for low clocks */
-               /* wm.yclk = low clk; wm.sclk = low clk */
-               latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
+               latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
  
                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
-               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
-                   !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
-                   !dce8_check_latency_hiding(&wm) ||
+               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+                   !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+                   !dce8_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
+       /* save values for DPM */
+       radeon_crtc->line_time = line_time;
+       radeon_crtc->wm_high = latency_watermark_a;
+       radeon_crtc->wm_low = latency_watermark_b;
  }
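/*
 * Editorial note, not part of the patch: when DPM is active the two
 * watermarks above are computed from different clocks (the
 * radeon_dpm_get_mclk()/radeon_dpm_get_sclk() calls with false for
 * wm_high and true for wm_low); without DPM both fall back to
 * current_mclk and current_sclk, so the pair ends up identical.  The
 * saved line_time, wm_high and wm_low presumably let the dpm code
 * apply the watermark matching the currently selected performance
 * level.
 */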
  
  /**
@@@ -6966,39 -8061,307 +8061,307 @@@ int cik_set_uvd_clocks(struct radeon_de
        return r;
  }
  
- int cik_uvd_resume(struct radeon_device *rdev)
+ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
  {
-       uint64_t addr;
-       uint32_t size;
-       int r;
+       struct pci_dev *root = rdev->pdev->bus->self;
+       int bridge_pos, gpu_pos;
+       u32 speed_cntl, mask, current_data_rate;
+       int ret, i;
+       u16 tmp16;
  
-       r = radeon_uvd_resume(rdev);
-       if (r)
-               return r;
+       if (radeon_pcie_gen2 == 0)
+               return;
  
-       /* programm the VCPU memory controller bits 0-27 */
-       addr = rdev->uvd.gpu_addr >> 3;
-       size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
-       WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
-       WREG32(UVD_VCPU_CACHE_SIZE0, size);
+       if (rdev->flags & RADEON_IS_IGP)
+               return;
  
-       addr += size;
-       size = RADEON_UVD_STACK_SIZE >> 3;
-       WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
-       WREG32(UVD_VCPU_CACHE_SIZE1, size);
+       if (!(rdev->flags & RADEON_IS_PCIE))
+               return;
  
-       addr += size;
-       size = RADEON_UVD_HEAP_SIZE >> 3;
-       WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
-       WREG32(UVD_VCPU_CACHE_SIZE2, size);
+       ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+       if (ret != 0)
+               return;
  
-       /* bits 28-31 */
-       addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
-       WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+       if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+               return;
  
-       /* bits 32-39 */
-       addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
-       WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+       speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+       current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+               LC_CURRENT_DATA_RATE_SHIFT;
+       if (mask & DRM_PCIE_SPEED_80) {
+               if (current_data_rate == 2) {
+                       DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+                       return;
+               }
+               DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
+       } else if (mask & DRM_PCIE_SPEED_50) {
+               if (current_data_rate == 1) {
+                       DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+                       return;
+               }
+               DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+       }
  
-       return 0;
+       bridge_pos = pci_pcie_cap(root);
+       if (!bridge_pos)
+               return;
+       gpu_pos = pci_pcie_cap(rdev->pdev);
+       if (!gpu_pos)
+               return;
+       if (mask & DRM_PCIE_SPEED_80) {
+               /* re-try equalization if gen3 is not already enabled */
+               if (current_data_rate != 2) {
+                       u16 bridge_cfg, gpu_cfg;
+                       u16 bridge_cfg2, gpu_cfg2;
+                       u32 max_lw, current_lw, tmp;
+                       pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+                       pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+                       tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+                       pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+                       tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+                       pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+                       tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+                       max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+                       current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+                       if (current_lw < max_lw) {
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+                               if (tmp & LC_RENEGOTIATION_SUPPORT) {
+                                       tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+                                       tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+                                       tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+                                       WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+                               }
+                       }
+                       for (i = 0; i < 10; i++) {
+                               /* check status */
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+                               if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+                                       break;
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+                               tmp |= LC_SET_QUIESCE;
+                               WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+                               tmp |= LC_REDO_EQ;
+                               WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+                               mdelay(100);
+                               /* linkctl */
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+                               tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+                               tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+                               pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+                               tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+                               tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+                               pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+                               /* linkctl2 */
+                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
+                               tmp16 &= ~((1 << 4) | (7 << 9));
+                               tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
+                               pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
+                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+                               tmp16 &= ~((1 << 4) | (7 << 9));
+                               tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
+                               pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+                               tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+                               tmp &= ~LC_SET_QUIESCE;
+                               WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+                       }
+               }
+       }
+       /* set the link speed */
+       speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+       speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+       WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+       pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+       tmp16 &= ~0xf;
+       if (mask & DRM_PCIE_SPEED_80)
+               tmp16 |= 3; /* gen3 */
+       else if (mask & DRM_PCIE_SPEED_50)
+               tmp16 |= 2; /* gen2 */
+       else
+               tmp16 |= 1; /* gen1 */
+       pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+       speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+       speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+       WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+               if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+                       break;
+               udelay(1);
+       }
+ }
+ static void cik_program_aspm(struct radeon_device *rdev)
+ {
+       u32 data, orig;
+       bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+       bool disable_clkreq = false;
+       if (radeon_aspm == 0)
+               return;
+       /* XXX double check IGPs */
+       if (rdev->flags & RADEON_IS_IGP)
+               return;
+       if (!(rdev->flags & RADEON_IS_PCIE))
+               return;
+       orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+       data &= ~LC_XMIT_N_FTS_MASK;
+       data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+       orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+       data |= LC_GO_TO_RECOVERY;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+       orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
+       data |= P_IGNORE_EDB_ERR;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_P_CNTL, data);
+       orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+       data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+       data |= LC_PMI_TO_L1_DIS;
+       if (!disable_l0s)
+               data |= LC_L0S_INACTIVITY(7);
+       if (!disable_l1) {
+               data |= LC_L1_INACTIVITY(7);
+               data &= ~LC_PMI_TO_L1_DIS;
+               if (orig != data)
+                       WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+               if (!disable_plloff_in_l1) {
+                       bool clk_req_support;
+                       orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
+                       orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
+                       orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
+                       orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
+                       data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+                       data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
+                       orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+                       data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+                       data |= LC_DYN_LANES_PWR_STATE(3);
+                       if (orig != data)
+                               WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+                       if (!disable_clkreq) {
+                               struct pci_dev *root = rdev->pdev->bus->self;
+                               u32 lnkcap;
+                               clk_req_support = false;
+                               pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+                               if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+                                       clk_req_support = true;
+                       } else {
+                               clk_req_support = false;
+                       }
+                       if (clk_req_support) {
+                               orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+                               data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+                               if (orig != data)
+                                       WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+                               orig = data = RREG32_SMC(THM_CLK_CNTL);
+                               data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+                               data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+                               if (orig != data)
+                                       WREG32_SMC(THM_CLK_CNTL, data);
+                               orig = data = RREG32_SMC(MISC_CLK_CTRL);
+                               data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+                               data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+                               if (orig != data)
+                                       WREG32_SMC(MISC_CLK_CTRL, data);
+                               orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
+                               data &= ~BCLK_AS_XCLK;
+                               if (orig != data)
+                                       WREG32_SMC(CG_CLKPIN_CNTL, data);
+                               orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
+                               data &= ~FORCE_BIF_REFCLK_EN;
+                               if (orig != data)
+                                       WREG32_SMC(CG_CLKPIN_CNTL_2, data);
+                               orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
+                               data &= ~MPLL_CLKOUT_SEL_MASK;
+                               data |= MPLL_CLKOUT_SEL(4);
+                               if (orig != data)
+                                       WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
+                       }
+               }
+       } else {
+               if (orig != data)
+                       WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+       }
+       orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
+       data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+       if (orig != data)
+               WREG32_PCIE_PORT(PCIE_CNTL2, data);
+       if (!disable_l0s) {
+               data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+               if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+                       data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+                       if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+                               orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+                               data &= ~LC_L0S_INACTIVITY_MASK;
+                               if (orig != data)
+                                       WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+                       }
+               }
+       }
  }
index 0000000000000000000000000000000000000000,8925185a004957b15f5ade83b8e9f7f31e6ddf63..b6286068e111613e3b325054dd434bc29f9c4252
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,785 +1,785 @@@
 -              rb_bufsz = drm_order(ring->ring_size / 4);
+ /*
+  * Copyright 2013 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Alex Deucher
+  */
+ #include <linux/firmware.h>
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "cikd.h"
+ /* sdma */
+ #define CIK_SDMA_UCODE_SIZE 1050
+ #define CIK_SDMA_UCODE_VERSION 64
+ u32 cik_gpu_check_soft_reset(struct radeon_device *rdev);
+ /*
+  * sDMA - System DMA
+  * Starting with CIK, the GPU has new asynchronous
+  * DMA engines.  These engines are used for compute
+  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
+  * and each one supports 1 ring buffer used for gfx
+  * and 2 queues used for compute.
+  *
+  * The programming model is very similar to the CP
+  * (ring buffer, IBs, etc.), but sDMA has its own
+  * packet format that is different from the PM4 format
+  * used by the CP. sDMA supports copying data, writing
+  * embedded data, solid fills, and a number of other
+  * things.  It also has support for tiling/detiling of
+  * buffers.
+  */
+ /**
+  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ib: IB object to schedule
+  *
+  * Schedule an IB in the DMA ring (CIK).
+  */
+ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
+                             struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+       if (rdev->wb.enabled) {
+               u32 next_rptr = ring->wptr + 5;
+               while ((next_rptr & 7) != 4)
+                       next_rptr++;
+               next_rptr += 4;
+               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+               radeon_ring_write(ring, 1); /* number of DWs to follow */
+               radeon_ring_write(ring, next_rptr);
+       }
+       /* IB packet must end on a 8 DW boundary */
+       while ((ring->wptr & 7) != 4)
+               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
+       radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
+       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+       radeon_ring_write(ring, ib->length_dw);
+ }
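/*
 * Editorial note, not part of the patch: the NOP padding above
 * advances the write pointer until (wptr & 7) == 4 so that the
 * 4-dword INDIRECT_BUFFER packet which follows ends on an 8-dword
 * boundary.  For example, wptr == 9 is padded to 12 and the packet
 * then occupies dwords 12..15.
 */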
+ /**
+  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
+  *
+  * @rdev: radeon_device pointer
+  * @fence: radeon fence object
+  *
+  * Add a DMA fence packet to the ring to write
+  * the fence seq number and DMA trap packet to generate
+  * an interrupt if needed (CIK).
+  */
+ void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
+                             struct radeon_fence *fence)
+ {
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+       u32 ref_and_mask;
+       if (fence->ring == R600_RING_TYPE_DMA_INDEX)
+               ref_and_mask = SDMA0;
+       else
+               ref_and_mask = SDMA1;
+       /* write the fence */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+       radeon_ring_write(ring, fence->seq);
+       /* generate an interrupt */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
+       /* flush HDP */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
+       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
+       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
+       radeon_ring_write(ring, ref_and_mask); /* MASK */
+       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+ }
+ /**
+  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  * @semaphore: radeon semaphore object
+  * @emit_wait: wait or signal semaphore
+  *
+  * Add a DMA semaphore packet to the ring to wait on or signal
+  * other rings (CIK).
+  */
+ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
+                                 struct radeon_ring *ring,
+                                 struct radeon_semaphore *semaphore,
+                                 bool emit_wait)
+ {
+       u64 addr = semaphore->gpu_addr;
+       u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
+       radeon_ring_write(ring, addr & 0xfffffff8);
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ }
+ /**
+  * cik_sdma_gfx_stop - stop the gfx async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the gfx async dma ring buffers (CIK).
+  */
+ static void cik_sdma_gfx_stop(struct radeon_device *rdev)
+ {
+       u32 rb_cntl, reg_offset;
+       int i;
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       for (i = 0; i < 2; i++) {
+               if (i == 0)
+                       reg_offset = SDMA0_REGISTER_OFFSET;
+               else
+                       reg_offset = SDMA1_REGISTER_OFFSET;
+               rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
+               rb_cntl &= ~SDMA_RB_ENABLE;
+               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
+       }
+ }
+ /**
+  * cik_sdma_rlc_stop - stop the compute async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the compute async dma queues (CIK).
+  */
+ static void cik_sdma_rlc_stop(struct radeon_device *rdev)
+ {
+       /* XXX todo */
+ }
+ /**
+  * cik_sdma_enable - halt or unhalt the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable/disable the DMA MEs.
+  *
+  * Halt or unhalt the async dma engines (CIK).
+  */
+ void cik_sdma_enable(struct radeon_device *rdev, bool enable)
+ {
+       u32 me_cntl, reg_offset;
+       int i;
+       for (i = 0; i < 2; i++) {
+               if (i == 0)
+                       reg_offset = SDMA0_REGISTER_OFFSET;
+               else
+                       reg_offset = SDMA1_REGISTER_OFFSET;
+               me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
+               if (enable)
+                       me_cntl &= ~SDMA_HALT;
+               else
+                       me_cntl |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
+       }
+ }
+ /**
+  * cik_sdma_gfx_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the gfx DMA ring buffers and enable them (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ static int cik_sdma_gfx_resume(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring;
+       u32 rb_cntl, ib_cntl;
+       u32 rb_bufsz;
+       u32 reg_offset, wb_offset;
+       int i, r;
+       for (i = 0; i < 2; i++) {
+               if (i == 0) {
+                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+                       reg_offset = SDMA0_REGISTER_OFFSET;
+                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
+               } else {
+                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+                       reg_offset = SDMA1_REGISTER_OFFSET;
+                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+               }
+               WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+               WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+               /* Set ring buffer size in dwords */
++              rb_bufsz = order_base_2(ring->ring_size / 4);
+               rb_cntl = rb_bufsz << 1;
+ #ifdef __BIG_ENDIAN
+               rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ #endif
+               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+               /* Initialize the ring buffer's read and write pointers */
+               WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
+               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
+               /* set the wb address whether it's enabled or not */
+               WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
+                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+               WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
+                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+               if (rdev->wb.enabled)
+                       rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
+               WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+               WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
+               ring->wptr = 0;
+               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
+               ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
+               /* enable DMA RB */
+               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
+               ib_cntl = SDMA_IB_ENABLE;
+ #ifdef __BIG_ENDIAN
+               ib_cntl |= SDMA_IB_SWAP_ENABLE;
+ #endif
+               /* enable DMA IBs */
+               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
+               ring->ready = true;
+               r = radeon_ring_test(rdev, ring->idx, ring);
+               if (r) {
+                       ring->ready = false;
+                       return r;
+               }
+       }
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       return 0;
+ }
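/*
 * Editorial note, not part of the patch: order_base_2() from
 * <linux/log2.h> returns the base-2 log of its argument rounded up to
 * the next power of two, e.g. for a 64KB ring
 *
 *	rb_bufsz = order_base_2(65536 / 4);	yields 14
 *
 * which matches what the drm_order() calls it replaces here used to
 * return.
 */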
+ /**
+  * cik_sdma_rlc_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the compute DMA queues and enable them (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ static int cik_sdma_rlc_resume(struct radeon_device *rdev)
+ {
+       /* XXX todo */
+       return 0;
+ }
+ /**
+  * cik_sdma_load_microcode - load the sDMA ME ucode
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Loads the sDMA0/1 ucode.
+  * Returns 0 for success, -EINVAL if the ucode is not available.
+  */
+ static int cik_sdma_load_microcode(struct radeon_device *rdev)
+ {
+       const __be32 *fw_data;
+       int i;
+       if (!rdev->sdma_fw)
+               return -EINVAL;
+       /* stop the gfx rings and rlc compute queues */
+       cik_sdma_gfx_stop(rdev);
+       cik_sdma_rlc_stop(rdev);
+       /* halt the MEs */
+       cik_sdma_enable(rdev, false);
+       /* sdma0 */
+       fw_data = (const __be32 *)rdev->sdma_fw->data;
+       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+               WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+       WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+       /* sdma1 */
+       fw_data = (const __be32 *)rdev->sdma_fw->data;
+       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+               WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+       WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+       return 0;
+ }
+ /**
+  * cik_sdma_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the DMA engines and enable them (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ int cik_sdma_resume(struct radeon_device *rdev)
+ {
+       int r;
+       /* Reset dma */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       RREG32(SRBM_SOFT_RESET);
+       r = cik_sdma_load_microcode(rdev);
+       if (r)
+               return r;
+       /* unhalt the MEs */
+       cik_sdma_enable(rdev, true);
+       /* start the gfx rings and rlc compute queues */
+       r = cik_sdma_gfx_resume(rdev);
+       if (r)
+               return r;
+       r = cik_sdma_rlc_resume(rdev);
+       if (r)
+               return r;
+       return 0;
+ }
+ /**
+  * cik_sdma_fini - tear down the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engines and free the rings (CIK).
+  */
+ void cik_sdma_fini(struct radeon_device *rdev)
+ {
+       /* stop the gfx rings and rlc compute queues */
+       cik_sdma_gfx_stop(rdev);
+       cik_sdma_rlc_stop(rdev);
+       /* halt the MEs */
+       cik_sdma_enable(rdev, false);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+       /* XXX - compute dma queue tear down */
+ }
+ /**
+  * cik_copy_dma - copy pages using the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @src_offset: src GPU address
+  * @dst_offset: dst GPU address
+  * @num_gpu_pages: number of GPU pages to xfer
+  * @fence: radeon fence object
+  *
+  * Copy GPU pages using the DMA engine (CIK).
+  * Used by the radeon ttm implementation to move pages if
+  * registered as the asic copy callback.
+  */
+ int cik_copy_dma(struct radeon_device *rdev,
+                uint64_t src_offset, uint64_t dst_offset,
+                unsigned num_gpu_pages,
+                struct radeon_fence **fence)
+ {
+       struct radeon_semaphore *sem = NULL;
+       int ring_index = rdev->asic->copy.dma_ring_index;
+       struct radeon_ring *ring = &rdev->ring[ring_index];
+       u32 size_in_bytes, cur_size_in_bytes;
+       int i, num_loops;
+       int r = 0;
+       r = radeon_semaphore_create(rdev, &sem);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               return r;
+       }
+       size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+       num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
+       r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               radeon_semaphore_free(rdev, &sem, NULL);
+               return r;
+       }
+       if (radeon_fence_need_sync(*fence, ring->idx)) {
+               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+                                           ring->idx);
+               radeon_fence_note_sync(*fence, ring->idx);
+       } else {
+               radeon_semaphore_free(rdev, &sem, NULL);
+       }
+       for (i = 0; i < num_loops; i++) {
+               cur_size_in_bytes = size_in_bytes;
+               if (cur_size_in_bytes > 0x1fffff)
+                       cur_size_in_bytes = 0x1fffff;
+               size_in_bytes -= cur_size_in_bytes;
+               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
+               radeon_ring_write(ring, cur_size_in_bytes);
+               radeon_ring_write(ring, 0); /* src/dst endian swap */
+               radeon_ring_write(ring, src_offset & 0xffffffff);
+               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
+               radeon_ring_write(ring, dst_offset & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
+               src_offset += cur_size_in_bytes;
+               dst_offset += cur_size_in_bytes;
+       }
+       r = radeon_fence_emit(rdev, fence, ring->idx);
+       if (r) {
+               radeon_ring_unlock_undo(rdev, ring);
+               return r;
+       }
+       radeon_ring_unlock_commit(rdev, ring);
+       radeon_semaphore_free(rdev, &sem, *fence);
+       return r;
+ }
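/*
 * Editorial note, not part of the patch: cik_copy_dma() splits the
 * copy into chunks of at most 0x1fffff bytes (one byte under 2MB);
 * each chunk emits 7 ring dwords and up to 14 more cover the optional
 * semaphore sync plus the fence, hence the
 * radeon_ring_lock(rdev, ring, num_loops * 7 + 14) reservation.
 * For example, 512 GPU pages (512 * 4096 = 2097152 bytes) need
 * num_loops = 2, i.e. 2 * 7 + 14 = 28 dwords of ring space.
 */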
+ /**
+  * cik_sdma_ring_test - simple async dma engine test
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test the DMA engine by using it to write a
+  * value to memory (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ int cik_sdma_ring_test(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ring_lock(rdev, ring, 4);
+       if (r) {
+               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
+       radeon_ring_write(ring, 1); /* number of DWs to follow */
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+ }
+ /**
+  * cik_sdma_ib_test - test an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test a simple IB in the DMA ring (CIK).
+  * Returns 0 on success, error on failure.
+  */
+ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       struct radeon_ib ib;
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp = 0;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+       if (r) {
+               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+               return r;
+       }
+       ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
+       ib.ptr[3] = 1;
+       ib.ptr[4] = 0xDEADBEEF;
+       ib.length_dw = 5;
+       r = radeon_ib_schedule(rdev, &ib, NULL);
+       if (r) {
+               radeon_ib_free(rdev, &ib);
+               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+               return r;
+       }
+       r = radeon_fence_wait(ib.fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               return r;
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+       } else {
+               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+               r = -EINVAL;
+       }
+       radeon_ib_free(rdev, &ib);
+       return r;
+ }
+ /**
+  * cik_sdma_is_lockup - Check if the DMA engine is locked up
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Check if the async DMA engine is locked up (CIK).
+  * Returns true if the engine appears to be locked up, false if not.
+  */
+ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
+       u32 mask;
+       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+               mask = RADEON_RESET_DMA;
+       else
+               mask = RADEON_RESET_DMA1;
+       if (!(reset_mask & mask)) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+ }
+ /**
+  * cik_sdma_vm_set_page - update the page tables using sDMA
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer to fill with commands
+  * @pe: addr of the page entry
+  * @addr: dst addr to write into pe
+  * @count: number of page entries to update
+  * @incr: increase next addr by incr bytes
+  * @flags: access flags
+  *
+  * Update the page tables using sDMA (CIK).
+  */
+ void cik_sdma_vm_set_page(struct radeon_device *rdev,
+                         struct radeon_ib *ib,
+                         uint64_t pe,
+                         uint64_t addr, unsigned count,
+                         uint32_t incr, uint32_t flags)
+ {
+       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+       uint64_t value;
+       unsigned ndw;
+       if (flags & RADEON_VM_PAGE_SYSTEM) {
+               while (count) {
+                       ndw = count * 2;
+                       if (ndw > 0xFFFFE)
+                               ndw = 0xFFFFE;
+                       /* for non-physically contiguous pages (system) */
+                       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+                       ib->ptr[ib->length_dw++] = ndw;
+                       for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+                               if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                       value = radeon_vm_map_gart(rdev, addr);
+                                       value &= 0xFFFFFFFFFFFFF000ULL;
+                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                                       value = addr;
+                               } else {
+                                       value = 0;
+                               }
+                               addr += incr;
+                               value |= r600_flags;
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       }
+               }
+       } else {
+               while (count) {
+                       ndw = count;
+                       if (ndw > 0x7FFFF)
+                               ndw = 0x7FFFF;
+                       if (flags & RADEON_VM_PAGE_VALID)
+                               value = addr;
+                       else
+                               value = 0;
+                       /* for physically contiguous pages (vram) */
+                       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+                       ib->ptr[ib->length_dw++] = pe; /* dst addr */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+                       ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+                       ib->ptr[ib->length_dw++] = 0;
+                       ib->ptr[ib->length_dw++] = value; /* value */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       ib->ptr[ib->length_dw++] = incr; /* increment size */
+                       ib->ptr[ib->length_dw++] = 0;
+                       ib->ptr[ib->length_dw++] = ndw; /* number of entries */
+                       pe += ndw * 8;
+                       addr += ndw * incr;
+                       count -= ndw;
+               }
+       }
+       while (ib->length_dw & 0x7)
+               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+ }
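
A rough way to see why the GENERATE_PTE_PDE path above is used for contiguous vram pages is to compare the dword budget of the two packet shapes in cik_sdma_vm_set_page(). The following is a minimal userspace sketch, not driver code, and the helper names are made up for illustration:

#include <stdio.h>

/* Dword cost of the two SDMA packet shapes used above:
 * - system pages: one WRITE packet = 4 header dwords + 2 dwords per PTE
 * - contiguous vram pages: one GENERATE_PTE_PDE packet = 10 dwords total
 *   (covering up to 0x7FFFF entries per packet)
 */
static unsigned int write_packet_dws(unsigned int count)
{
	return 4 + 2 * count;
}

static unsigned int pte_pde_packet_dws(void)
{
	return 10;
}

int main(void)
{
	printf("512 system pages -> %u dwords\n", write_packet_dws(512));
	printf("512 vram pages   -> %u dwords\n", pte_pde_packet_dws());
	return 0;
}

Either path then pads the IB to an 8-dword boundary with NOP packets, as the tail of the function shows.
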
+ /**
+  * cik_dma_vm_flush - cik vm flush using sDMA
+  *
+  * @rdev: radeon_device pointer
+  * @ridx: ring index to use for the flush
+  * @vm: vm to flush the TLB for
+  *
+  * Update the page table base and flush the VM TLB
+  * using sDMA (CIK).
+  */
+ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ {
+       struct radeon_ring *ring = &rdev->ring[ridx];
+       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+       u32 ref_and_mask;
+       if (vm == NULL)
+               return;
+       if (ridx == R600_RING_TYPE_DMA_INDEX)
+               ref_and_mask = SDMA0;
+       else
+               ref_and_mask = SDMA1;
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       if (vm->id < 8) {
+               radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+       } else {
+               radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+       }
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+       /* update SH_MEM_* regs */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, VMID(vm->id));
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_BASES >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
+       radeon_ring_write(ring, 1);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, VMID(0));
+       /* flush HDP */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
+       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
+       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
+       radeon_ring_write(ring, ref_and_mask); /* MASK */
+       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+       /* flush TLB */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+       radeon_ring_write(ring, 1 << vm->id);
+ }
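
Every register write emitted by cik_dma_vm_flush() above is the same three-dword SRBM_WRITE shape (packet header, register dword offset, value). A hypothetical helper, not present in the driver and shown only to make the pattern explicit, could look like this:

static void cik_sdma_srbm_write(struct radeon_ring *ring, u32 reg, u32 val)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, reg >> 2);
	radeon_ring_write(ring, val);
}

/* e.g. the SH_MEM_* programming above would collapse to: */
cik_sdma_srbm_write(ring, SRBM_GFX_CNTL, VMID(vm->id));
cik_sdma_srbm_write(ring, SH_MEM_BASES, 0);
cik_sdma_srbm_write(ring, SH_MEM_CONFIG, 0);
cik_sdma_srbm_write(ring, SH_MEM_APE1_BASE, 1);
cik_sdma_srbm_write(ring, SH_MEM_APE1_LIMIT, 0);
cik_sdma_srbm_write(ring, SRBM_GFX_CNTL, VMID(0));

The HDP flush that follows uses a POLL_REG_MEM packet instead, since it has to wait for GPU_HDP_FLUSH_DONE to match the requested bit rather than simply write a register.
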
index b67c9ec7f690258a5df72d1807da2a21a4966e32,6398c1f76fb84b91cf9128442e6d619e5e3c23c9..555164e270a79347fb36b9cadab09d90754c4306
@@@ -47,7 -47,7 +47,7 @@@ static const u32 crtc_offsets[6] 
  
  #include "clearstate_evergreen.h"
  
- static u32 sumo_rlc_save_restore_register_list[] =
+ static const u32 sumo_rlc_save_restore_register_list[] =
  {
        0x98fc,
        0x9830,
        0x9150,
        0x802c,
  };
- static u32 sumo_rlc_save_restore_register_list_size = ARRAY_SIZE(sumo_rlc_save_restore_register_list);
  
  static void evergreen_gpu_init(struct radeon_device *rdev);
  void evergreen_fini(struct radeon_device *rdev);
@@@ -141,6 -140,12 +140,12 @@@ extern void cayman_cp_int_cntl_setup(st
                                     int ring, u32 cp_int_cntl);
  extern void cayman_vm_decode_fault(struct radeon_device *rdev,
                                   u32 status, u32 addr);
+ void cik_init_cp_pg_table(struct radeon_device *rdev);
+ extern u32 si_get_csb_size(struct radeon_device *rdev);
+ extern void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer);
+ extern u32 cik_get_csb_size(struct radeon_device *rdev);
+ extern void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer);
  
  static const u32 evergreen_golden_registers[] =
  {
@@@ -1807,7 -1812,8 +1812,8 @@@ static u32 evergreen_line_buffer_adjust
                                        struct drm_display_mode *mode,
                                        struct drm_display_mode *other_mode)
  {
-       u32 tmp;
+       u32 tmp, buffer_alloc, i;
+       u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 3 line buffers, each one shared by 2 display controllers.
         * non-linked crtcs for maximum line buffer allocation.
         */
        if (radeon_crtc->base.enabled && mode) {
-               if (other_mode)
+               if (other_mode) {
                        tmp = 0; /* 1/2 */
-               else
+                       buffer_alloc = 1;
+               } else {
                        tmp = 2; /* whole */
-       } else
+                       buffer_alloc = 2;
+               }
+       } else {
                tmp = 0;
+               buffer_alloc = 0;
+       }
  
        /* second controller of the pair uses second half of the lb */
        if (radeon_crtc->crtc_id % 2)
                tmp += 4;
        WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, tmp);
  
+       if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
+               WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+                      DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+               for (i = 0; i < rdev->usec_timeout; i++) {
+                       if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+                           DMIF_BUFFERS_ALLOCATED_COMPLETED)
+                               break;
+                       udelay(1);
+               }
+       }
        if (radeon_crtc->base.enabled && mode) {
                switch (tmp) {
                case 0:
@@@ -2881,8 -2903,8 +2903,8 @@@ static int evergreen_cp_resume(struct r
        RREG32(GRBM_SOFT_RESET);
  
        /* Set ring buffer size */
 -      rb_bufsz = drm_order(ring->ring_size / 8);
 -      tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 +      rb_bufsz = order_base_2(ring->ring_size / 8);
 +      tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
  #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
  #endif
@@@ -3613,7 -3635,7 +3635,7 @@@ bool evergreen_is_display_hung(struct r
        return true;
  }
  
static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev)
+ u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev)
  {
        u32 reset_mask = 0;
        u32 tmp;
@@@ -3839,28 -3861,6 +3861,6 @@@ bool evergreen_gfx_is_lockup(struct rad
        return radeon_ring_test_lockup(rdev, ring);
  }
  
- /**
-  * evergreen_dma_is_lockup - Check if the DMA engine is locked up
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Check if the async DMA engine is locked up.
-  * Returns true if the engine appears to be locked up, false if not.
-  */
- bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       u32 reset_mask = evergreen_gpu_check_soft_reset(rdev);
-       if (!(reset_mask & RADEON_RESET_DMA)) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force ring activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
- }
  /*
   * RLC
   */
@@@ -3894,147 -3894,231 +3894,231 @@@ void sumo_rlc_fini(struct radeon_devic
                radeon_bo_unref(&rdev->rlc.clear_state_obj);
                rdev->rlc.clear_state_obj = NULL;
        }
+       /* cp table block */
+       if (rdev->rlc.cp_table_obj) {
+               r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
+               if (unlikely(r != 0))
+                       dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+               radeon_bo_unpin(rdev->rlc.cp_table_obj);
+               radeon_bo_unreserve(rdev->rlc.cp_table_obj);
+               radeon_bo_unref(&rdev->rlc.cp_table_obj);
+               rdev->rlc.cp_table_obj = NULL;
+       }
  }
  
+ #define CP_ME_TABLE_SIZE    96
  int sumo_rlc_init(struct radeon_device *rdev)
  {
-       u32 *src_ptr;
+       const u32 *src_ptr;
        volatile u32 *dst_ptr;
        u32 dws, data, i, j, k, reg_num;
-       u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
+       u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index = 0;
        u64 reg_list_mc_addr;
-       struct cs_section_def *cs_data;
+       const struct cs_section_def *cs_data;
        int r;
  
        src_ptr = rdev->rlc.reg_list;
        dws = rdev->rlc.reg_list_size;
+       if (rdev->family >= CHIP_BONAIRE) {
+               dws += (5 * 16) + 48 + 48 + 64;
+       }
        cs_data = rdev->rlc.cs_data;
  
-       /* save restore block */
-       if (rdev->rlc.save_restore_obj == NULL) {
-               r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-                                    RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj);
+       if (src_ptr) {
+               /* save restore block */
+               if (rdev->rlc.save_restore_obj == NULL) {
+                       r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+                                            RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj);
+                       if (r) {
+                               dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
+                               return r;
+                       }
+               }
+               r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
+               if (unlikely(r != 0)) {
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
+                                 &rdev->rlc.save_restore_gpu_addr);
                if (r) {
-                       dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
+                       radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+                       dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
+                       sumo_rlc_fini(rdev);
                        return r;
                }
-       }
  
-       r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
-       if (unlikely(r != 0)) {
-               sumo_rlc_fini(rdev);
-               return r;
-       }
-       r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
-                         &rdev->rlc.save_restore_gpu_addr);
-       if (r) {
+               r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
+               if (r) {
+                       dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               /* write the sr buffer */
+               dst_ptr = rdev->rlc.sr_ptr;
+               if (rdev->family >= CHIP_TAHITI) {
+                       /* SI */
+                       for (i = 0; i < rdev->rlc.reg_list_size; i++)
+                               dst_ptr[i] = src_ptr[i];
+               } else {
+                       /* ON/LN/TN */
+                       /* format:
+                        * dw0: (reg2 << 16) | reg1
+                        * dw1: reg1 save space
+                        * dw2: reg2 save space
+                        */
+                       for (i = 0; i < dws; i++) {
+                               data = src_ptr[i] >> 2;
+                               i++;
+                               if (i < dws)
+                                       data |= (src_ptr[i] >> 2) << 16;
+                               j = (((i - 1) * 3) / 2);
+                               dst_ptr[j] = data;
+                       }
+                       j = ((i * 3) / 2);
+                       dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER;
+               }
+               radeon_bo_kunmap(rdev->rlc.save_restore_obj);
                radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-               dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
-               sumo_rlc_fini(rdev);
-               return r;
        }
-       r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
-       if (r) {
-               dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
-               sumo_rlc_fini(rdev);
-               return r;
-       }
-       /* write the sr buffer */
-       dst_ptr = rdev->rlc.sr_ptr;
-       /* format:
-        * dw0: (reg2 << 16) | reg1
-        * dw1: reg1 save space
-        * dw2: reg2 save space
-        */
-       for (i = 0; i < dws; i++) {
-               data = src_ptr[i] >> 2;
-               i++;
-               if (i < dws)
-                       data |= (src_ptr[i] >> 2) << 16;
-               j = (((i - 1) * 3) / 2);
-               dst_ptr[j] = data;
-       }
-       j = ((i * 3) / 2);
-       dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER;
-       radeon_bo_kunmap(rdev->rlc.save_restore_obj);
-       radeon_bo_unreserve(rdev->rlc.save_restore_obj);
  
-       /* clear state block */
-       reg_list_num = 0;
-       dws = 0;
-       for (i = 0; cs_data[i].section != NULL; i++) {
-               for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-                       reg_list_num++;
-                       dws += cs_data[i].section[j].reg_count;
+       if (cs_data) {
+               /* clear state block */
+               if (rdev->family >= CHIP_BONAIRE) {
+                       rdev->rlc.clear_state_size = dws = cik_get_csb_size(rdev);
+               } else if (rdev->family >= CHIP_TAHITI) {
+                       rdev->rlc.clear_state_size = si_get_csb_size(rdev);
+                       dws = rdev->rlc.clear_state_size + (256 / 4);
+               } else {
+                       reg_list_num = 0;
+                       dws = 0;
+                       for (i = 0; cs_data[i].section != NULL; i++) {
+                               for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+                                       reg_list_num++;
+                                       dws += cs_data[i].section[j].reg_count;
+                               }
+                       }
+                       reg_list_blk_index = (3 * reg_list_num + 2);
+                       dws += reg_list_blk_index;
+                       rdev->rlc.clear_state_size = dws;
                }
-       }
-       reg_list_blk_index = (3 * reg_list_num + 2);
-       dws += reg_list_blk_index;
  
-       if (rdev->rlc.clear_state_obj == NULL) {
-               r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-                                    RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
+               if (rdev->rlc.clear_state_obj == NULL) {
+                       r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+                                            RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
+                       if (r) {
+                               dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
+                               sumo_rlc_fini(rdev);
+                               return r;
+                       }
+               }
+               r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
+               if (unlikely(r != 0)) {
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
+                                 &rdev->rlc.clear_state_gpu_addr);
                if (r) {
-                       dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
+                       radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+                       dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
                        sumo_rlc_fini(rdev);
                        return r;
                }
-       }
-       r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
-       if (unlikely(r != 0)) {
-               sumo_rlc_fini(rdev);
-               return r;
-       }
-       r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
-                         &rdev->rlc.clear_state_gpu_addr);
-       if (r) {
  
-               radeon_bo_unreserve(rdev->rlc.clear_state_obj);
-               dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
-               sumo_rlc_fini(rdev);
-               return r;
-       }
-       r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
-       if (r) {
-               dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
-               sumo_rlc_fini(rdev);
-               return r;
-       }
-       /* set up the cs buffer */
-       dst_ptr = rdev->rlc.cs_ptr;
-       reg_list_hdr_blk_index = 0;
-       reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
-       data = upper_32_bits(reg_list_mc_addr);
-       dst_ptr[reg_list_hdr_blk_index] = data;
-       reg_list_hdr_blk_index++;
-       for (i = 0; cs_data[i].section != NULL; i++) {
-               for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-                       reg_num = cs_data[i].section[j].reg_count;
-                       data = reg_list_mc_addr & 0xffffffff;
-                       dst_ptr[reg_list_hdr_blk_index] = data;
-                       reg_list_hdr_blk_index++;
-                       data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
-                       dst_ptr[reg_list_hdr_blk_index] = data;
-                       reg_list_hdr_blk_index++;
-                       data = 0x08000000 | (reg_num * 4);
+               r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
+               if (r) {
+                       dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               /* set up the cs buffer */
+               dst_ptr = rdev->rlc.cs_ptr;
+               if (rdev->family >= CHIP_BONAIRE) {
+                       cik_get_csb_buffer(rdev, dst_ptr);
+               } else if (rdev->family >= CHIP_TAHITI) {
+                       reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + 256;
+                       dst_ptr[0] = upper_32_bits(reg_list_mc_addr);
+                       dst_ptr[1] = lower_32_bits(reg_list_mc_addr);
+                       dst_ptr[2] = rdev->rlc.clear_state_size;
+                       si_get_csb_buffer(rdev, &dst_ptr[(256/4)]);
+               } else {
+                       reg_list_hdr_blk_index = 0;
+                       reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
+                       data = upper_32_bits(reg_list_mc_addr);
                        dst_ptr[reg_list_hdr_blk_index] = data;
                        reg_list_hdr_blk_index++;
-                       for (k = 0; k < reg_num; k++) {
-                               data = cs_data[i].section[j].extent[k];
-                               dst_ptr[reg_list_blk_index + k] = data;
+                       for (i = 0; cs_data[i].section != NULL; i++) {
+                               for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+                                       reg_num = cs_data[i].section[j].reg_count;
+                                       data = reg_list_mc_addr & 0xffffffff;
+                                       dst_ptr[reg_list_hdr_blk_index] = data;
+                                       reg_list_hdr_blk_index++;
+                                       data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
+                                       dst_ptr[reg_list_hdr_blk_index] = data;
+                                       reg_list_hdr_blk_index++;
+                                       data = 0x08000000 | (reg_num * 4);
+                                       dst_ptr[reg_list_hdr_blk_index] = data;
+                                       reg_list_hdr_blk_index++;
+                                       for (k = 0; k < reg_num; k++) {
+                                               data = cs_data[i].section[j].extent[k];
+                                               dst_ptr[reg_list_blk_index + k] = data;
+                                       }
+                                       reg_list_mc_addr += reg_num * 4;
+                                       reg_list_blk_index += reg_num;
+                               }
                        }
-                       reg_list_mc_addr += reg_num * 4;
-                       reg_list_blk_index += reg_num;
+                       dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
                }
+               radeon_bo_kunmap(rdev->rlc.clear_state_obj);
+               radeon_bo_unreserve(rdev->rlc.clear_state_obj);
        }
-       dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
  
-       radeon_bo_kunmap(rdev->rlc.clear_state_obj);
-       radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+       if (rdev->rlc.cp_table_size) {
+               if (rdev->rlc.cp_table_obj == NULL) {
+                       r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true,
+                                            RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj);
+                       if (r) {
+                               dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
+                               sumo_rlc_fini(rdev);
+                               return r;
+                       }
+               }
+               r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
+               if (unlikely(r != 0)) {
+                       dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               r = radeon_bo_pin(rdev->rlc.cp_table_obj, RADEON_GEM_DOMAIN_VRAM,
+                                 &rdev->rlc.cp_table_gpu_addr);
+               if (r) {
+                       radeon_bo_unreserve(rdev->rlc.cp_table_obj);
+                       dev_warn(rdev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               r = radeon_bo_kmap(rdev->rlc.cp_table_obj, (void **)&rdev->rlc.cp_table_ptr);
+               if (r) {
+                       dev_warn(rdev->dev, "(%d) map RLC cp table bo failed\n", r);
+                       sumo_rlc_fini(rdev);
+                       return r;
+               }
+               cik_init_cp_pg_table(rdev);
+               radeon_bo_kunmap(rdev->rlc.cp_table_obj);
+               radeon_bo_unreserve(rdev->rlc.cp_table_obj);
+       }
  
        return 0;
  }
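
A small standalone example of the ON/LN/TN save/restore packing described by the "dw0: (reg2 << 16) | reg1" comment in sumo_rlc_init() above. Illustration only; the two register offsets are taken from the save/restore lists in this patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* two byte offsets, as in sumo_rlc_save_restore_register_list */
	const uint32_t src[] = { 0x98fc, 0x9830 };
	uint32_t dst[4] = { 0 };

	/* dw0: (reg2 << 16) | reg1, both converted to dword offsets (>> 2);
	 * dw1 and dw2 stay zero as save space for reg1 and reg2, and the
	 * end marker is written after the last header/save triplet. */
	dst[0] = (src[0] >> 2) | ((src[1] >> 2) << 16);

	printf("header dword: 0x%08x\n", (unsigned int)dst[0]); /* 0x260c263f */
	return 0;
}
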
@@@ -4959,143 -5043,6 +5043,6 @@@ restart_ih
        return IRQ_HANDLED;
  }
  
- /**
-  * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
-  *
-  * @rdev: radeon_device pointer
-  * @fence: radeon fence object
-  *
-  * Add a DMA fence packet to the ring to write
-  * the fence seq number and DMA trap packet to generate
-  * an interrupt if needed (evergreen-SI).
-  */
- void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
-                                  struct radeon_fence *fence)
- {
-       struct radeon_ring *ring = &rdev->ring[fence->ring];
-       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-       /* write the fence */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
-       radeon_ring_write(ring, addr & 0xfffffffc);
-       radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
-       radeon_ring_write(ring, fence->seq);
-       /* generate an interrupt */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
-       /* flush HDP */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
-       radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
-       radeon_ring_write(ring, 1);
- }
- /**
-  * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @ib: IB object to schedule
-  *
-  * Schedule an IB in the DMA ring (evergreen).
-  */
- void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
-                                  struct radeon_ib *ib)
- {
-       struct radeon_ring *ring = &rdev->ring[ib->ring];
-       if (rdev->wb.enabled) {
-               u32 next_rptr = ring->wptr + 4;
-               while ((next_rptr & 7) != 5)
-                       next_rptr++;
-               next_rptr += 3;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
-               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
-               radeon_ring_write(ring, next_rptr);
-       }
-       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
-        * Pad as necessary with NOPs.
-        */
-       while ((ring->wptr & 7) != 5)
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
-       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
-       radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
- }
- /**
-  * evergreen_copy_dma - copy pages using the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @src_offset: src GPU address
-  * @dst_offset: dst GPU address
-  * @num_gpu_pages: number of GPU pages to xfer
-  * @fence: radeon fence object
-  *
-  * Copy GPU paging using the DMA engine (evergreen-cayman).
-  * Used by the radeon ttm implementation to move pages if
-  * registered as the asic copy callback.
-  */
- int evergreen_copy_dma(struct radeon_device *rdev,
-                      uint64_t src_offset, uint64_t dst_offset,
-                      unsigned num_gpu_pages,
-                      struct radeon_fence **fence)
- {
-       struct radeon_semaphore *sem = NULL;
-       int ring_index = rdev->asic->copy.dma_ring_index;
-       struct radeon_ring *ring = &rdev->ring[ring_index];
-       u32 size_in_dw, cur_size_in_dw;
-       int i, num_loops;
-       int r = 0;
-       r = radeon_semaphore_create(rdev, &sem);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
-       }
-       size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-       num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
-       r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
-       }
-       if (radeon_fence_need_sync(*fence, ring->idx)) {
-               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-                                           ring->idx);
-               radeon_fence_note_sync(*fence, ring->idx);
-       } else {
-               radeon_semaphore_free(rdev, &sem, NULL);
-       }
-       for (i = 0; i < num_loops; i++) {
-               cur_size_in_dw = size_in_dw;
-               if (cur_size_in_dw > 0xFFFFF)
-                       cur_size_in_dw = 0xFFFFF;
-               size_in_dw -= cur_size_in_dw;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
-               radeon_ring_write(ring, dst_offset & 0xfffffffc);
-               radeon_ring_write(ring, src_offset & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
-               src_offset += cur_size_in_dw * 4;
-               dst_offset += cur_size_in_dw * 4;
-       }
-       r = radeon_fence_emit(rdev, fence, ring->idx);
-       if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
-               return r;
-       }
-       radeon_ring_unlock_commit(rdev, ring);
-       radeon_semaphore_free(rdev, &sem, *fence);
-       return r;
- }
  static int evergreen_startup(struct radeon_device *rdev)
  {
        struct radeon_ring *ring;
        /* enable aspm */
        evergreen_program_aspm(rdev);
  
+       /* scratch needs to be initialized before MC */
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+       evergreen_mc_program(rdev);
        if (ASIC_IS_DCE5(rdev)) {
                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
                        r = ni_init_microcode(rdev);
                }
        }
  
-       r = r600_vram_scratch_init(rdev);
-       if (r)
-               return r;
-       evergreen_mc_program(rdev);
        if (rdev->flags & RADEON_IS_AGP) {
                evergreen_agp_enable(rdev);
        } else {
        }
        evergreen_gpu_init(rdev);
  
-       r = evergreen_blit_init(rdev);
-       if (r) {
-               r600_blit_fini(rdev);
-               rdev->asic->copy.copy = NULL;
-               dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-       }
        /* allocate rlc buffers */
        if (rdev->flags & RADEON_IS_IGP) {
                rdev->rlc.reg_list = sumo_rlc_save_restore_register_list;
-               rdev->rlc.reg_list_size = sumo_rlc_save_restore_register_list_size;
+               rdev->rlc.reg_list_size =
+                       (u32)ARRAY_SIZE(sumo_rlc_save_restore_register_list);
                rdev->rlc.cs_data = evergreen_cs_data;
                r = sumo_rlc_init(rdev);
                if (r) {
                return r;
        }
  
-       r = rv770_uvd_resume(rdev);
+       r = uvd_v2_2_resume(rdev);
        if (!r) {
                r = radeon_fence_driver_start_ring(rdev,
                                                   R600_RING_TYPE_UVD_INDEX);
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             R600_CP_RB_RPTR, R600_CP_RB_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR, DMA_RB_WPTR,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+                            DMA_PACKET(DMA_PACKET_NOP, 0, 0));
        if (r)
                return r;
  
  
        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
        if (ring->ring_size) {
-               r = radeon_ring_init(rdev, ring, ring->ring_size,
-                                    R600_WB_UVD_RPTR_OFFSET,
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
                                     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-                                    0, 0xfffff, RADEON_CP_PACKET2);
+                                    RADEON_CP_PACKET2);
                if (!r)
-                       r = r600_uvd_init(rdev);
+                       r = uvd_v1_0_init(rdev);
  
                if (r)
                        DRM_ERROR("radeon: error initializing UVD (%d).\n", r);
@@@ -5291,10 -5233,10 +5233,10 @@@ int evergreen_resume(struct radeon_devi
  int evergreen_suspend(struct radeon_device *rdev)
  {
        r600_audio_fini(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_suspend(rdev);
        r700_cp_stop(rdev);
        r600_dma_stop(rdev);
-       r600_uvd_rbc_stop(rdev);
        evergreen_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        evergreen_pcie_gart_disable(rdev);
@@@ -5419,7 -5361,6 +5361,6 @@@ int evergreen_init(struct radeon_devic
  void evergreen_fini(struct radeon_device *rdev)
  {
        r600_audio_fini(rdev);
-       r600_blit_fini(rdev);
        r700_cp_fini(rdev);
        r600_dma_fini(rdev);
        r600_irq_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
        evergreen_pcie_gart_fini(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
index 5b6e477656569e371c09e318d6e93f11caad6487,d60049efd7ac1fd981b626e37a407a40349e3b76..93c1f9ef5da9b5ee7c3474bfaaae5bbe7497cf91
@@@ -35,7 -35,7 +35,7 @@@
  #include "radeon_ucode.h"
  #include "clearstate_cayman.h"
  
- static u32 tn_rlc_save_restore_register_list[] =
+ static const u32 tn_rlc_save_restore_register_list[] =
  {
        0x98fc,
        0x98f0,
        0x9830,
        0x802c,
  };
- static u32 tn_rlc_save_restore_register_list_size = ARRAY_SIZE(tn_rlc_save_restore_register_list);
  
  extern bool evergreen_is_display_hung(struct radeon_device *rdev);
  extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
@@@ -175,6 -174,11 +174,11 @@@ extern void evergreen_pcie_gen2_enable(
  extern void evergreen_program_aspm(struct radeon_device *rdev);
  extern void sumo_rlc_fini(struct radeon_device *rdev);
  extern int sumo_rlc_init(struct radeon_device *rdev);
+ extern void cayman_dma_vm_set_page(struct radeon_device *rdev,
+                                  struct radeon_ib *ib,
+                                  uint64_t pe,
+                                  uint64_t addr, unsigned count,
+                                  uint32_t incr, uint32_t flags);
  
  /* Firmware Names */
  MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
@@@ -794,9 -798,13 +798,13 @@@ int ni_init_microcode(struct radeon_dev
        if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
                err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
-               if (err)
-                       goto out;
-               if (rdev->smc_fw->size != smc_req_size) {
+               if (err) {
+                       printk(KERN_ERR
+                              "smc: error loading firmware \"%s\"\n",
+                              fw_name);
+                       release_firmware(rdev->smc_fw);
+                       rdev->smc_fw = NULL;
+               } else if (rdev->smc_fw->size != smc_req_size) {
                        printk(KERN_ERR
                               "ni_mc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->mc_fw->size, fw_name);
@@@ -1370,23 -1378,6 +1378,6 @@@ void cayman_ring_ib_execute(struct rade
        radeon_ring_write(ring, 10); /* poll interval */
  }
  
- void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
-                              struct radeon_ring *ring,
-                              struct radeon_semaphore *semaphore,
-                              bool emit_wait)
- {
-       uint64_t addr = semaphore->gpu_addr;
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
-       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
-       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
-       radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
- }
  static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
  {
        if (enable)
@@@ -1560,8 -1551,8 +1551,8 @@@ static int cayman_cp_resume(struct rade
  
                /* Set ring buffer size */
                ring = &rdev->ring[ridx[i]];
 -              rb_cntl = drm_order(ring->ring_size / 8);
 -              rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
 +              rb_cntl = order_base_2(ring->ring_size / 8);
 +              rb_cntl |= order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8;
  #ifdef __BIG_ENDIAN
                rb_cntl |= BUF_SWAP_32BIT;
  #endif
        return 0;
  }
  
- /*
-  * DMA
-  * Starting with R600, the GPU has an asynchronous
-  * DMA engine.  The programming model is very similar
-  * to the 3D engine (ring buffer, IBs, etc.), but the
-  * DMA controller has it's own packet format that is
-  * different form the PM4 format used by the 3D engine.
-  * It supports copying data, writing embedded data,
-  * solid fills, and a number of other things.  It also
-  * has support for tiling/detiling of buffers.
-  * Cayman and newer support two asynchronous DMA engines.
-  */
- /**
-  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @ib: IB object to schedule
-  *
-  * Schedule an IB in the DMA ring (cayman-SI).
-  */
- void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
-                               struct radeon_ib *ib)
- {
-       struct radeon_ring *ring = &rdev->ring[ib->ring];
-       if (rdev->wb.enabled) {
-               u32 next_rptr = ring->wptr + 4;
-               while ((next_rptr & 7) != 5)
-                       next_rptr++;
-               next_rptr += 3;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
-               radeon_ring_write(ring, next_rptr);
-       }
-       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
-        * Pad as necessary with NOPs.
-        */
-       while ((ring->wptr & 7) != 5)
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-       radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
-       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
-       radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
- }
- /**
-  * cayman_dma_stop - stop the async dma engines
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Stop the async dma engines (cayman-SI).
-  */
- void cayman_dma_stop(struct radeon_device *rdev)
- {
-       u32 rb_cntl;
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-       /* dma0 */
-       rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
-       rb_cntl &= ~DMA_RB_ENABLE;
-       WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
-       /* dma1 */
-       rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
-       rb_cntl &= ~DMA_RB_ENABLE;
-       WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
-       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
-       rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
- }
- /**
-  * cayman_dma_resume - setup and start the async dma engines
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Set up the DMA ring buffers and enable them. (cayman-SI).
-  * Returns 0 for success, error for failure.
-  */
- int cayman_dma_resume(struct radeon_device *rdev)
- {
-       struct radeon_ring *ring;
-       u32 rb_cntl, dma_cntl, ib_cntl;
-       u32 rb_bufsz;
-       u32 reg_offset, wb_offset;
-       int i, r;
-       /* Reset dma */
-       WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
-       RREG32(SRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(SRBM_SOFT_RESET, 0);
-       for (i = 0; i < 2; i++) {
-               if (i == 0) {
-                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-                       reg_offset = DMA0_REGISTER_OFFSET;
-                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
-               } else {
-                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
-                       reg_offset = DMA1_REGISTER_OFFSET;
-                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
-               }
-               WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
-               WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
-               /* Set ring buffer size in dwords */
-               rb_bufsz = order_base_2(ring->ring_size / 4);
-               rb_cntl = rb_bufsz << 1;
- #ifdef __BIG_ENDIAN
-               rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
- #endif
-               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
-               /* Initialize the ring buffer's read and write pointers */
-               WREG32(DMA_RB_RPTR + reg_offset, 0);
-               WREG32(DMA_RB_WPTR + reg_offset, 0);
-               /* set the wb address whether it's enabled or not */
-               WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
-                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
-               WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
-                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
-               if (rdev->wb.enabled)
-                       rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
-               WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
-               /* enable DMA IBs */
-               ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
- #ifdef __BIG_ENDIAN
-               ib_cntl |= DMA_IB_SWAP_ENABLE;
- #endif
-               WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
-               dma_cntl = RREG32(DMA_CNTL + reg_offset);
-               dma_cntl &= ~CTXEMPTY_INT_ENABLE;
-               WREG32(DMA_CNTL + reg_offset, dma_cntl);
-               ring->wptr = 0;
-               WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
-               ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
-               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
-               ring->ready = true;
-               r = radeon_ring_test(rdev, ring->idx, ring);
-               if (r) {
-                       ring->ready = false;
-                       return r;
-               }
-       }
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-       return 0;
- }
- /**
-  * cayman_dma_fini - tear down the async dma engines
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Stop the async dma engines and free the rings (cayman-SI).
-  */
- void cayman_dma_fini(struct radeon_device *rdev)
- {
-       cayman_dma_stop(rdev);
-       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
-       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
- }
- static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
+ u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
  {
        u32 reset_mask = 0;
        u32 tmp;
@@@ -2041,34 -1853,6 +1853,6 @@@ bool cayman_gfx_is_lockup(struct radeon
        return radeon_ring_test_lockup(rdev, ring);
  }
  
- /**
-  * cayman_dma_is_lockup - Check if the DMA engine is locked up
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Check if the async DMA engine is locked up.
-  * Returns true if the engine appears to be locked up, false if not.
-  */
- bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
-       u32 mask;
-       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-               mask = RADEON_RESET_DMA;
-       else
-               mask = RADEON_RESET_DMA1;
-       if (!(reset_mask & mask)) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force ring activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
- }
  static int cayman_startup(struct radeon_device *rdev)
  {
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        /* enable aspm */
        evergreen_program_aspm(rdev);
  
+       /* scratch needs to be initialized before MC */
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+       evergreen_mc_program(rdev);
        if (rdev->flags & RADEON_IS_IGP) {
                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
                        r = ni_init_microcode(rdev);
                }
        }
  
-       r = r600_vram_scratch_init(rdev);
-       if (r)
-               return r;
-       evergreen_mc_program(rdev);
        r = cayman_pcie_gart_enable(rdev);
        if (r)
                return r;
        cayman_gpu_init(rdev);
  
-       r = evergreen_blit_init(rdev);
-       if (r) {
-               r600_blit_fini(rdev);
-               rdev->asic->copy.copy = NULL;
-               dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-       }
        /* allocate rlc buffers */
        if (rdev->flags & RADEON_IS_IGP) {
                rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
-               rdev->rlc.reg_list_size = tn_rlc_save_restore_register_list_size;
+               rdev->rlc.reg_list_size =
+                       (u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
                rdev->rlc.cs_data = cayman_cs_data;
                r = sumo_rlc_init(rdev);
                if (r) {
                return r;
        }
  
-       r = rv770_uvd_resume(rdev);
+       r = uvd_v2_2_resume(rdev);
        if (!r) {
                r = radeon_fence_driver_start_ring(rdev,
                                                   R600_RING_TYPE_UVD_INDEX);
  
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             CP_RB0_RPTR, CP_RB0_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+                            DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        if (r)
                return r;
  
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+                            DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        if (r)
                return r;
  
  
        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
        if (ring->ring_size) {
-               r = radeon_ring_init(rdev, ring, ring->ring_size,
-                                    R600_WB_UVD_RPTR_OFFSET,
+               r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
                                     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-                                    0, 0xfffff, RADEON_CP_PACKET2);
+                                    RADEON_CP_PACKET2);
                if (!r)
-                       r = r600_uvd_init(rdev);
+                       r = uvd_v1_0_init(rdev);
                if (r)
                        DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
        }
                return r;
        }
  
-       r = r600_audio_init(rdev);
-       if (r)
-               return r;
+       if (ASIC_IS_DCE6(rdev)) {
+               r = dce6_audio_init(rdev);
+               if (r)
+                       return r;
+       } else {
+               r = r600_audio_init(rdev);
+               if (r)
+                       return r;
+       }
  
        return 0;
  }
@@@ -2282,11 -2067,14 +2067,14 @@@ int cayman_resume(struct radeon_device 
  
  int cayman_suspend(struct radeon_device *rdev)
  {
-       r600_audio_fini(rdev);
+       if (ASIC_IS_DCE6(rdev))
+               dce6_audio_fini(rdev);
+       else
+               r600_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        cayman_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
-       r600_uvd_rbc_stop(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_suspend(rdev);
        evergreen_irq_suspend(rdev);
        radeon_wb_disable(rdev);
@@@ -2408,7 -2196,6 +2196,6 @@@ int cayman_init(struct radeon_device *r
  
  void cayman_fini(struct radeon_device *rdev)
  {
-       r600_blit_fini(rdev);
        cayman_cp_fini(rdev);
        cayman_dma_fini(rdev);
        r600_irq_fini(rdev);
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
+       uvd_v1_0_fini(rdev);
        radeon_uvd_fini(rdev);
        cayman_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
@@@ -2678,61 -2466,7 +2466,7 @@@ void cayman_vm_set_page(struct radeon_d
                        }
                }
        } else {
-               if ((flags & RADEON_VM_PAGE_SYSTEM) ||
-                   (count == 1)) {
-                       while (count) {
-                               ndw = count * 2;
-                               if (ndw > 0xFFFFE)
-                                       ndw = 0xFFFFE;
-                               /* for non-physically contiguous pages (system) */
-                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
-                               ib->ptr[ib->length_dw++] = pe;
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-                               for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-                                       if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                               value = radeon_vm_map_gart(rdev, addr);
-                                               value &= 0xFFFFFFFFFFFFF000ULL;
-                                       } else if (flags & RADEON_VM_PAGE_VALID) {
-                                               value = addr;
-                                       } else {
-                                               value = 0;
-                                       }
-                                       addr += incr;
-                                       value |= r600_flags;
-                                       ib->ptr[ib->length_dw++] = value;
-                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                               }
-                       }
-                       while (ib->length_dw & 0x7)
-                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
-               } else {
-                       while (count) {
-                               ndw = count * 2;
-                               if (ndw > 0xFFFFE)
-                                       ndw = 0xFFFFE;
-                               if (flags & RADEON_VM_PAGE_VALID)
-                                       value = addr;
-                               else
-                                       value = 0;
-                               /* for physically contiguous pages (vram) */
-                               ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
-                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-                               ib->ptr[ib->length_dw++] = 0;
-                               ib->ptr[ib->length_dw++] = value; /* value */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                               ib->ptr[ib->length_dw++] = incr; /* increment size */
-                               ib->ptr[ib->length_dw++] = 0;
-                               pe += ndw * 4;
-                               addr += (ndw / 2) * incr;
-                               count -= ndw / 2;
-                       }
-               }
-               while (ib->length_dw & 0x7)
-                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+               cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
        }
  }
  
@@@ -2766,26 -2500,3 +2500,3 @@@ void cayman_vm_flush(struct radeon_devi
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
  }
- void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
- {
-       struct radeon_ring *ring = &rdev->ring[ridx];
-       if (vm == NULL)
-               return;
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-       radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
-       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-       /* flush hdp cache */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-       radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
-       radeon_ring_write(ring, 1);
-       /* bits 0-7 are the VM contexts0-7 */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-       radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
-       radeon_ring_write(ring, 1 << vm->id);
- }
index 0000000000000000000000000000000000000000,0f3c0baea4a6dd5e1f7782ba84a591856095cea2..dd6e9688fbefe6e948226475905d54f1e9499613
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,338 +1,338 @@@
 -              rb_bufsz = drm_order(ring->ring_size / 4);
+ /*
+  * Copyright 2010 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Alex Deucher
+  */
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "nid.h"
+ u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);
+ /*
+  * DMA
+  * Starting with R600, the GPU has an asynchronous
+  * DMA engine.  The programming model is very similar
+  * to the 3D engine (ring buffer, IBs, etc.), but the
+  * DMA controller has its own packet format that is
+  * different from the PM4 format used by the 3D engine.
+  * It supports copying data, writing embedded data,
+  * solid fills, and a number of other things.  It also
+  * has support for tiling/detiling of buffers.
+  * Cayman and newer support two asynchronous DMA engines.
+  */
+ /**
+  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ib: IB object to schedule
+  *
+  * Schedule an IB in the DMA ring (cayman-SI).
+  */
+ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+                               struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       if (rdev->wb.enabled) {
+               u32 next_rptr = ring->wptr + 4;
+               while ((next_rptr & 7) != 5)
+                       next_rptr++;
+               next_rptr += 3;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+               radeon_ring_write(ring, next_rptr);
+       }
+       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+        * Pad as necessary with NOPs.
+        */
+       while ((ring->wptr & 7) != 5)
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+       radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+       radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+ }
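
The padding rule above is simple arithmetic: NOPs are written until the write pointer sits at dword 5 of an 8-dword group, so the 3-dword INDIRECT_BUFFER packet that follows ends exactly on an 8-DW boundary. A minimal host-side sketch of that calculation (plain C, no radeon headers; only the 3-dword packet size is taken from the function above):

#include <assert.h>
#include <stdio.h>

/* Filler dwords needed before a 3-dword IB packet so the packet ends on an
 * 8-dword boundary, mirroring the `(wptr & 7) != 5` loop above. */
static unsigned ib_pad_dwords(unsigned wptr)
{
        unsigned pad = 0;
        while (((wptr + pad) & 7) != 5)
                pad++;
        return pad;
}

int main(void)
{
        for (unsigned wptr = 0; wptr < 32; wptr++) {
                unsigned end = wptr + ib_pad_dwords(wptr) + 3; /* 3-dword packet */
                assert((end & 7) == 0);                        /* ends on an 8-DW boundary */
        }
        printf("IB padding check passed\n");
        return 0;
}
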
+ /**
+  * cayman_dma_stop - stop the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engines (cayman-SI).
+  */
+ void cayman_dma_stop(struct radeon_device *rdev)
+ {
+       u32 rb_cntl;
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       /* dma0 */
+       rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+       /* dma1 */
+       rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+       rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+ }
+ /**
+  * cayman_dma_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the DMA ring buffers and enable them. (cayman-SI).
+  * Returns 0 for success, error for failure.
+  */
+ int cayman_dma_resume(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring;
+       u32 rb_cntl, dma_cntl, ib_cntl;
+       u32 rb_bufsz;
+       u32 reg_offset, wb_offset;
+       int i, r;
+       /* Reset dma */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       for (i = 0; i < 2; i++) {
+               if (i == 0) {
+                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+                       reg_offset = DMA0_REGISTER_OFFSET;
+                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
+               } else {
+                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+                       reg_offset = DMA1_REGISTER_OFFSET;
+                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+               }
+               WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+               WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+               /* Set ring buffer size in dwords */
++              rb_bufsz = order_base_2(ring->ring_size / 4);
+               rb_cntl = rb_bufsz << 1;
+ #ifdef __BIG_ENDIAN
+               rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ #endif
+               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+               /* Initialize the ring buffer's read and write pointers */
+               WREG32(DMA_RB_RPTR + reg_offset, 0);
+               WREG32(DMA_RB_WPTR + reg_offset, 0);
+               /* set the wb address whether it's enabled or not */
+               WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+               WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+               if (rdev->wb.enabled)
+                       rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+               WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+               /* enable DMA IBs */
+               ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+ #ifdef __BIG_ENDIAN
+               ib_cntl |= DMA_IB_SWAP_ENABLE;
+ #endif
+               WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
+               dma_cntl = RREG32(DMA_CNTL + reg_offset);
+               dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+               WREG32(DMA_CNTL + reg_offset, dma_cntl);
+               ring->wptr = 0;
+               WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+               ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+               ring->ready = true;
+               r = radeon_ring_test(rdev, ring->idx, ring);
+               if (r) {
+                       ring->ready = false;
+                       return r;
+               }
+       }
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       return 0;
+ }
+ /**
+  * cayman_dma_fini - tear down the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engines and free the rings (cayman-SI).
+  */
+ void cayman_dma_fini(struct radeon_device *rdev)
+ {
+       cayman_dma_stop(rdev);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+ }
+ /**
+  * cayman_dma_is_lockup - Check if the DMA engine is locked up
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Check if the async DMA engine is locked up.
+  * Returns true if the engine appears to be locked up, false if not.
+  */
+ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
+       u32 mask;
+       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+               mask = RADEON_RESET_DMA;
+       else
+               mask = RADEON_RESET_DMA1;
+       if (!(reset_mask & mask)) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+ }
+ /**
+  * cayman_dma_vm_set_page - update the page tables using the DMA
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer to fill with commands
+  * @pe: addr of the page entry
+  * @addr: dst addr to write into pe
+  * @count: number of page entries to update
+  * @incr: increase next addr by incr bytes
+  * @flags: access flags
+  *
+  * Update the page tables using the DMA (cayman/TN).
+  */
+ void cayman_dma_vm_set_page(struct radeon_device *rdev,
+                           struct radeon_ib *ib,
+                           uint64_t pe,
+                           uint64_t addr, unsigned count,
+                           uint32_t incr, uint32_t flags)
+ {
+       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+       uint64_t value;
+       unsigned ndw;
+       if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) {
+               while (count) {
+                       ndw = count * 2;
+                       if (ndw > 0xFFFFE)
+                               ndw = 0xFFFFE;
+                       /* for non-physically contiguous pages (system) */
+                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                       for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+                               if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                       value = radeon_vm_map_gart(rdev, addr);
+                                       value &= 0xFFFFFFFFFFFFF000ULL;
+                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                                       value = addr;
+                               } else {
+                                       value = 0;
+                               }
+                               addr += incr;
+                               value |= r600_flags;
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       }
+               }
+       } else {
+               while (count) {
+                       ndw = count * 2;
+                       if (ndw > 0xFFFFE)
+                               ndw = 0xFFFFE;
+                       if (flags & RADEON_VM_PAGE_VALID)
+                               value = addr;
+                       else
+                               value = 0;
+                       /* for physically contiguous pages (vram) */
+                       ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+                       ib->ptr[ib->length_dw++] = pe; /* dst addr */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                       ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+                       ib->ptr[ib->length_dw++] = 0;
+                       ib->ptr[ib->length_dw++] = value; /* value */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       ib->ptr[ib->length_dw++] = incr; /* increment size */
+                       ib->ptr[ib->length_dw++] = 0;
+                       pe += ndw * 4;
+                       addr += (ndw / 2) * incr;
+                       count -= ndw / 2;
+               }
+       }
+       while (ib->length_dw & 0x7)
+               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+ }
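
The system-page path above caps each WRITE packet at 0xFFFFE payload dwords, two dwords per page-table entry, and loops until the whole range is covered. A standalone sketch of just that chunking arithmetic (the 3-dword packet header count is read off the code above; everything else is a stand-in):

#include <stdio.h>

/* Mirror the ndw clamp in cayman_dma_vm_set_page(): each PTE takes 2 payload
 * dwords and one packet carries at most 0xFFFFE payload dwords. */
static void count_write_packets(unsigned long count,
                                unsigned long *packets, unsigned long *dwords)
{
        *packets = 0;
        *dwords = 0;
        while (count) {
                unsigned long ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;
                *packets += 1;
                *dwords += 3 + ndw;   /* header + dst addr lo/hi, then payload */
                count -= ndw / 2;
        }
}

int main(void)
{
        unsigned long packets, dwords;
        count_write_packets(1UL << 20, &packets, &dwords);   /* 1M PTEs */
        printf("%lu packets, %lu dwords\n", packets, dwords);
        return 0;
}
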
+ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ {
+       struct radeon_ring *ring = &rdev->ring[ridx];
+       if (vm == NULL)
+               return;
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+       /* flush hdp cache */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+       radeon_ring_write(ring, 1);
+       /* bits 0-7 are the VM contexts0-7 */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+       radeon_ring_write(ring, 1 << vm->id);
+ }
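
Each SRBM_WRITE above carries the target register as a dword offset in the low bits of its second command dword, with 0xf in bits 16-19 acting as the byte-enable mask; that layout is read directly off the three writes in the function. A small sketch of the encoding (the register value in main is purely hypothetical):

#include <stdint.h>
#include <assert.h>

/* Second dword of a DMA SRBM_WRITE, as built in cayman_dma_vm_flush():
 * byte-enable mask in bits 16-19, register byte address >> 2 in the low bits.
 * The driver relies on the real register offsets fitting below bit 16. */
static uint32_t srbm_write_addr(uint32_t reg_byte_addr)
{
        return (0xf << 16) | (reg_byte_addr >> 2);
}

int main(void)
{
        uint32_t dw = srbm_write_addr(0x1470);   /* hypothetical register byte address */
        assert((dw >> 16) == 0xf);
        assert((dw & 0xffff) == (0x1470 >> 2));
        return 0;
}
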
index 5625cf706f0c3326b3e70d6d337fa23bbe7650f7,2cbc512645d467be744a1ebc70e038876336b3e2..9fc61dd68bc073b4b2fd82976caef85f66696c9c
@@@ -1097,12 -1097,12 +1097,12 @@@ int r100_cp_init(struct radeon_device *
        }
  
        /* Align ring size */
 -      rb_bufsz = drm_order(ring_size / 8);
 +      rb_bufsz = order_base_2(ring_size / 8);
        ring_size = (1 << (rb_bufsz + 1)) * 4;
        r100_cp_load_microcode(rdev);
        r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
-                            0, 0x7fffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r) {
                return r;
        }
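
The drm_order to order_base_2 conversions in this merge are one-for-one swaps; both yield the ceiling log2 used to size the ring, and the `(1 << (rb_bufsz + 1)) * 4` line then rounds the requested byte size up to a power of two. A quick sketch with a portable stand-in for order_base_2 (the requested size in main is arbitrary):

#include <assert.h>
#include <stdio.h>

/* Portable stand-in for the kernel's order_base_2(): smallest o with (1 << o) >= n. */
static unsigned order_base_2_sketch(unsigned long n)
{
        unsigned o = 0;
        while ((1UL << o) < n)
                o++;
        return o;
}

int main(void)
{
        unsigned long ring_size = 1000 * 1024;   /* arbitrary requested size, multiple of 8 */
        unsigned rb_bufsz = order_base_2_sketch(ring_size / 8);
        unsigned long aligned = (1UL << (rb_bufsz + 1)) * 4;  /* as in r100_cp_init() */
        assert(aligned >= ring_size);             /* rounded up (size is a multiple of 8) */
        assert((aligned & (aligned - 1)) == 0);   /* ... to a power of two */
        printf("rb_bufsz=%u aligned ring size=%lu\n", rb_bufsz, aligned);
        return 0;
}
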
index cfc1d28ade39e4d39329feee29dd8a63de410c66,b72d4d717a723ae63c8cb64a1f4cce6dce33ca54..ea4d3734e6d9ce269efa5bb1eb98c7da5bccd819
@@@ -1374,7 -1374,7 +1374,7 @@@ static bool r600_is_display_hung(struc
        return true;
  }
  
static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev)
+ u32 r600_gpu_check_soft_reset(struct radeon_device *rdev)
  {
        u32 reset_mask = 0;
        u32 tmp;
@@@ -1622,28 -1622,6 +1622,6 @@@ bool r600_gfx_is_lockup(struct radeon_d
        return radeon_ring_test_lockup(rdev, ring);
  }
  
- /**
-  * r600_dma_is_lockup - Check if the DMA engine is locked up
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Check if the async DMA engine is locked up.
-  * Returns true if the engine appears to be locked up, false if not.
-  */
- bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       u32 reset_mask = r600_gpu_check_soft_reset(rdev);
-       if (!(reset_mask & RADEON_RESET_DMA)) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force ring activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
- }
  u32 r6xx_remap_render_backend(struct radeon_device *rdev,
                              u32 tiling_pipe_num,
                              u32 max_rb_num,
@@@ -2299,9 -2277,13 +2277,13 @@@ int r600_init_microcode(struct radeon_d
        if ((rdev->family >= CHIP_RV770) && (rdev->family <= CHIP_HEMLOCK)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", smc_chip_name);
                err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
-               if (err)
-                       goto out;
-               if (rdev->smc_fw->size != smc_req_size) {
+               if (err) {
+                       printk(KERN_ERR
+                              "smc: error loading firmware \"%s\"\n",
+                              fw_name);
+                       release_firmware(rdev->smc_fw);
+                       rdev->smc_fw = NULL;
+               } else if (rdev->smc_fw->size != smc_req_size) {
                        printk(KERN_ERR
                               "smc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->smc_fw->size, fw_name);
@@@ -2413,8 -2395,8 +2395,8 @@@ int r600_cp_resume(struct radeon_devic
        WREG32(GRBM_SOFT_RESET, 0);
  
        /* Set ring buffer size */
 -      rb_bufsz = drm_order(ring->ring_size / 8);
 -      tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 +      rb_bufsz = order_base_2(ring->ring_size / 8);
 +      tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
  #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
  #endif
@@@ -2467,7 -2449,7 +2449,7 @@@ void r600_ring_init(struct radeon_devic
        int r;
  
        /* Align ring size */
 -      rb_bufsz = drm_order(ring_size / 8);
 +      rb_bufsz = order_base_2(ring_size / 8);
        ring_size = (1 << (rb_bufsz + 1)) * 4;
        ring->ring_size = ring_size;
        ring->align_mask = 16 - 1;
@@@ -2489,327 -2471,6 +2471,6 @@@ void r600_cp_fini(struct radeon_device 
        radeon_scratch_free(rdev, ring->rptr_save_reg);
  }
  
- /*
-  * DMA
-  * Starting with R600, the GPU has an asynchronous
-  * DMA engine.  The programming model is very similar
-  * to the 3D engine (ring buffer, IBs, etc.), but the
-  * DMA controller has it's own packet format that is
-  * different form the PM4 format used by the 3D engine.
-  * It supports copying data, writing embedded data,
-  * solid fills, and a number of other things.  It also
-  * has support for tiling/detiling of buffers.
-  */
- /**
-  * r600_dma_stop - stop the async dma engine
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Stop the async dma engine (r6xx-evergreen).
-  */
- void r600_dma_stop(struct radeon_device *rdev)
- {
-       u32 rb_cntl = RREG32(DMA_RB_CNTL);
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-       rb_cntl &= ~DMA_RB_ENABLE;
-       WREG32(DMA_RB_CNTL, rb_cntl);
-       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
- }
- /**
-  * r600_dma_resume - setup and start the async dma engine
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
-  * Returns 0 for success, error for failure.
-  */
- int r600_dma_resume(struct radeon_device *rdev)
- {
-       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-       u32 rb_cntl, dma_cntl, ib_cntl;
-       u32 rb_bufsz;
-       int r;
-       /* Reset dma */
-       if (rdev->family >= CHIP_RV770)
-               WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
-       else
-               WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
-       RREG32(SRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(SRBM_SOFT_RESET, 0);
-       WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
-       WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
-       /* Set ring buffer size in dwords */
-       rb_bufsz = order_base_2(ring->ring_size / 4);
-       rb_cntl = rb_bufsz << 1;
- #ifdef __BIG_ENDIAN
-       rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
- #endif
-       WREG32(DMA_RB_CNTL, rb_cntl);
-       /* Initialize the ring buffer's read and write pointers */
-       WREG32(DMA_RB_RPTR, 0);
-       WREG32(DMA_RB_WPTR, 0);
-       /* set the wb address whether it's enabled or not */
-       WREG32(DMA_RB_RPTR_ADDR_HI,
-              upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
-       WREG32(DMA_RB_RPTR_ADDR_LO,
-              ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
-       if (rdev->wb.enabled)
-               rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
-       WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
-       /* enable DMA IBs */
-       ib_cntl = DMA_IB_ENABLE;
- #ifdef __BIG_ENDIAN
-       ib_cntl |= DMA_IB_SWAP_ENABLE;
- #endif
-       WREG32(DMA_IB_CNTL, ib_cntl);
-       dma_cntl = RREG32(DMA_CNTL);
-       dma_cntl &= ~CTXEMPTY_INT_ENABLE;
-       WREG32(DMA_CNTL, dma_cntl);
-       if (rdev->family >= CHIP_RV770)
-               WREG32(DMA_MODE, 1);
-       ring->wptr = 0;
-       WREG32(DMA_RB_WPTR, ring->wptr << 2);
-       ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
-       WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
-       ring->ready = true;
-       r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
-       if (r) {
-               ring->ready = false;
-               return r;
-       }
-       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-       return 0;
- }
- /**
-  * r600_dma_fini - tear down the async dma engine
-  *
-  * @rdev: radeon_device pointer
-  *
-  * Stop the async dma engine and free the ring (r6xx-evergreen).
-  */
- void r600_dma_fini(struct radeon_device *rdev)
- {
-       r600_dma_stop(rdev);
-       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
- }
- /*
-  * UVD
-  */
- int r600_uvd_rbc_start(struct radeon_device *rdev)
- {
-       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
-       uint64_t rptr_addr;
-       uint32_t rb_bufsz, tmp;
-       int r;
-       rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
-       if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
-               DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
-               return -EINVAL;
-       }
-       /* force RBC into idle state */
-       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
-       /* Set the write pointer delay */
-       WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
-       /* set the wb address */
-       WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
-       /* programm the 4GB memory segment for rptr and ring buffer */
-       WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
-                                  (0x7 << 16) | (0x1 << 31));
-       /* Initialize the ring buffer's read and write pointers */
-       WREG32(UVD_RBC_RB_RPTR, 0x0);
-       ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
-       WREG32(UVD_RBC_RB_WPTR, ring->wptr);
-       /* set the ring address */
-       WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
-       /* Set ring buffer size */
-       rb_bufsz = order_base_2(ring->ring_size);
-       rb_bufsz = (0x1 << 8) | rb_bufsz;
-       WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
-       ring->ready = true;
-       r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
-       if (r) {
-               ring->ready = false;
-               return r;
-       }
-       r = radeon_ring_lock(rdev, ring, 10);
-       if (r) {
-               DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
-               return r;
-       }
-       tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
-       radeon_ring_write(ring, tmp);
-       radeon_ring_write(ring, 0xFFFFF);
-       tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
-       radeon_ring_write(ring, tmp);
-       radeon_ring_write(ring, 0xFFFFF);
-       tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
-       radeon_ring_write(ring, tmp);
-       radeon_ring_write(ring, 0xFFFFF);
-       /* Clear timeout status bits */
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
-       radeon_ring_write(ring, 0x8);
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
-       radeon_ring_write(ring, 3);
-       radeon_ring_unlock_commit(rdev, ring);
-       return 0;
- }
- void r600_uvd_rbc_stop(struct radeon_device *rdev)
- {
-       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
-       /* force RBC into idle state */
-       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
-       ring->ready = false;
- }
- int r600_uvd_init(struct radeon_device *rdev)
- {
-       int i, j, r;
-       /* disable byte swapping */
-       u32 lmi_swap_cntl = 0;
-       u32 mp_swap_cntl = 0;
-       /* raise clocks while booting up the VCPU */
-       radeon_set_uvd_clocks(rdev, 53300, 40000);
-       /* disable clock gating */
-       WREG32(UVD_CGC_GATE, 0);
-       /* disable interupt */
-       WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
-       /* put LMI, VCPU, RBC etc... into reset */
-       WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
-              LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
-              CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
-       mdelay(5);
-       /* take UVD block out of reset */
-       WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
-       mdelay(5);
-       /* initialize UVD memory controller */
-       WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
-                            (1 << 21) | (1 << 9) | (1 << 20));
- #ifdef __BIG_ENDIAN
-       /* swap (8 in 32) RB and IB */
-       lmi_swap_cntl = 0xa;
-       mp_swap_cntl = 0;
- #endif
-       WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl);
-       WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl);
-       WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
-       WREG32(UVD_MPC_SET_MUXA1, 0x0);
-       WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
-       WREG32(UVD_MPC_SET_MUXB1, 0x0);
-       WREG32(UVD_MPC_SET_ALU, 0);
-       WREG32(UVD_MPC_SET_MUX, 0x88);
-       /* Stall UMC */
-       WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
-       WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
-       /* take all subblocks out of reset, except VCPU */
-       WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
-       mdelay(5);
-       /* enable VCPU clock */
-       WREG32(UVD_VCPU_CNTL,  1 << 9);
-       /* enable UMC */
-       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
-       /* boot up the VCPU */
-       WREG32(UVD_SOFT_RESET, 0);
-       mdelay(10);
-       WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
-       for (i = 0; i < 10; ++i) {
-               uint32_t status;
-               for (j = 0; j < 100; ++j) {
-                       status = RREG32(UVD_STATUS);
-                       if (status & 2)
-                               break;
-                       mdelay(10);
-               }
-               r = 0;
-               if (status & 2)
-                       break;
-               DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
-               WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
-               mdelay(10);
-               WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
-               mdelay(10);
-               r = -1;
-       }
-       if (r) {
-               DRM_ERROR("UVD not responding, giving up!!!\n");
-               radeon_set_uvd_clocks(rdev, 0, 0);
-               return r;
-       }
-       /* enable interupt */
-       WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
-       r = r600_uvd_rbc_start(rdev);
-       if (!r)
-               DRM_INFO("UVD initialized successfully.\n");
-       /* lower clocks again */
-       radeon_set_uvd_clocks(rdev, 0, 0);
-       return r;
- }
  /*
   * GPU scratch registers helpers function.
   */
@@@ -2865,94 -2526,6 +2526,6 @@@ int r600_ring_test(struct radeon_devic
        return r;
  }
  
- /**
-  * r600_dma_ring_test - simple async dma engine test
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Test the DMA engine by writing using it to write an
-  * value to memory. (r6xx-SI).
-  * Returns 0 for success, error for failure.
-  */
- int r600_dma_ring_test(struct radeon_device *rdev,
-                      struct radeon_ring *ring)
- {
-       unsigned i;
-       int r;
-       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-       u32 tmp;
-       if (!ptr) {
-               DRM_ERROR("invalid vram scratch pointer\n");
-               return -EINVAL;
-       }
-       tmp = 0xCAFEDEAD;
-       writel(tmp, ptr);
-       r = radeon_ring_lock(rdev, ring, 4);
-       if (r) {
-               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
-               return r;
-       }
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
-       radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = readl(ptr);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
-       }
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-       } else {
-               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-                         ring->idx, tmp);
-               r = -EINVAL;
-       }
-       return r;
- }
- int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       uint32_t tmp = 0;
-       unsigned i;
-       int r;
-       WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
-       r = radeon_ring_lock(rdev, ring, 3);
-       if (r) {
-               DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
-                         ring->idx, r);
-               return r;
-       }
-       radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
-       radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = RREG32(UVD_CONTEXT_ID);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
-       }
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ring test on %d succeeded in %d usecs\n",
-                        ring->idx, i);
-       } else {
-               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-                         ring->idx, tmp);
-               r = -EINVAL;
-       }
-       return r;
- }
  /*
   * CP fences/semaphores
   */
@@@ -3004,30 -2577,6 +2577,6 @@@ void r600_fence_ring_emit(struct radeon
        }
  }
  
- void r600_uvd_fence_emit(struct radeon_device *rdev,
-                        struct radeon_fence *fence)
- {
-       struct radeon_ring *ring = &rdev->ring[fence->ring];
-       uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
-       radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
-       radeon_ring_write(ring, fence->seq);
-       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
-       radeon_ring_write(ring, addr & 0xffffffff);
-       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
-       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
-       radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
-       radeon_ring_write(ring, 2);
-       return;
- }
  void r600_semaphore_ring_emit(struct radeon_device *rdev,
                              struct radeon_ring *ring,
                              struct radeon_semaphore *semaphore,
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
  }
  
- /*
-  * DMA fences/semaphores
-  */
- /**
-  * r600_dma_fence_ring_emit - emit a fence on the DMA ring
-  *
-  * @rdev: radeon_device pointer
-  * @fence: radeon fence object
-  *
-  * Add a DMA fence packet to the ring to write
-  * the fence seq number and DMA trap packet to generate
-  * an interrupt if needed (r6xx-r7xx).
-  */
- void r600_dma_fence_ring_emit(struct radeon_device *rdev,
-                             struct radeon_fence *fence)
- {
-       struct radeon_ring *ring = &rdev->ring[fence->ring];
-       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-       /* write the fence */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
-       radeon_ring_write(ring, addr & 0xfffffffc);
-       radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
-       radeon_ring_write(ring, lower_32_bits(fence->seq));
-       /* generate an interrupt */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
- }
- /**
-  * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  * @semaphore: radeon semaphore object
-  * @emit_wait: wait or signal semaphore
-  *
-  * Add a DMA semaphore packet to the ring wait on or signal
-  * other rings (r6xx-SI).
-  */
- void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
-                                 struct radeon_ring *ring,
-                                 struct radeon_semaphore *semaphore,
-                                 bool emit_wait)
- {
-       u64 addr = semaphore->gpu_addr;
-       u32 s = emit_wait ? 0 : 1;
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
-       radeon_ring_write(ring, addr & 0xfffffffc);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
- }
- void r600_uvd_semaphore_emit(struct radeon_device *rdev,
-                            struct radeon_ring *ring,
-                            struct radeon_semaphore *semaphore,
-                            bool emit_wait)
- {
-       uint64_t addr = semaphore->gpu_addr;
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
-       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
-       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
-       radeon_ring_write(ring, emit_wait ? 1 : 0);
- }
- int r600_copy_blit(struct radeon_device *rdev,
-                  uint64_t src_offset,
-                  uint64_t dst_offset,
-                  unsigned num_gpu_pages,
-                  struct radeon_fence **fence)
- {
-       struct radeon_semaphore *sem = NULL;
-       struct radeon_sa_bo *vb = NULL;
-       int r;
-       r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem);
-       if (r) {
-               return r;
-       }
-       r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb);
-       r600_blit_done_copy(rdev, fence, vb, sem);
-       return 0;
- }
  /**
   * r600_copy_cpdma - copy pages using the CP DMA engine
   *
@@@ -3217,80 -2677,6 +2677,6 @@@ int r600_copy_cpdma(struct radeon_devic
        return r;
  }
  
- /**
-  * r600_copy_dma - copy pages using the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @src_offset: src GPU address
-  * @dst_offset: dst GPU address
-  * @num_gpu_pages: number of GPU pages to xfer
-  * @fence: radeon fence object
-  *
-  * Copy GPU paging using the DMA engine (r6xx).
-  * Used by the radeon ttm implementation to move pages if
-  * registered as the asic copy callback.
-  */
- int r600_copy_dma(struct radeon_device *rdev,
-                 uint64_t src_offset, uint64_t dst_offset,
-                 unsigned num_gpu_pages,
-                 struct radeon_fence **fence)
- {
-       struct radeon_semaphore *sem = NULL;
-       int ring_index = rdev->asic->copy.dma_ring_index;
-       struct radeon_ring *ring = &rdev->ring[ring_index];
-       u32 size_in_dw, cur_size_in_dw;
-       int i, num_loops;
-       int r = 0;
-       r = radeon_semaphore_create(rdev, &sem);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
-       }
-       size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-       num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
-       r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
-       }
-       if (radeon_fence_need_sync(*fence, ring->idx)) {
-               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-                                           ring->idx);
-               radeon_fence_note_sync(*fence, ring->idx);
-       } else {
-               radeon_semaphore_free(rdev, &sem, NULL);
-       }
-       for (i = 0; i < num_loops; i++) {
-               cur_size_in_dw = size_in_dw;
-               if (cur_size_in_dw > 0xFFFE)
-                       cur_size_in_dw = 0xFFFE;
-               size_in_dw -= cur_size_in_dw;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
-               radeon_ring_write(ring, dst_offset & 0xfffffffc);
-               radeon_ring_write(ring, src_offset & 0xfffffffc);
-               radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
-                                        (upper_32_bits(src_offset) & 0xff)));
-               src_offset += cur_size_in_dw * 4;
-               dst_offset += cur_size_in_dw * 4;
-       }
-       r = radeon_fence_emit(rdev, fence, ring->idx);
-       if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
-               return r;
-       }
-       radeon_ring_unlock_commit(rdev, ring);
-       radeon_semaphore_free(rdev, &sem, *fence);
-       return r;
- }
  int r600_set_surface_reg(struct radeon_device *rdev, int reg,
                         uint32_t tiling_flags, uint32_t pitch,
                         uint32_t offset, uint32_t obj_size)
@@@ -3312,6 -2698,13 +2698,13 @@@ static int r600_startup(struct radeon_d
        /* enable pcie gen2 link */
        r600_pcie_gen2_enable(rdev);
  
+       /* scratch needs to be initialized before MC */
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+       r600_mc_program(rdev);
        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
                r = r600_init_microcode(rdev);
                if (r) {
                }
        }
  
-       r = r600_vram_scratch_init(rdev);
-       if (r)
-               return r;
-       r600_mc_program(rdev);
        if (rdev->flags & RADEON_IS_AGP) {
                r600_agp_enable(rdev);
        } else {
                        return r;
        }
        r600_gpu_init(rdev);
-       r = r600_blit_init(rdev);
-       if (r) {
-               r600_blit_fini(rdev);
-               rdev->asic->copy.copy = NULL;
-               dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-       }
  
        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             R600_CP_RB_RPTR, R600_CP_RB_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR, DMA_RB_WPTR,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+                            DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        if (r)
                return r;
  
@@@ -3551,7 -2933,6 +2933,6 @@@ int r600_init(struct radeon_device *rde
  void r600_fini(struct radeon_device *rdev)
  {
        r600_audio_fini(rdev);
-       r600_blit_fini(rdev);
        r600_cp_fini(rdev);
        r600_dma_fini(rdev);
        r600_irq_fini(rdev);
@@@ -3603,16 -2984,6 +2984,6 @@@ void r600_ring_ib_execute(struct radeon
        radeon_ring_write(ring, ib->length_dw);
  }
  
- void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
- {
-       struct radeon_ring *ring = &rdev->ring[ib->ring];
-       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
-       radeon_ring_write(ring, ib->gpu_addr);
-       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
-       radeon_ring_write(ring, ib->length_dw);
- }
  int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
  {
        struct radeon_ib ib;
@@@ -3666,139 -3037,6 +3037,6 @@@ free_scratch
        return r;
  }
  
- /**
-  * r600_dma_ib_test - test an IB on the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Test a simple IB in the DMA ring (r6xx-SI).
-  * Returns 0 on success, error on failure.
-  */
- int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       struct radeon_ib ib;
-       unsigned i;
-       int r;
-       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-       u32 tmp = 0;
-       if (!ptr) {
-               DRM_ERROR("invalid vram scratch pointer\n");
-               return -EINVAL;
-       }
-       tmp = 0xCAFEDEAD;
-       writel(tmp, ptr);
-       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
-       if (r) {
-               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
-               return r;
-       }
-       ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
-       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
-       ib.ptr[3] = 0xDEADBEEF;
-       ib.length_dw = 4;
-       r = radeon_ib_schedule(rdev, &ib, NULL);
-       if (r) {
-               radeon_ib_free(rdev, &ib);
-               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
-               return r;
-       }
-       r = radeon_fence_wait(ib.fence, false);
-       if (r) {
-               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-               return r;
-       }
-       for (i = 0; i < rdev->usec_timeout; i++) {
-               tmp = readl(ptr);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               DRM_UDELAY(1);
-       }
-       if (i < rdev->usec_timeout) {
-               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
-       } else {
-               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
-               r = -EINVAL;
-       }
-       radeon_ib_free(rdev, &ib);
-       return r;
- }
- int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       struct radeon_fence *fence = NULL;
-       int r;
-       r = radeon_set_uvd_clocks(rdev, 53300, 40000);
-       if (r) {
-               DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
-               return r;
-       }
-       r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
-       if (r) {
-               DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
-               goto error;
-       }
-       r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
-       if (r) {
-               DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
-               goto error;
-       }
-       r = radeon_fence_wait(fence, false);
-       if (r) {
-               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-               goto error;
-       }
-       DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
- error:
-       radeon_fence_unref(&fence);
-       radeon_set_uvd_clocks(rdev, 0, 0);
-       return r;
- }
- /**
-  * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @ib: IB object to schedule
-  *
-  * Schedule an IB in the DMA ring (r6xx-r7xx).
-  */
- void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
- {
-       struct radeon_ring *ring = &rdev->ring[ib->ring];
-       if (rdev->wb.enabled) {
-               u32 next_rptr = ring->wptr + 4;
-               while ((next_rptr & 7) != 5)
-                       next_rptr++;
-               next_rptr += 3;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
-               radeon_ring_write(ring, next_rptr);
-       }
-       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
-        * Pad as necessary with NOPs.
-        */
-       while ((ring->wptr & 7) != 5)
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
-       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
-       radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
- }
  /*
   * Interrupts
   *
@@@ -3815,7 -3053,7 +3053,7 @@@ void r600_ih_ring_init(struct radeon_de
        u32 rb_bufsz;
  
        /* Align ring size */
 -      rb_bufsz = drm_order(ring_size / 4);
 +      rb_bufsz = order_base_2(ring_size / 4);
        ring_size = (1 << rb_bufsz) * 4;
        rdev->ih.ring_size = ring_size;
        rdev->ih.ptr_mask = rdev->ih.ring_size - 1;
@@@ -4052,7 -3290,7 +3290,7 @@@ int r600_irq_init(struct radeon_device 
        WREG32(INTERRUPT_CNTL, interrupt_cntl);
  
        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
 -      rb_bufsz = drm_order(rdev->ih.ring_size / 4);
 +      rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
  
        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
index 0000000000000000000000000000000000000000,bff05576266b3db92ef661173f74942edc42a020..3b317456512a8134ddb681eccd95c82504fd9cce
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,497 +1,497 @@@
 -      rb_bufsz = drm_order(ring->ring_size / 4);
+ /*
+  * Copyright 2013 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Alex Deucher
+  */
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "r600d.h"
+ u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);
+ /*
+  * DMA
+  * Starting with R600, the GPU has an asynchronous
+  * DMA engine.  The programming model is very similar
+  * to the 3D engine (ring buffer, IBs, etc.), but the
+  * DMA controller has its own packet format that is
+  * different from the PM4 format used by the 3D engine.
+  * It supports copying data, writing embedded data,
+  * solid fills, and a number of other things.  It also
+  * has support for tiling/detiling of buffers.
+  */
+ /**
+  * r600_dma_get_rptr - get the current read pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon ring pointer
+  *
+  * Get the current rptr from the hardware (r6xx+).
+  */
+ uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
+ }
+ /**
+  * r600_dma_get_wptr - get the current write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon ring pointer
+  *
+  * Get the current wptr from the hardware (r6xx+).
+  */
+ uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
+ }
+ /**
+  * r600_dma_set_wptr - commit the write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon ring pointer
+  *
+  * Write the wptr back to the hardware (r6xx+).
+  */
+ void r600_dma_set_wptr(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
+ }
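
The three accessors above translate between the driver's dword-granular ring position and the register's byte offset, which is confined to bits 2-17 by the 0x3fffc mask. A plain round-trip sketch of those shifts:

#include <stdint.h>
#include <assert.h>

/* The wptr/rptr registers hold a byte offset masked to 0x3fffc while the
 * driver tracks ring positions in dwords; mirror the shifts used in
 * r600_dma_get_rptr() and r600_dma_set_wptr() above. */
static uint32_t dword_index_to_reg(uint32_t wptr)  { return (wptr << 2) & 0x3fffc; }
static uint32_t reg_to_dword_index(uint32_t reg)   { return (reg & 0x3fffc) >> 2; }

int main(void)
{
        for (uint32_t idx = 0; idx < 0x10000; idx++)   /* every index that fits in the mask */
                assert(reg_to_dword_index(dword_index_to_reg(idx)) == idx);
        return 0;
}
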
+ /**
+  * r600_dma_stop - stop the async dma engine
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engine (r6xx-evergreen).
+  */
+ void r600_dma_stop(struct radeon_device *rdev)
+ {
+       u32 rb_cntl = RREG32(DMA_RB_CNTL);
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL, rb_cntl);
+       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+ }
+ /**
+  * r600_dma_resume - setup and start the async dma engine
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
+  * Returns 0 for success, error for failure.
+  */
+ int r600_dma_resume(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+       u32 rb_cntl, dma_cntl, ib_cntl;
+       u32 rb_bufsz;
+       int r;
+       /* Reset dma */
+       if (rdev->family >= CHIP_RV770)
+               WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+       else
+               WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+       WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+       /* Set ring buffer size in dwords */
++      rb_bufsz = order_base_2(ring->ring_size / 4);
+       rb_cntl = rb_bufsz << 1;
+ #ifdef __BIG_ENDIAN
+       rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ #endif
+       WREG32(DMA_RB_CNTL, rb_cntl);
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(DMA_RB_RPTR, 0);
+       WREG32(DMA_RB_WPTR, 0);
+       /* set the wb address whether it's enabled or not */
+       WREG32(DMA_RB_RPTR_ADDR_HI,
+              upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+       WREG32(DMA_RB_RPTR_ADDR_LO,
+              ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+       if (rdev->wb.enabled)
+               rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+       WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+       /* enable DMA IBs */
+       ib_cntl = DMA_IB_ENABLE;
+ #ifdef __BIG_ENDIAN
+       ib_cntl |= DMA_IB_SWAP_ENABLE;
+ #endif
+       WREG32(DMA_IB_CNTL, ib_cntl);
+       dma_cntl = RREG32(DMA_CNTL);
+       dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+       WREG32(DMA_CNTL, dma_cntl);
+       if (rdev->family >= CHIP_RV770)
+               WREG32(DMA_MODE, 1);
+       ring->wptr = 0;
+       WREG32(DMA_RB_WPTR, ring->wptr << 2);
+       ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+       WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+       ring->ready = true;
+       r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+       if (r) {
+               ring->ready = false;
+               return r;
+       }
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       return 0;
+ }
+ /**
+  * r600_dma_fini - tear down the async dma engine
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engine and free the ring (r6xx-evergreen).
+  */
+ void r600_dma_fini(struct radeon_device *rdev)
+ {
+       r600_dma_stop(rdev);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+ }
+ /**
+  * r600_dma_is_lockup - Check if the DMA engine is locked up
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Check if the async DMA engine is locked up.
+  * Returns true if the engine appears to be locked up, false if not.
+  */
+ bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       u32 reset_mask = r600_gpu_check_soft_reset(rdev);
+       if (!(reset_mask & RADEON_RESET_DMA)) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+ }
+ /**
+  * r600_dma_ring_test - simple async dma engine test
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test the DMA engine by using it to write a
+  * value to memory (r6xx-SI).
+  * Returns 0 for success, error for failure.
+  */
+ int r600_dma_ring_test(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ring_lock(rdev, ring, 4);
+       if (r) {
+               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+ }
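
The ring test above is a seed, write, poll pattern: scratch memory starts as 0xCAFEDEAD, the DMA engine is asked to overwrite it with 0xDEADBEEF, and the driver polls until the value flips or the timeout expires. A minimal sketch of the same flow with the hardware replaced by a stub (the packet emission itself is already shown in the function above):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the DMA engine completing the queued write. */
static void fake_dma_write(volatile uint32_t *dst, uint32_t val) { *dst = val; }

static int scratch_write_test(volatile uint32_t *scratch, unsigned timeout_usec)
{
        *scratch = 0xCAFEDEAD;                 /* seed with a known junk value */
        fake_dma_write(scratch, 0xDEADBEEF);   /* queued write (stubbed here) */
        for (unsigned i = 0; i < timeout_usec; i++) {
                if (*scratch == 0xDEADBEEF)    /* poll for completion */
                        return 0;
                /* the driver waits 1us per iteration (DRM_UDELAY(1)) */
        }
        return -1;                             /* timed out: engine likely stuck */
}

int main(void)
{
        volatile uint32_t scratch = 0;
        printf("test %s\n", scratch_write_test(&scratch, 100000) == 0 ? "passed" : "failed");
        return 0;
}
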
+ /**
+  * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+  *
+  * @rdev: radeon_device pointer
+  * @fence: radeon fence object
+  *
+  * Add a DMA fence packet to the ring to write
+  * the fence seq number and a DMA trap packet to generate
+  * an interrupt if needed (r6xx-r7xx).
+  */
+ void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+                             struct radeon_fence *fence)
+ {
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+       /* write the fence */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+       radeon_ring_write(ring, addr & 0xfffffffc);
+       radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+       radeon_ring_write(ring, lower_32_bits(fence->seq));
+       /* generate an interrupt */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+ }
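
The fence packet above splits the 64-bit fence address across two dwords: the low word is forced to 4-byte alignment and only eight bits of the upper half are kept, which matches a 40-bit address space (inferred here from the 0xff masks, not stated in the diff). A sketch of that split:

#include <stdint.h>
#include <assert.h>

/* Split a GPU address the way the DMA fence/semaphore packets do:
 * dword-aligned low 32 bits, plus the low 8 bits of the upper half. */
static void split_gpu_addr(uint64_t addr, uint32_t *lo, uint32_t *hi)
{
        *lo = (uint32_t)(addr & 0xfffffffc);
        *hi = (uint32_t)(addr >> 32) & 0xff;
}

int main(void)
{
        uint64_t addr = 0x23ACE55000ULL;       /* hypothetical 40-bit GPU address */
        uint32_t lo, hi;
        split_gpu_addr(addr, &lo, &hi);
        /* round-trips apart from the two alignment bits */
        assert((((uint64_t)hi << 32) | lo) == (addr & ~3ULL));
        return 0;
}
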
+ /**
+  * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  * @semaphore: radeon semaphore object
+  * @emit_wait: wait or signal semaphore
+  *
+  * Add a DMA semaphore packet to the ring to wait on or signal
+  * other rings (r6xx-SI).
+  */
+ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+                                 struct radeon_ring *ring,
+                                 struct radeon_semaphore *semaphore,
+                                 bool emit_wait)
+ {
+       u64 addr = semaphore->gpu_addr;
+       u32 s = emit_wait ? 0 : 1;
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+       radeon_ring_write(ring, addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+ }
+ /**
+  * r600_dma_ib_test - test an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test a simple IB in the DMA ring (r6xx-SI).
+  * Returns 0 on success, error on failure.
+  */
+ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       struct radeon_ib ib;
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp = 0;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+       if (r) {
+               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+               return r;
+       }
+       ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+       ib.ptr[3] = 0xDEADBEEF;
+       ib.length_dw = 4;
+       r = radeon_ib_schedule(rdev, &ib, NULL);
+       if (r) {
+               radeon_ib_free(rdev, &ib);
+               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+               return r;
+       }
+       r = radeon_fence_wait(ib.fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               return r;
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+       } else {
+               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+               r = -EINVAL;
+       }
+       radeon_ib_free(rdev, &ib);
+       return r;
+ }
+ /**
+  * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ib: IB object to schedule
+  *
+  * Schedule an IB in the DMA ring (r6xx-r7xx).
+  */
+ void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       if (rdev->wb.enabled) {
+               u32 next_rptr = ring->wptr + 4;
+               while ((next_rptr & 7) != 5)
+                       next_rptr++;
+               next_rptr += 3;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+               radeon_ring_write(ring, next_rptr);
+       }
+       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+        * Pad as necessary with NOPs.
+        */
+       while ((ring->wptr & 7) != 5)
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+       radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+ }
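
The alignment handled above follows from the packet sizes: the INDIRECT_BUFFER packet is 3 dwords, so padding the write pointer until (wptr & 7) == 5 makes that packet end exactly on an 8-dword boundary (5 + 3 = 8). When write-back is enabled, next_rptr is pre-computed as the position just past the 4-dword WRITE packet, the NOP padding and the 3-dword INDIRECT_BUFFER packet. A small helper expressing the same padding count (the helper name is illustrative, not part of the patch):

        /* dwords of DMA_PACKET_NOP needed so a following 3-dword
         * INDIRECT_BUFFER packet ends on an 8-dword boundary */
        static inline unsigned r600_dma_ib_pad_dw(unsigned wptr)
        {
                return (5 - (wptr & 7)) & 7;    /* pad until (wptr & 7) == 5 */
        }
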
+ /**
+  * r600_copy_dma - copy pages using the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @src_offset: src GPU address
+  * @dst_offset: dst GPU address
+  * @num_gpu_pages: number of GPU pages to xfer
+  * @fence: radeon fence object
+  *
+  * Copy GPU pages using the DMA engine (r6xx).
+  * Used by the radeon ttm implementation to move pages if
+  * registered as the asic copy callback.
+  */
+ int r600_copy_dma(struct radeon_device *rdev,
+                 uint64_t src_offset, uint64_t dst_offset,
+                 unsigned num_gpu_pages,
+                 struct radeon_fence **fence)
+ {
+       struct radeon_semaphore *sem = NULL;
+       int ring_index = rdev->asic->copy.dma_ring_index;
+       struct radeon_ring *ring = &rdev->ring[ring_index];
+       u32 size_in_dw, cur_size_in_dw;
+       int i, num_loops;
+       int r = 0;
+       r = radeon_semaphore_create(rdev, &sem);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               return r;
+       }
+       size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+       num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+       r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               radeon_semaphore_free(rdev, &sem, NULL);
+               return r;
+       }
+       if (radeon_fence_need_sync(*fence, ring->idx)) {
+               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+                                           ring->idx);
+               radeon_fence_note_sync(*fence, ring->idx);
+       } else {
+               radeon_semaphore_free(rdev, &sem, NULL);
+       }
+       for (i = 0; i < num_loops; i++) {
+               cur_size_in_dw = size_in_dw;
+               if (cur_size_in_dw > 0xFFFE)
+                       cur_size_in_dw = 0xFFFE;
+               size_in_dw -= cur_size_in_dw;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+               radeon_ring_write(ring, dst_offset & 0xfffffffc);
+               radeon_ring_write(ring, src_offset & 0xfffffffc);
+               radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+                                        (upper_32_bits(src_offset) & 0xff)));
+               src_offset += cur_size_in_dw * 4;
+               dst_offset += cur_size_in_dw * 4;
+       }
+       r = radeon_fence_emit(rdev, fence, ring->idx);
+       if (r) {
+               radeon_ring_unlock_undo(rdev, ring);
+               return r;
+       }
+       radeon_ring_unlock_commit(rdev, ring);
+       radeon_semaphore_free(rdev, &sem, *fence);
+       return r;
+ }
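
As a worked example of the sizing in r600_copy_dma (numbers for illustration only): each COPY packet is 4 dwords and moves at most 0xFFFE dwords of data, so moving 256 GPU pages of 4 KiB gives

        /* size_in_dw  = 256 * 4096 / 4               = 262144 dwords      */
        /* num_loops   = DIV_ROUND_UP(262144, 0xFFFE) = 5 COPY packets     */
        /* ring space  = 5 * 4 + 8                    = 28 dwords reserved */

The extra 8 dwords leave room for the optional semaphore wait (3 dwords, see r600_dma_semaphore_ring_emit above) and the fence plus trap (5 dwords) emitted before the ring is committed.
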
index 19066d1dcb7da405fd93cc2c8c90c74ff70cedf6,8cd87bac04869ff13c6fdc55bd9069b5ddf2fdeb..ff8b564ce2b2d37033de4b4795569beaad0903fa
@@@ -152,6 -152,47 +152,47 @@@ extern int radeon_aspm
  #define RADEON_RESET_MC                               (1 << 10)
  #define RADEON_RESET_DISPLAY                  (1 << 11)
  
+ /* CG block flags */
+ #define RADEON_CG_BLOCK_GFX                   (1 << 0)
+ #define RADEON_CG_BLOCK_MC                    (1 << 1)
+ #define RADEON_CG_BLOCK_SDMA                  (1 << 2)
+ #define RADEON_CG_BLOCK_UVD                   (1 << 3)
+ #define RADEON_CG_BLOCK_VCE                   (1 << 4)
+ #define RADEON_CG_BLOCK_HDP                   (1 << 5)
+ #define RADEON_CG_BLOCK_BIF                   (1 << 6)
+ /* CG flags */
+ #define RADEON_CG_SUPPORT_GFX_MGCG            (1 << 0)
+ #define RADEON_CG_SUPPORT_GFX_MGLS            (1 << 1)
+ #define RADEON_CG_SUPPORT_GFX_CGCG            (1 << 2)
+ #define RADEON_CG_SUPPORT_GFX_CGLS            (1 << 3)
+ #define RADEON_CG_SUPPORT_GFX_CGTS            (1 << 4)
+ #define RADEON_CG_SUPPORT_GFX_CGTS_LS         (1 << 5)
+ #define RADEON_CG_SUPPORT_GFX_CP_LS           (1 << 6)
+ #define RADEON_CG_SUPPORT_GFX_RLC_LS          (1 << 7)
+ #define RADEON_CG_SUPPORT_MC_LS                       (1 << 8)
+ #define RADEON_CG_SUPPORT_MC_MGCG             (1 << 9)
+ #define RADEON_CG_SUPPORT_SDMA_LS             (1 << 10)
+ #define RADEON_CG_SUPPORT_SDMA_MGCG           (1 << 11)
+ #define RADEON_CG_SUPPORT_BIF_LS              (1 << 12)
+ #define RADEON_CG_SUPPORT_UVD_MGCG            (1 << 13)
+ #define RADEON_CG_SUPPORT_VCE_MGCG            (1 << 14)
+ #define RADEON_CG_SUPPORT_HDP_LS              (1 << 15)
+ #define RADEON_CG_SUPPORT_HDP_MGCG            (1 << 16)
+ /* PG flags */
+ #define RADEON_PG_SUPPORT_GFX_CG              (1 << 0)
+ #define RADEON_PG_SUPPORT_GFX_SMG             (1 << 1)
+ #define RADEON_PG_SUPPORT_GFX_DMG             (1 << 2)
+ #define RADEON_PG_SUPPORT_UVD                 (1 << 3)
+ #define RADEON_PG_SUPPORT_VCE                 (1 << 4)
+ #define RADEON_PG_SUPPORT_CP                  (1 << 5)
+ #define RADEON_PG_SUPPORT_GDS                 (1 << 6)
+ #define RADEON_PG_SUPPORT_RLC_SMU_HS          (1 << 7)
+ #define RADEON_PG_SUPPORT_SDMA                        (1 << 8)
+ #define RADEON_PG_SUPPORT_ACP                 (1 << 9)
+ #define RADEON_PG_SUPPORT_SAMU                        (1 << 10)
  /* max cursor sizes (in pixels) */
  #define CURSOR_WIDTH 64
  #define CURSOR_HEIGHT 64
@@@ -238,6 -279,12 +279,12 @@@ int radeon_atom_get_max_vddc(struct rad
  int radeon_atom_get_leakage_vddc_based_on_leakage_idx(struct radeon_device *rdev,
                                                      u16 *voltage,
                                                      u16 leakage_idx);
+ int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev,
+                                         u16 *leakage_id);
+ int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev,
+                                                        u16 *vddc, u16 *vddci,
+                                                        u16 virtual_voltage_id,
+                                                        u16 vbios_voltage_id);
  int radeon_atom_round_to_true_voltage(struct radeon_device *rdev,
                                      u8 voltage_type,
                                      u16 nominal_voltage,
@@@ -492,6 -539,9 +539,6 @@@ int radeon_mode_dumb_create(struct drm_
  int radeon_mode_dumb_mmap(struct drm_file *filp,
                          struct drm_device *dev,
                          uint32_t handle, uint64_t *offset_p);
 -int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 -                           struct drm_device *dev,
 -                           uint32_t handle);
  
  /*
   * Semaphores.
@@@ -679,7 -729,7 +726,7 @@@ union radeon_irq_stat_regs 
  
  #define RADEON_MAX_HPD_PINS 6
  #define RADEON_MAX_CRTCS 6
- #define RADEON_MAX_AFMT_BLOCKS 6
+ #define RADEON_MAX_AFMT_BLOCKS 7
  
  struct radeon_irq {
        bool                            installed;
@@@ -743,8 -793,6 +790,6 @@@ struct radeon_ring 
        uint32_t                align_mask;
        uint32_t                ptr_mask;
        bool                    ready;
-       u32                     ptr_reg_shift;
-       u32                     ptr_reg_mask;
        u32                     nop;
        u32                     idx;
        u64                     last_semaphore_signal_addr;
@@@ -841,35 -889,6 +886,6 @@@ struct r600_ih 
        bool                    enabled;
  };
  
- struct r600_blit_cp_primitives {
-       void (*set_render_target)(struct radeon_device *rdev, int format,
-                                 int w, int h, u64 gpu_addr);
-       void (*cp_set_surface_sync)(struct radeon_device *rdev,
-                                   u32 sync_type, u32 size,
-                                   u64 mc_addr);
-       void (*set_shaders)(struct radeon_device *rdev);
-       void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
-       void (*set_tex_resource)(struct radeon_device *rdev,
-                                int format, int w, int h, int pitch,
-                                u64 gpu_addr, u32 size);
-       void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
-                            int x2, int y2);
-       void (*draw_auto)(struct radeon_device *rdev);
-       void (*set_default_state)(struct radeon_device *rdev);
- };
- struct r600_blit {
-       struct radeon_bo        *shader_obj;
-       struct r600_blit_cp_primitives primitives;
-       int max_dim;
-       int ring_size_common;
-       int ring_size_per_loop;
-       u64 shader_gpu_addr;
-       u32 vs_offset, ps_offset;
-       u32 state_offset;
-       u32 state_len;
- };
  /*
   * RLC stuff
   */
@@@ -880,13 -899,19 +896,19 @@@ struct radeon_rlc 
        struct radeon_bo        *save_restore_obj;
        uint64_t                save_restore_gpu_addr;
        volatile uint32_t       *sr_ptr;
-       u32                     *reg_list;
+       const u32               *reg_list;
        u32                     reg_list_size;
        /* for clear state */
        struct radeon_bo        *clear_state_obj;
        uint64_t                clear_state_gpu_addr;
        volatile uint32_t       *cs_ptr;
-       struct cs_section_def   *cs_data;
+       const struct cs_section_def   *cs_data;
+       u32                     clear_state_size;
+       /* for cp tables */
+       struct radeon_bo        *cp_table_obj;
+       uint64_t                cp_table_gpu_addr;
+       volatile uint32_t       *cp_table_ptr;
+       u32                     cp_table_size;
  };
  
  int radeon_ib_get(struct radeon_device *rdev, int ring,
@@@ -918,8 -943,7 +940,7 @@@ unsigned radeon_ring_backup(struct rade
  int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
                        unsigned size, uint32_t *data);
  int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
-                    unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
-                    u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
+                    unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop);
  void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
  
  
@@@ -1033,7 -1057,6 +1054,6 @@@ struct radeon_wb 
  #define R600_WB_DMA_RPTR_OFFSET   1792
  #define R600_WB_IH_WPTR_OFFSET   2048
  #define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
- #define R600_WB_UVD_RPTR_OFFSET  2560
  #define R600_WB_EVENT_OFFSET     3072
  #define CIK_WB_CP1_WPTR_OFFSET     3328
  #define CIK_WB_CP2_WPTR_OFFSET     3584
@@@ -1144,6 -1167,7 +1164,7 @@@ enum radeon_int_thermal_type 
        THERMAL_TYPE_SI,
        THERMAL_TYPE_EMC2103_WITH_INTERNAL,
        THERMAL_TYPE_CI,
+       THERMAL_TYPE_KV,
  };
  
  struct radeon_voltage {
@@@ -1217,6 -1241,9 +1238,9 @@@ struct radeon_ps 
        /* UVD clocks */
        u32 vclk;
        u32 dclk;
+       /* VCE clocks */
+       u32 evclk;
+       u32 ecclk;
        /* asic priv */
        void *ps_priv;
  };
@@@ -1267,14 -1294,21 +1291,21 @@@ struct radeon_clock_voltage_dependency_
        struct radeon_clock_voltage_dependency_entry *entries;
  };
  
- struct radeon_cac_leakage_entry {
-       u16 vddc;
-       u32 leakage;
+ union radeon_cac_leakage_entry {
+       struct {
+               u16 vddc;
+               u32 leakage;
+       };
+       struct {
+               u16 vddc1;
+               u16 vddc2;
+               u16 vddc3;
+       };
  };
  
  struct radeon_cac_leakage_table {
        u32 count;
-       struct radeon_cac_leakage_entry *entries;
+       union radeon_cac_leakage_entry *entries;
  };
  
  struct radeon_phase_shedding_limits_entry {
@@@ -1288,6 -1322,28 +1319,28 @@@ struct radeon_phase_shedding_limits_tab
        struct radeon_phase_shedding_limits_entry *entries;
  };
  
+ struct radeon_uvd_clock_voltage_dependency_entry {
+       u32 vclk;
+       u32 dclk;
+       u16 v;
+ };
+ struct radeon_uvd_clock_voltage_dependency_table {
+       u8 count;
+       struct radeon_uvd_clock_voltage_dependency_entry *entries;
+ };
+ struct radeon_vce_clock_voltage_dependency_entry {
+       u32 ecclk;
+       u32 evclk;
+       u16 v;
+ };
+ struct radeon_vce_clock_voltage_dependency_table {
+       u8 count;
+       struct radeon_vce_clock_voltage_dependency_entry *entries;
+ };
  struct radeon_ppm_table {
        u8 ppm_design;
        u16 cpu_core_number;
        u32 tj_max;
  };
  
+ struct radeon_cac_tdp_table {
+       u16 tdp;
+       u16 configurable_tdp;
+       u16 tdc;
+       u16 battery_power_limit;
+       u16 small_power_limit;
+       u16 low_cac_leakage;
+       u16 high_cac_leakage;
+       u16 maximum_power_delivery_limit;
+ };
  struct radeon_dpm_dynamic_state {
        struct radeon_clock_voltage_dependency_table vddc_dependency_on_sclk;
        struct radeon_clock_voltage_dependency_table vddci_dependency_on_mclk;
        struct radeon_clock_voltage_dependency_table vddc_dependency_on_mclk;
+       struct radeon_clock_voltage_dependency_table mvdd_dependency_on_mclk;
        struct radeon_clock_voltage_dependency_table vddc_dependency_on_dispclk;
+       struct radeon_uvd_clock_voltage_dependency_table uvd_clock_voltage_dependency_table;
+       struct radeon_vce_clock_voltage_dependency_table vce_clock_voltage_dependency_table;
+       struct radeon_clock_voltage_dependency_table samu_clock_voltage_dependency_table;
+       struct radeon_clock_voltage_dependency_table acp_clock_voltage_dependency_table;
        struct radeon_clock_array valid_sclk_values;
        struct radeon_clock_array valid_mclk_values;
        struct radeon_clock_and_voltage_limits max_clock_voltage_on_dc;
        struct radeon_cac_leakage_table cac_leakage_table;
        struct radeon_phase_shedding_limits_table phase_shedding_limits_table;
        struct radeon_ppm_table *ppm_table;
+       struct radeon_cac_tdp_table *cac_tdp_table;
  };
  
  struct radeon_dpm_fan {
@@@ -1386,11 -1459,12 +1456,12 @@@ struct radeon_dpm 
        struct radeon_dpm_thermal thermal;
        /* forced levels */
        enum radeon_dpm_forced_level forced_level;
+       /* track UVD streams */
+       unsigned sd;
+       unsigned hd;
  };
  
- void radeon_dpm_enable_power_state(struct radeon_device *rdev,
-                                   enum radeon_pm_state_type dpm_state);
+ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable);
  
  struct radeon_pm {
        struct mutex            mutex;
@@@ -1465,9 -1539,9 +1536,9 @@@ struct radeon_uvd 
        void                    *cpu_addr;
        uint64_t                gpu_addr;
        void                    *saved_bo;
-       unsigned                fw_size;
        atomic_t                handles[RADEON_MAX_UVD_HANDLES];
        struct drm_file         *filp[RADEON_MAX_UVD_HANDLES];
+       unsigned                img_size[RADEON_MAX_UVD_HANDLES];
        struct delayed_work     idle_work;
  };
  
@@@ -1496,12 -1570,21 +1567,21 @@@ int radeon_uvd_calc_upll_dividers(struc
  int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
                                  unsigned cg_upll_func_cntl);
  
- struct r600_audio {
+ struct r600_audio_pin {
        int                     channels;
        int                     rate;
        int                     bits_per_sample;
        u8                      status_bits;
        u8                      category_code;
+       u32                     offset;
+       bool                    connected;
+       u32                     id;
+ };
+ struct r600_audio {
+       bool enabled;
+       struct r600_audio_pin pin[RADEON_MAX_AFMT_BLOCKS];
+       int num_pins;
  };
  
  /*
@@@ -1533,6 -1616,34 +1613,34 @@@ int radeon_debugfs_add_files(struct rad
                             unsigned nfiles);
  int radeon_debugfs_fence_init(struct radeon_device *rdev);
  
+ /*
+  * ASIC ring specific functions.
+  */
+ struct radeon_asic_ring {
+       /* ring read/write ptr handling */
+       u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring);
+       u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
+       void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
+       /* validating and patching of IBs */
+       int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
+       int (*cs_parse)(struct radeon_cs_parser *p);
+       /* command emit functions */
+       void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+       void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
+       void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
+                              struct radeon_semaphore *semaphore, bool emit_wait);
+       void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+       /* testing functions */
+       int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
+       int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
+       bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
+       /* deprecated */
+       void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
+ };
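
With this change the per-ring callbacks live in shared tables referenced by pointer instead of being duplicated inline in every struct radeon_asic. A sketch of how such a table is wired up for the r6xx DMA ring (the table name and the omitted rptr/wptr helpers are illustrative; the real tables are filled in radeon_asic.c):

        static struct radeon_asic_ring r600_dma_ring = {
                .ib_execute = &r600_dma_ring_ib_execute,
                .emit_fence = &r600_dma_fence_ring_emit,
                .emit_semaphore = &r600_dma_semaphore_ring_emit,
                .ring_test = &r600_dma_ring_test,
                .ib_test = &r600_dma_ib_test,
                .is_lockup = &r600_dma_is_lockup,
                /* .get_rptr/.get_wptr/.set_wptr point at the generic ring helpers */
        };

        /* and in the asic definition: */
        .ring[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
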
  
  /*
   * ASIC specific functions.
@@@ -1576,23 -1687,7 +1684,7 @@@ struct radeon_asic 
                                 uint32_t incr, uint32_t flags);
        } vm;
        /* ring specific callbacks */
-       struct {
-               void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
-               int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
-               void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
-               void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
-                                      struct radeon_semaphore *semaphore, bool emit_wait);
-               int (*cs_parse)(struct radeon_cs_parser *p);
-               void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
-               int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
-               int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
-               bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
-               void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
-               u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring);
-               u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
-               void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
-       } ring[RADEON_NUM_RINGS];
+       struct radeon_asic_ring *ring[RADEON_NUM_RINGS];
        /* irqs */
        struct {
                int (*set)(struct radeon_device *rdev);
                void (*debugfs_print_current_performance_level)(struct radeon_device *rdev, struct seq_file *m);
                int (*force_performance_level)(struct radeon_device *rdev, enum radeon_dpm_forced_level level);
                bool (*vblank_too_short)(struct radeon_device *rdev);
+               void (*powergate_uvd)(struct radeon_device *rdev, bool gate);
        } dpm;
        /* pageflipping */
        struct {
@@@ -2063,7 -2159,7 +2156,7 @@@ struct radeon_device 
        const struct firmware *mec_fw;  /* CIK MEC firmware */
        const struct firmware *sdma_fw; /* CIK SDMA firmware */
        const struct firmware *smc_fw;  /* SMC firmware */
-       struct r600_blit r600_blit;
+       const struct firmware *uvd_fw;  /* UVD firmware */
        struct r600_vram_scratch vram_scratch;
        int msi_enabled; /* msi enabled */
        struct r600_ih ih; /* r6/700 interrupt ring */
        struct work_struct reset_work;
        int num_crtc; /* number of crtcs */
        struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
-       bool audio_enabled;
        bool has_uvd;
-       struct r600_audio audio_status; /* audio stuff */
+       struct r600_audio audio; /* audio stuff */
        struct notifier_block acpi_nb;
        /* only one userspace can use Hyperz features or CMASK at a time */
        struct drm_file *hyperz_filp;
        /* ACPI interface */
        struct radeon_atif              atif;
        struct radeon_atcs              atcs;
+       /* srbm instance registers */
+       struct mutex                    srbm_mutex;
+       /* clock, powergating flags */
+       u32 cg_flags;
+       u32 pg_flags;
  };
  
  int radeon_device_init(struct radeon_device *rdev,
@@@ -2150,6 -2250,8 +2247,8 @@@ void cik_mm_wdoorbell(struct radeon_dev
  #define WREG32_PIF_PHY1(reg, v) eg_pif_phy1_wreg(rdev, (reg), (v))
  #define RREG32_UVD_CTX(reg) r600_uvd_ctx_rreg(rdev, (reg))
  #define WREG32_UVD_CTX(reg, v) r600_uvd_ctx_wreg(rdev, (reg), (v))
+ #define RREG32_DIDT(reg) cik_didt_rreg(rdev, (reg))
+ #define WREG32_DIDT(reg, v) cik_didt_wreg(rdev, (reg), (v))
  #define WREG32_P(reg, val, mask)                              \
        do {                                                    \
                uint32_t tmp_ = RREG32(reg);                    \
                WREG32(reg, tmp_);                              \
        } while (0)
  #define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
- #define WREG32_OR(reg, or) WREG32_P(reg, or, ~or)
+ #define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
  #define WREG32_PLL_P(reg, val, mask)                          \
        do {                                                    \
                uint32_t tmp_ = RREG32_PLL(reg);                \
@@@ -2281,6 -2383,22 +2380,22 @@@ static inline void r600_uvd_ctx_wreg(st
        WREG32(R600_UVD_CTX_DATA, (v));
  }
  
+ static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
+ {
+       u32 r;
+       WREG32(CIK_DIDT_IND_INDEX, (reg));
+       r = RREG32(CIK_DIDT_IND_DATA);
+       return r;
+ }
+ static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+ {
+       WREG32(CIK_DIDT_IND_INDEX, (reg));
+       WREG32(CIK_DIDT_IND_DATA, (v));
+ }
  void r100_pll_errata_after_index(struct radeon_device *rdev);
  
  
@@@ -2376,7 -2494,7 +2491,7 @@@ void radeon_ring_write(struct radeon_ri
  #define radeon_fini(rdev) (rdev)->asic->fini((rdev))
  #define radeon_resume(rdev) (rdev)->asic->resume((rdev))
  #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
- #define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)].cs_parse((p))
+ #define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)]->cs_parse((p))
  #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
  #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
  #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
  #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
  #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
  #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
- #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
- #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
- #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
- #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
- #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
- #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp))
- #define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)].vm_flush((rdev), (r), (vm))
- #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx].get_rptr((rdev), (r))
- #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx].get_wptr((rdev), (r))
- #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx].set_wptr((rdev), (r))
+ #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_start((rdev), (cp))
+ #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_test((rdev), (cp))
+ #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ib_test((rdev), (cp))
+ #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib))
+ #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib))
+ #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp))
+ #define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm))
+ #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r))
+ #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r))
+ #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r))
  #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
  #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
  #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc))
  #define radeon_get_backlight_level(rdev, e) (rdev)->asic->display.get_backlight_level((e))
  #define radeon_hdmi_enable(rdev, e, b) (rdev)->asic->display.hdmi_enable((e), (b))
  #define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m))
- #define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)].emit_fence((rdev), (fence))
- #define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)].emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
+ #define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence))
+ #define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
  #define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (f))
  #define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (f))
  #define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (f))
  #define radeon_dpm_debugfs_print_current_performance_level(rdev, m) rdev->asic->dpm.debugfs_print_current_performance_level((rdev), (m))
  #define radeon_dpm_force_performance_level(rdev, l) rdev->asic->dpm.force_performance_level((rdev), (l))
  #define radeon_dpm_vblank_too_short(rdev) rdev->asic->dpm.vblank_too_short((rdev))
+ #define radeon_dpm_powergate_uvd(rdev, g) rdev->asic->dpm.powergate_uvd((rdev), (g))
  
  /* Common functions */
  /* AGP */
@@@ -2519,6 -2638,8 +2635,8 @@@ int radeon_vm_bo_rmv(struct radeon_devi
  
  /* audio */
  void r600_audio_update_hdmi(struct work_struct *work);
+ struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev);
+ struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev);
  
  /*
   * R600 vram scratch functions
index 0610ca4fb6a32637288fe130d2b6434a6ebb2a70,e887641ba0033f7905139ada6236345e6fefc396..16cb8792b1e665f9048ec24f8c521eb052115662
@@@ -1003,16 -1003,28 +1003,28 @@@ static void radeon_check_arguments(stru
                radeon_vram_limit = 0;
        }
  
+       if (radeon_gart_size == -1) {
+               /* default to a larger gart size on newer asics */
+               if (rdev->family >= CHIP_RV770)
+                       radeon_gart_size = 1024;
+               else
+                       radeon_gart_size = 512;
+       }
        /* gtt size must be power of two and greater or equal to 32M */
        if (radeon_gart_size < 32) {
-               dev_warn(rdev->dev, "gart size (%d) too small forcing to 512M\n",
+               dev_warn(rdev->dev, "gart size (%d) too small\n",
                                radeon_gart_size);
-               radeon_gart_size = 512;
+               if (rdev->family >= CHIP_RV770)
+                       radeon_gart_size = 1024;
+               else
+                       radeon_gart_size = 512;
        } else if (!radeon_check_pot_argument(radeon_gart_size)) {
                dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n",
                                radeon_gart_size);
-               radeon_gart_size = 512;
+               if (rdev->family >= CHIP_RV770)
+                       radeon_gart_size = 1024;
+               else
+                       radeon_gart_size = 512;
        }
        rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20;
  
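
In effect the GTT aperture default now depends on the asic generation instead of a fixed 512 MB. With the new module default of gartsize=-1 (auto):

        /* RV770 and newer: radeon_gart_size = 1024 -> gtt_size = 1024ULL << 20 = 1 GiB   */
        /* older asics:     radeon_gart_size =  512 -> gtt_size =  512ULL << 20 = 512 MiB */
        /* explicit values must still be a power of two and at least 32 MB                */
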
@@@ -1144,7 -1156,7 +1156,7 @@@ int radeon_device_init(struct radeon_de
        rdev->family = flags & RADEON_FAMILY_MASK;
        rdev->is_atom_bios = false;
        rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT;
-       rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+       rdev->mc.gtt_size = 512 * 1024 * 1024;
        rdev->accel_working = false;
        /* set up ring ids */
        for (i = 0; i < RADEON_NUM_RINGS; i++) {
        mutex_init(&rdev->gem.mutex);
        mutex_init(&rdev->pm.mutex);
        mutex_init(&rdev->gpu_clock_mutex);
+       mutex_init(&rdev->srbm_mutex);
        init_rwsem(&rdev->pm.mclk_lock);
        init_rwsem(&rdev->exclusive_lock);
        init_waitqueue_head(&rdev->irq.vblank_queue);
        /* this will fail for cards that aren't VGA class devices, just
         * ignore it */
        vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 -      vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops);
 +      vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, false);
  
        r = radeon_init(rdev);
        if (r)
@@@ -1519,6 -1532,7 +1532,7 @@@ int radeon_gpu_reset(struct radeon_devi
        radeon_save_bios_scratch_regs(rdev);
        /* block TTM */
        resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
+       radeon_pm_suspend(rdev);
        radeon_suspend(rdev);
  
        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@@ -1564,6 -1578,7 +1578,7 @@@ retry
                }
        }
  
+       radeon_pm_resume(rdev);
        drm_helper_resume_force_mode(rdev->ddev);
  
        ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
index 358bd96c06c58907b7465b54191221a57ff72b45,af9cd6a57efc07c8f4f6ffc819e97411bf012821..b055bddaa94c3d85e4b497721b8d239082c035a4
@@@ -345,8 -345,7 +345,8 @@@ void radeon_crtc_handle_flip(struct rad
  
  static int radeon_crtc_page_flip(struct drm_crtc *crtc,
                                 struct drm_framebuffer *fb,
 -                               struct drm_pending_vblank_event *event)
 +                               struct drm_pending_vblank_event *event,
 +                               uint32_t page_flip_flags)
  {
        struct drm_device *dev = crtc->dev;
        struct radeon_device *rdev = dev->dev_private;
@@@ -1255,41 -1254,41 +1255,41 @@@ static void radeon_afmt_init(struct rad
        for (i = 0; i < RADEON_MAX_AFMT_BLOCKS; i++)
                rdev->mode_info.afmt[i] = NULL;
  
-       if (ASIC_IS_DCE6(rdev)) {
-               /* todo */
+       if (ASIC_IS_NODCE(rdev)) {
+               /* nothing to do */
        } else if (ASIC_IS_DCE4(rdev)) {
+               static uint32_t eg_offsets[] = {
+                       EVERGREEN_CRTC0_REGISTER_OFFSET,
+                       EVERGREEN_CRTC1_REGISTER_OFFSET,
+                       EVERGREEN_CRTC2_REGISTER_OFFSET,
+                       EVERGREEN_CRTC3_REGISTER_OFFSET,
+                       EVERGREEN_CRTC4_REGISTER_OFFSET,
+                       EVERGREEN_CRTC5_REGISTER_OFFSET,
+                       0x13830 - 0x7030,
+               };
+               int num_afmt;
+               /* DCE8 has 7 audio blocks tied to DIG encoders */
+               /* DCE6 has 6 audio blocks tied to DIG encoders */
                /* DCE4/5 has 6 audio blocks tied to DIG encoders */
                /* DCE4.1 has 2 audio blocks tied to DIG encoders */
-               rdev->mode_info.afmt[0] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-               if (rdev->mode_info.afmt[0]) {
-                       rdev->mode_info.afmt[0]->offset = EVERGREEN_CRTC0_REGISTER_OFFSET;
-                       rdev->mode_info.afmt[0]->id = 0;
-               }
-               rdev->mode_info.afmt[1] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-               if (rdev->mode_info.afmt[1]) {
-                       rdev->mode_info.afmt[1]->offset = EVERGREEN_CRTC1_REGISTER_OFFSET;
-                       rdev->mode_info.afmt[1]->id = 1;
-               }
-               if (!ASIC_IS_DCE41(rdev)) {
-                       rdev->mode_info.afmt[2] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-                       if (rdev->mode_info.afmt[2]) {
-                               rdev->mode_info.afmt[2]->offset = EVERGREEN_CRTC2_REGISTER_OFFSET;
-                               rdev->mode_info.afmt[2]->id = 2;
-                       }
-                       rdev->mode_info.afmt[3] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-                       if (rdev->mode_info.afmt[3]) {
-                               rdev->mode_info.afmt[3]->offset = EVERGREEN_CRTC3_REGISTER_OFFSET;
-                               rdev->mode_info.afmt[3]->id = 3;
-                       }
-                       rdev->mode_info.afmt[4] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-                       if (rdev->mode_info.afmt[4]) {
-                               rdev->mode_info.afmt[4]->offset = EVERGREEN_CRTC4_REGISTER_OFFSET;
-                               rdev->mode_info.afmt[4]->id = 4;
-                       }
-                       rdev->mode_info.afmt[5] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-                       if (rdev->mode_info.afmt[5]) {
-                               rdev->mode_info.afmt[5]->offset = EVERGREEN_CRTC5_REGISTER_OFFSET;
-                               rdev->mode_info.afmt[5]->id = 5;
+               if (ASIC_IS_DCE8(rdev))
+                       num_afmt = 7;
+               else if (ASIC_IS_DCE6(rdev))
+                       num_afmt = 6;
+               else if (ASIC_IS_DCE5(rdev))
+                       num_afmt = 6;
+               else if (ASIC_IS_DCE41(rdev))
+                       num_afmt = 2;
+               else /* DCE4 */
+                       num_afmt = 6;
+               BUG_ON(num_afmt > ARRAY_SIZE(eg_offsets));
+               for (i = 0; i < num_afmt; i++) {
+                       rdev->mode_info.afmt[i] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
+                       if (rdev->mode_info.afmt[i]) {
+                               rdev->mode_info.afmt[i]->offset = eg_offsets[i];
+                               rdev->mode_info.afmt[i]->id = i;
                        }
                }
        } else if (ASIC_IS_DCE3(rdev)) {
index 1f93dd50364647cd95f132291d68fcab165dfcbb,62bd176a7289e4660b27253a66126bed342e6d64..6d09258fb9f2d6ecb72a35f7d38cc0fd3f8bb07f
@@@ -81,6 -81,7 +81,6 @@@
  #define KMS_DRIVER_PATCHLEVEL 0
  int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
  int radeon_driver_unload_kms(struct drm_device *dev);
 -int radeon_driver_firstopen_kms(struct drm_device *dev);
  void radeon_driver_lastclose_kms(struct drm_device *dev);
  int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
  void radeon_driver_postclose_kms(struct drm_device *dev,
@@@ -100,6 -101,8 +100,6 @@@ void radeon_driver_irq_preinstall_kms(s
  int radeon_driver_irq_postinstall_kms(struct drm_device *dev);
  void radeon_driver_irq_uninstall_kms(struct drm_device *dev);
  irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS);
 -int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
 -                       struct drm_file *file_priv);
  int radeon_gem_object_init(struct drm_gem_object *obj);
  void radeon_gem_object_free(struct drm_gem_object *obj);
  int radeon_gem_object_open(struct drm_gem_object *obj,
@@@ -108,7 -111,7 +108,7 @@@ void radeon_gem_object_close(struct drm
                                struct drm_file *file_priv);
  extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
                                      int *vpos, int *hpos);
 -extern struct drm_ioctl_desc radeon_ioctls_kms[];
 +extern const struct drm_ioctl_desc radeon_ioctls_kms[];
  extern int radeon_max_kms_ioctl;
  int radeon_mmap(struct file *filp, struct vm_area_struct *vma);
  int radeon_mode_dumb_mmap(struct drm_file *filp,
  int radeon_mode_dumb_create(struct drm_file *file_priv,
                            struct drm_device *dev,
                            struct drm_mode_create_dumb *args);
 -int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 -                           struct drm_device *dev,
 -                           uint32_t handle);
  struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj);
  struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
                                                        size_t size,
@@@ -148,7 -154,7 +148,7 @@@ int radeon_dynclks = -1
  int radeon_r4xx_atom = 0;
  int radeon_agpmode = 0;
  int radeon_vram_limit = 0;
- int radeon_gart_size = 512; /* default gart size */
+ int radeon_gart_size = -1; /* auto */
  int radeon_benchmarking = 0;
  int radeon_testing = 0;
  int radeon_connector_table = 0;
@@@ -181,7 -187,7 +181,7 @@@ module_param_named(vramlimit, radeon_vr
  MODULE_PARM_DESC(agpmode, "AGP Mode (-1 == PCI)");
  module_param_named(agpmode, radeon_agpmode, int, 0444);
  
- MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc)");
+ MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)");
  module_param_named(gartsize, radeon_gart_size, int, 0600);
  
  MODULE_PARM_DESC(benchmark, "Run benchmark");
@@@ -266,6 -272,7 +266,6 @@@ static const struct file_operations rad
        .unlocked_ioctl = drm_ioctl,
        .mmap = drm_mmap,
        .poll = drm_poll,
 -      .fasync = drm_fasync,
        .read = drm_read,
  #ifdef CONFIG_COMPAT
        .compat_ioctl = radeon_compat_ioctl,
  
  static struct drm_driver driver_old = {
        .driver_features =
 -          DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
 +          DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG |
            DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED,
        .dev_priv_size = sizeof(drm_radeon_buf_priv_t),
        .load = radeon_driver_load,
@@@ -374,6 -381,7 +374,6 @@@ static const struct file_operations rad
        .unlocked_ioctl = drm_ioctl,
        .mmap = radeon_mmap,
        .poll = drm_poll,
 -      .fasync = drm_fasync,
        .read = drm_read,
  #ifdef CONFIG_COMPAT
        .compat_ioctl = radeon_kms_compat_ioctl,
  
  static struct drm_driver kms_driver = {
        .driver_features =
 -          DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
 -          DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | DRIVER_GEM |
 +          DRIVER_USE_AGP |
 +          DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
            DRIVER_PRIME,
        .dev_priv_size = 0,
        .load = radeon_driver_load_kms,
 -      .firstopen = radeon_driver_firstopen_kms,
        .open = radeon_driver_open_kms,
        .preclose = radeon_driver_preclose_kms,
        .postclose = radeon_driver_postclose_kms,
        .gem_free_object = radeon_gem_object_free,
        .gem_open_object = radeon_gem_object_open,
        .gem_close_object = radeon_gem_object_close,
 -      .dma_ioctl = radeon_dma_ioctl_kms,
        .dumb_create = radeon_mode_dumb_create,
        .dumb_map_offset = radeon_mode_dumb_mmap,
 -      .dumb_destroy = radeon_mode_dumb_destroy,
 +      .dumb_destroy = drm_gem_dumb_destroy,
        .fops = &radeon_driver_kms_fops,
  
        .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
index b46a5616664a5b2a5de691b83d5d316321e23bc4,cc2ca380e0c1729df2e2b6ab164fd2d9ea3b3006..205440d9544b5ed089ebc0b5e92b5e28caaf9e30
@@@ -433,6 -433,9 +433,9 @@@ int radeon_info_ioctl(struct drm_devic
                        return -EINVAL;
                }
                break;
+       case RADEON_INFO_SI_CP_DMA_COMPUTE:
+               *value = 1;
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
  /*
   * Outdated mess for old drm with Xorg being in charge (void function now).
   */
 -/**
 - * radeon_driver_firstopen_kms - drm callback for first open
 - *
 - * @dev: drm dev pointer
 - *
 - * Nothing to be done for KMS (all asics).
 - * Returns 0 on success.
 - */
 -int radeon_driver_firstopen_kms(struct drm_device *dev)
 -{
 -      return 0;
 -}
 -
  /**
   * radeon_driver_lastclose_kms - drm callback for last close
   *
@@@ -670,6 -686,16 +673,6 @@@ int radeon_get_vblank_timestamp_kms(str
                                                     drmcrtc);
  }
  
 -/*
 - * IOCTL.
 - */
 -int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
 -                       struct drm_file *file_priv)
 -{
 -      /* Not valid in KMS. */
 -      return -EINVAL;
 -}
 -
  #define KMS_INVALID_IOCTL(name)                                               \
  int name(struct drm_device *dev, void *data, struct drm_file *file_priv)\
  {                                                                     \
@@@ -709,7 -735,7 +712,7 @@@ KMS_INVALID_IOCTL(radeon_surface_alloc_
  KMS_INVALID_IOCTL(radeon_surface_free_kms)
  
  
 -struct drm_ioctl_desc radeon_ioctls_kms[] = {
 +const struct drm_ioctl_desc radeon_ioctls_kms[] = {
        DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
index d71037f4f68ffb05bb8b067c3e7628f5bddd0d6a,fe8bca6869007ea6b4b0c811592578e4bfa553ea..3e23b757dcfa578859d7ff5d96b04a9c7ed85691
@@@ -68,6 -68,8 +68,8 @@@ MODULE_FIRMWARE("radeon/HAINAN_smc.bin"
  
  static void si_pcie_gen3_enable(struct radeon_device *rdev);
  static void si_program_aspm(struct radeon_device *rdev);
+ extern void sumo_rlc_fini(struct radeon_device *rdev);
+ extern int sumo_rlc_init(struct radeon_device *rdev);
  extern int r600_ih_ring_alloc(struct radeon_device *rdev);
  extern void r600_ih_ring_fini(struct radeon_device *rdev);
  extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
@@@ -76,6 -78,11 +78,11 @@@ extern void evergreen_mc_resume(struct 
  extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
  extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
  extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+ extern void si_dma_vm_set_page(struct radeon_device *rdev,
+                              struct radeon_ib *ib,
+                              uint64_t pe,
+                              uint64_t addr, unsigned count,
+                              uint32_t incr, uint32_t flags);
  
  static const u32 verde_rlc_save_restore_register_list[] =
  {
@@@ -1663,9 -1670,13 +1670,13 @@@ static int si_init_microcode(struct rad
  
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
        err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
-       if (err)
-               goto out;
-       if (rdev->smc_fw->size != smc_req_size) {
+       if (err) {
+               printk(KERN_ERR
+                      "smc: error loading firmware \"%s\"\n",
+                      fw_name);
+               release_firmware(rdev->smc_fw);
+               rdev->smc_fw = NULL;
+       } else if (rdev->smc_fw->size != smc_req_size) {
                printk(KERN_ERR
                       "si_smc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->smc_fw->size, fw_name);
@@@ -1700,7 -1711,8 +1711,8 @@@ static u32 dce6_line_buffer_adjust(stru
                                   struct drm_display_mode *mode,
                                   struct drm_display_mode *other_mode)
  {
-       u32 tmp;
+       u32 tmp, buffer_alloc, i;
+       u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 3 line buffers, each one shared by 2 display controllers.
         * non-linked crtcs for maximum line buffer allocation.
         */
        if (radeon_crtc->base.enabled && mode) {
-               if (other_mode)
+               if (other_mode) {
                        tmp = 0; /* 1/2 */
-               else
+                       buffer_alloc = 1;
+               } else {
                        tmp = 2; /* whole */
-       } else
+                       buffer_alloc = 2;
+               }
+       } else {
                tmp = 0;
+               buffer_alloc = 0;
+       }
  
        WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
               DC_LB_MEMORY_CONFIG(tmp));
  
+       WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+              DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+                   DMIF_BUFFERS_ALLOCATED_COMPLETED)
+                       break;
+               udelay(1);
+       }
        if (radeon_crtc->base.enabled && mode) {
                switch (tmp) {
                case 0:
@@@ -3360,17 -3386,6 +3386,6 @@@ static int si_cp_resume(struct radeon_d
        u32 rb_bufsz;
        int r;
  
-       /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
-       WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
-                                SOFT_RESET_PA |
-                                SOFT_RESET_VGT |
-                                SOFT_RESET_SPI |
-                                SOFT_RESET_SX));
-       RREG32(GRBM_SOFT_RESET);
-       mdelay(15);
-       WREG32(GRBM_SOFT_RESET, 0);
-       RREG32(GRBM_SOFT_RESET);
        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
  
        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 -      rb_bufsz = drm_order(ring->ring_size / 8);
 -      tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 +      rb_bufsz = order_base_2(ring->ring_size / 8);
 +      tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
  #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
  #endif
        /* ring1  - compute only */
        /* Set ring buffer size */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
 -      rb_bufsz = drm_order(ring->ring_size / 8);
 -      tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 +      rb_bufsz = order_base_2(ring->ring_size / 8);
 +      tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
  #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
  #endif
        /* ring2 - compute only */
        /* Set ring buffer size */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
 -      rb_bufsz = drm_order(ring->ring_size / 8);
 -      tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 +      rb_bufsz = order_base_2(ring->ring_size / 8);
 +      tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
  #ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
  #endif
        return 0;
  }
  
- static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
+ u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
  {
        u32 reset_mask = 0;
        u32 tmp;
@@@ -3738,34 -3753,6 +3753,6 @@@ bool si_gfx_is_lockup(struct radeon_dev
        return radeon_ring_test_lockup(rdev, ring);
  }
  
- /**
-  * si_dma_is_lockup - Check if the DMA engine is locked up
-  *
-  * @rdev: radeon_device pointer
-  * @ring: radeon_ring structure holding ring information
-  *
-  * Check if the async DMA engine is locked up.
-  * Returns true if the engine appears to be locked up, false if not.
-  */
- bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
- {
-       u32 reset_mask = si_gpu_check_soft_reset(rdev);
-       u32 mask;
-       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-               mask = RADEON_RESET_DMA;
-       else
-               mask = RADEON_RESET_DMA1;
-       if (!(reset_mask & mask)) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force ring activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
- }
  /* MC */
  static void si_mc_program(struct radeon_device *rdev)
  {
@@@ -4079,13 -4066,64 +4066,64 @@@ static int si_vm_packet3_ce_check(struc
        return 0;
  }
  
+ static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
+ {
+       u32 start_reg, reg, i;
+       u32 command = ib[idx + 4];
+       u32 info = ib[idx + 1];
+       u32 idx_value = ib[idx];
+       if (command & PACKET3_CP_DMA_CMD_SAS) {
+               /* src address space is register */
+               if (((info & 0x60000000) >> 29) == 0) {
+                       start_reg = idx_value << 2;
+                       if (command & PACKET3_CP_DMA_CMD_SAIC) {
+                               reg = start_reg;
+                               if (!si_vm_reg_valid(reg)) {
+                                       DRM_ERROR("CP DMA Bad SRC register\n");
+                                       return -EINVAL;
+                               }
+                       } else {
+                               for (i = 0; i < (command & 0x1fffff); i++) {
+                                       reg = start_reg + (4 * i);
+                                       if (!si_vm_reg_valid(reg)) {
+                                               DRM_ERROR("CP DMA Bad SRC register\n");
+                                               return -EINVAL;
+                                       }
+                               }
+                       }
+               }
+       }
+       if (command & PACKET3_CP_DMA_CMD_DAS) {
+               /* dst address space is register */
+               if (((info & 0x00300000) >> 20) == 0) {
+                       start_reg = ib[idx + 2];
+                       if (command & PACKET3_CP_DMA_CMD_DAIC) {
+                               reg = start_reg;
+                               if (!si_vm_reg_valid(reg)) {
+                                       DRM_ERROR("CP DMA Bad DST register\n");
+                                       return -EINVAL;
+                               }
+                       } else {
+                               for (i = 0; i < (command & 0x1fffff); i++) {
+                                       reg = start_reg + (4 * i);
+                                       if (!si_vm_reg_valid(reg)) {
+                                               DRM_ERROR("CP DMA Bad DST register\n");
+                                               return -EINVAL;
+                                       }
+                               }
+                       }
+               }
+       }
+       return 0;
+ }
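
Factoring the check out also documents the CP_DMA packet layout the parser relies on, as read by the code above (offsets relative to the packet body at idx):

        /* ib[idx]     : source address low bits, or the source register
         *               offset in dwords when SAS marks register space
         * ib[idx + 1] : info word - bits 30:29 select the source address
         *               space, bits 21:20 the destination space (0 = register)
         * ib[idx + 2] : destination address low bits / register byte offset
         * ib[idx + 4] : command word - SAS/DAS/SAIC/DAIC flags plus the
         *               transfer count in bits 20:0
         */
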
  static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
  {
+       int r;
        u32 idx = pkt->idx + 1;
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;
-       u32 command, info;
  
        switch (pkt->opcode) {
        case PACKET3_NOP:
                }
                break;
        case PACKET3_CP_DMA:
-               command = ib[idx + 4];
-               info = ib[idx + 1];
-               if (command & PACKET3_CP_DMA_CMD_SAS) {
-                       /* src address space is register */
-                       if (((info & 0x60000000) >> 29) == 0) {
-                               start_reg = idx_value << 2;
-                               if (command & PACKET3_CP_DMA_CMD_SAIC) {
-                                       reg = start_reg;
-                                       if (!si_vm_reg_valid(reg)) {
-                                               DRM_ERROR("CP DMA Bad SRC register\n");
-                                               return -EINVAL;
-                                       }
-                               } else {
-                                       for (i = 0; i < (command & 0x1fffff); i++) {
-                                               reg = start_reg + (4 * i);
-                                               if (!si_vm_reg_valid(reg)) {
-                                                       DRM_ERROR("CP DMA Bad SRC register\n");
-                                                       return -EINVAL;
-                                               }
-                                       }
-                               }
-                       }
-               }
-               if (command & PACKET3_CP_DMA_CMD_DAS) {
-                       /* dst address space is register */
-                       if (((info & 0x00300000) >> 20) == 0) {
-                               start_reg = ib[idx + 2];
-                               if (command & PACKET3_CP_DMA_CMD_DAIC) {
-                                       reg = start_reg;
-                                       if (!si_vm_reg_valid(reg)) {
-                                               DRM_ERROR("CP DMA Bad DST register\n");
-                                               return -EINVAL;
-                                       }
-                               } else {
-                                       for (i = 0; i < (command & 0x1fffff); i++) {
-                                               reg = start_reg + (4 * i);
-                                               if (!si_vm_reg_valid(reg)) {
-                                                       DRM_ERROR("CP DMA Bad DST register\n");
-                                                       return -EINVAL;
-                                               }
-                                       }
-                               }
-                       }
-               }
+               r = si_vm_packet3_cp_dma_check(ib, idx);
+               if (r)
+                       return r;
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
  static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
  {
+       int r;
        u32 idx = pkt->idx + 1;
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;
                                return -EINVAL;
                }
                break;
+       case PACKET3_CP_DMA:
+               r = si_vm_packet3_cp_dma_check(ib, idx);
+               if (r)
+                       return r;
+               break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
@@@ -4704,58 -4707,7 +4707,7 @@@ void si_vm_set_page(struct radeon_devic
                }
        } else {
                /* DMA */
-               if (flags & RADEON_VM_PAGE_SYSTEM) {
-                       while (count) {
-                               ndw = count * 2;
-                               if (ndw > 0xFFFFE)
-                                       ndw = 0xFFFFE;
-                               /* for non-physically contiguous pages (system) */
-                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
-                               ib->ptr[ib->length_dw++] = pe;
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-                               for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-                                       if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                               value = radeon_vm_map_gart(rdev, addr);
-                                               value &= 0xFFFFFFFFFFFFF000ULL;
-                                       } else if (flags & RADEON_VM_PAGE_VALID) {
-                                               value = addr;
-                                       } else {
-                                               value = 0;
-                                       }
-                                       addr += incr;
-                                       value |= r600_flags;
-                                       ib->ptr[ib->length_dw++] = value;
-                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                               }
-                       }
-               } else {
-                       while (count) {
-                               ndw = count * 2;
-                               if (ndw > 0xFFFFE)
-                                       ndw = 0xFFFFE;
-                               if (flags & RADEON_VM_PAGE_VALID)
-                                       value = addr;
-                               else
-                                       value = 0;
-                               /* for physically contiguous pages (vram) */
-                               ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
-                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-                               ib->ptr[ib->length_dw++] = 0;
-                               ib->ptr[ib->length_dw++] = value; /* value */
-                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-                               ib->ptr[ib->length_dw++] = incr; /* increment size */
-                               ib->ptr[ib->length_dw++] = 0;
-                               pe += ndw * 4;
-                               addr += (ndw / 2) * incr;
-                               count -= ndw / 2;
-                       }
-               }
-               while (ib->length_dw & 0x7)
-                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
+               si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
        }
  }
  
@@@ -4802,32 -4754,6 +4754,6 @@@ void si_vm_flush(struct radeon_device *
        radeon_ring_write(ring, 0x0);
  }
  
- void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
- {
-       struct radeon_ring *ring = &rdev->ring[ridx];
-       if (vm == NULL)
-               return;
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-       if (vm->id < 8) {
-               radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
-       } else {
-               radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
-       }
-       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-       /* flush hdp cache */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-       radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
-       radeon_ring_write(ring, 1);
-       /* bits 0-7 are the VM contexts0-7 */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-       radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
-       radeon_ring_write(ring, 1 << vm->id);
- }
  /*
   *  Power and clock gating
   */
@@@ -4895,7 -4821,7 +4821,7 @@@ static void si_set_uvd_dcm(struct radeo
        WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
  }
  
- static void si_init_uvd_internal_cg(struct radeon_device *rdev)
+ void si_init_uvd_internal_cg(struct radeon_device *rdev)
  {
        bool hw_mode = true;
  
@@@ -4938,7 -4864,7 +4864,7 @@@ static void si_enable_dma_pg(struct rad
        u32 data, orig;
  
        orig = data = RREG32(DMA_PG);
-       if (enable)
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
                data |= PG_CNTL_ENABLE;
        else
                data &= ~PG_CNTL_ENABLE;
@@@ -4962,7 -4888,7 +4888,7 @@@ static void si_enable_gfx_cgpg(struct r
  {
        u32 tmp;
  
-       if (enable) {
+       if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
                tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
                WREG32(RLC_TTOP_D, tmp);
  
@@@ -5065,9 -4991,9 +4991,9 @@@ static void si_enable_cgcg(struct radeo
  
        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
  
-       si_enable_gui_idle_interrupt(rdev, enable);
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
+               si_enable_gui_idle_interrupt(rdev, true);
  
-       if (enable) {
                WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
  
                tmp = si_halt_rlc(rdev);
  
                data |= CGCG_EN | CGLS_EN;
        } else {
+               si_enable_gui_idle_interrupt(rdev, false);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
@@@ -5101,16 -5029,18 +5029,18 @@@ static void si_enable_mgcg(struct radeo
  {
        u32 data, orig, tmp = 0;
  
-       if (enable) {
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data = 0x96940200;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);
  
-               orig = data = RREG32(CP_MEM_SLP_CNTL);
-               data |= CP_MEM_LS_EN;
-               if (orig != data)
-                       WREG32(CP_MEM_SLP_CNTL, data);
+               if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
+                       orig = data = RREG32(CP_MEM_SLP_CNTL);
+                       data |= CP_MEM_LS_EN;
+                       if (orig != data)
+                               WREG32(CP_MEM_SLP_CNTL, data);
+               }
  
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xffffffc0;
@@@ -5155,7 -5085,7 +5085,7 @@@ static void si_enable_uvd_mgcg(struct r
  {
        u32 orig, data, tmp;
  
-       if (enable) {
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp |= 0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
@@@ -5203,7 -5133,7 +5133,7 @@@ static void si_enable_mc_ls(struct rade
  
        for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
                orig = data = RREG32(mc_cg_registers[i]);
-               if (enable)
+               if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
                        data |= MC_LS_ENABLE;
                else
                        data &= ~MC_LS_ENABLE;
        }
  }
  
- static void si_init_cg(struct radeon_device *rdev)
+ static void si_enable_mc_mgcg(struct radeon_device *rdev,
                             bool enable)
  {
-       bool has_uvd = true;
-       si_enable_mgcg(rdev, true);
-       si_enable_cgcg(rdev, true);
-       /* disable MC LS on Tahiti */
-       if (rdev->family == CHIP_TAHITI)
-               si_enable_mc_ls(rdev, false);
-       if (has_uvd) {
-               si_enable_uvd_mgcg(rdev, true);
-               si_init_uvd_internal_cg(rdev);
+       int i;
+       u32 orig, data;
+       for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+               orig = data = RREG32(mc_cg_registers[i]);
+               if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
+                       data |= MC_CG_ENABLE;
+               else
+                       data &= ~MC_CG_ENABLE;
+               if (data != orig)
+                       WREG32(mc_cg_registers[i], data);
        }
  }
  
- static void si_fini_cg(struct radeon_device *rdev)
+ static void si_enable_dma_mgcg(struct radeon_device *rdev,
+                              bool enable)
  {
-       bool has_uvd = true;
+       u32 orig, data, offset;
+       int i;
  
-       if (has_uvd)
-               si_enable_uvd_mgcg(rdev, false);
-       si_enable_cgcg(rdev, false);
-       si_enable_mgcg(rdev, false);
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
+               for (i = 0; i < 2; i++) {
+                       if (i == 0)
+                               offset = DMA0_REGISTER_OFFSET;
+                       else
+                               offset = DMA1_REGISTER_OFFSET;
+                       orig = data = RREG32(DMA_POWER_CNTL + offset);
+                       data &= ~MEM_POWER_OVERRIDE;
+                       if (data != orig)
+                               WREG32(DMA_POWER_CNTL + offset, data);
+                       WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+               }
+       } else {
+               for (i = 0; i < 2; i++) {
+                       if (i == 0)
+                               offset = DMA0_REGISTER_OFFSET;
+                       else
+                               offset = DMA1_REGISTER_OFFSET;
+                       orig = data = RREG32(DMA_POWER_CNTL + offset);
+                       data |= MEM_POWER_OVERRIDE;
+                       if (data != orig)
+                               WREG32(DMA_POWER_CNTL + offset, data);
+                       orig = data = RREG32(DMA_CLK_CTRL + offset);
+                       data = 0xff000000;
+                       if (data != orig)
+                               WREG32(DMA_CLK_CTRL + offset, data);
+               }
+       }
  }
  
- static void si_init_pg(struct radeon_device *rdev)
+ static void si_enable_bif_mgls(struct radeon_device *rdev,
+                              bool enable)
  {
-       bool has_pg = false;
+       u32 orig, data;
  
-       /* only cape verde supports PG */
-       if (rdev->family == CHIP_VERDE)
-               has_pg = true;
+       orig = data = RREG32_PCIE(PCIE_CNTL2);
  
-       if (has_pg) {
-               si_init_ao_cu_mask(rdev);
-               si_init_dma_pg(rdev);
-               si_enable_dma_pg(rdev, true);
-               si_init_gfx_cgpg(rdev);
-               si_enable_gfx_cgpg(rdev, true);
-       } else {
-               WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
-               WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
-       }
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
+               data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
+                       REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
+       else
+               data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
+                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
+       if (orig != data)
+               WREG32_PCIE(PCIE_CNTL2, data);
  }
  
- static void si_fini_pg(struct radeon_device *rdev)
+ static void si_enable_hdp_mgcg(struct radeon_device *rdev,
+                              bool enable)
  {
-       bool has_pg = false;
+       u32 orig, data;
  
-       /* only cape verde supports PG */
-       if (rdev->family == CHIP_VERDE)
-               has_pg = true;
+       orig = data = RREG32(HDP_HOST_PATH_CNTL);
  
-       if (has_pg) {
-               si_enable_dma_pg(rdev, false);
-               si_enable_gfx_cgpg(rdev, false);
-       }
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
+               data &= ~CLOCK_GATING_DIS;
+       else
+               data |= CLOCK_GATING_DIS;
+       if (orig != data)
+               WREG32(HDP_HOST_PATH_CNTL, data);
  }
  
- /*
-  * RLC
-  */
- void si_rlc_fini(struct radeon_device *rdev)
+ static void si_enable_hdp_ls(struct radeon_device *rdev,
+                            bool enable)
  {
-       int r;
+       u32 orig, data;
  
-       /* save restore block */
-       if (rdev->rlc.save_restore_obj) {
-               r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
-               if (unlikely(r != 0))
-                       dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
-               radeon_bo_unpin(rdev->rlc.save_restore_obj);
-               radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+       orig = data = RREG32(HDP_MEM_POWER_LS);
  
-               radeon_bo_unref(&rdev->rlc.save_restore_obj);
-               rdev->rlc.save_restore_obj = NULL;
-       }
-       /* clear state block */
-       if (rdev->rlc.clear_state_obj) {
-               r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
-               if (unlikely(r != 0))
-                       dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
-               radeon_bo_unpin(rdev->rlc.clear_state_obj);
-               radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+       if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
+               data |= HDP_LS_ENABLE;
+       else
+               data &= ~HDP_LS_ENABLE;
  
-               radeon_bo_unref(&rdev->rlc.clear_state_obj);
-               rdev->rlc.clear_state_obj = NULL;
-       }
+       if (orig != data)
+               WREG32(HDP_MEM_POWER_LS, data);
  }
  
- #define RLC_CLEAR_STATE_END_MARKER          0x00000001
- int si_rlc_init(struct radeon_device *rdev)
+ void si_update_cg(struct radeon_device *rdev,
+                 u32 block, bool enable)
  {
-       volatile u32 *dst_ptr;
-       u32 dws, data, i, j, k, reg_num;
-       u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
-       u64 reg_list_mc_addr;
-       const struct cs_section_def *cs_data = si_cs_data;
-       int r;
-       /* save restore block */
-       if (rdev->rlc.save_restore_obj == NULL) {
-               r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
-                                    RADEON_GEM_DOMAIN_VRAM, NULL,
-                                    &rdev->rlc.save_restore_obj);
-               if (r) {
-                       dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
-                       return r;
+       if (block & RADEON_CG_BLOCK_GFX) {
+               /* order matters! */
+               if (enable) {
+                       si_enable_mgcg(rdev, true);
+                       si_enable_cgcg(rdev, true);
+               } else {
+                       si_enable_cgcg(rdev, false);
+                       si_enable_mgcg(rdev, false);
                }
        }
  
-       r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
-       if (unlikely(r != 0)) {
-               si_rlc_fini(rdev);
-               return r;
+       if (block & RADEON_CG_BLOCK_MC) {
+               si_enable_mc_mgcg(rdev, enable);
+               si_enable_mc_ls(rdev, enable);
        }
-       r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
-                         &rdev->rlc.save_restore_gpu_addr);
-       if (r) {
-               radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-               dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
-               si_rlc_fini(rdev);
-               return r;
+       if (block & RADEON_CG_BLOCK_SDMA) {
+               si_enable_dma_mgcg(rdev, enable);
        }
  
-       if (rdev->family == CHIP_VERDE) {
-               r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
-               if (r) {
-                       dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
-                       si_rlc_fini(rdev);
-               return r;
-               }
-               /* write the sr buffer */
-               dst_ptr = rdev->rlc.sr_ptr;
-               for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
-                       dst_ptr[i] = verde_rlc_save_restore_register_list[i];
-               }
-               radeon_bo_kunmap(rdev->rlc.save_restore_obj);
+       if (block & RADEON_CG_BLOCK_BIF) {
+               si_enable_bif_mgls(rdev, enable);
        }
-       radeon_bo_unreserve(rdev->rlc.save_restore_obj);
  
-       /* clear state block */
-       reg_list_num = 0;
-       dws = 0;
-       for (i = 0; cs_data[i].section != NULL; i++) {
-               for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-                       reg_list_num++;
-                       dws += cs_data[i].section[j].reg_count;
+       if (block & RADEON_CG_BLOCK_UVD) {
+               if (rdev->has_uvd) {
+                       si_enable_uvd_mgcg(rdev, enable);
                }
        }
-       reg_list_blk_index = (3 * reg_list_num + 2);
-       dws += reg_list_blk_index;
  
-       if (rdev->rlc.clear_state_obj == NULL) {
-               r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-                                    RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
-               if (r) {
-                       dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
-                       si_rlc_fini(rdev);
-                       return r;
-               }
+       if (block & RADEON_CG_BLOCK_HDP) {
+               si_enable_hdp_mgcg(rdev, enable);
+               si_enable_hdp_ls(rdev, enable);
        }
-       r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
-       if (unlikely(r != 0)) {
-               si_rlc_fini(rdev);
-               return r;
+ }
+ static void si_init_cg(struct radeon_device *rdev)
+ {
+       si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+                           RADEON_CG_BLOCK_MC |
+                           RADEON_CG_BLOCK_SDMA |
+                           RADEON_CG_BLOCK_BIF |
+                           RADEON_CG_BLOCK_HDP), true);
+       if (rdev->has_uvd) {
+               si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
+               si_init_uvd_internal_cg(rdev);
        }
-       r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
-                         &rdev->rlc.clear_state_gpu_addr);
-       if (r) {
+ }
  
-               radeon_bo_unreserve(rdev->rlc.clear_state_obj);
-               dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
-               si_rlc_fini(rdev);
-               return r;
+ static void si_fini_cg(struct radeon_device *rdev)
+ {
+       if (rdev->has_uvd) {
+               si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
        }
-       r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
-       if (r) {
-               dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
-               si_rlc_fini(rdev);
-               return r;
+       si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+                           RADEON_CG_BLOCK_MC |
+                           RADEON_CG_BLOCK_SDMA |
+                           RADEON_CG_BLOCK_BIF |
+                           RADEON_CG_BLOCK_HDP), false);
+ }
+ u32 si_get_csb_size(struct radeon_device *rdev)
+ {
+       u32 count = 0;
+       const struct cs_section_def *sect = NULL;
+       const struct cs_extent_def *ext = NULL;
+       if (rdev->rlc.cs_data == NULL)
+               return 0;
+       /* begin clear state */
+       count += 2;
+       /* context control state */
+       count += 3;
+       for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+               for (ext = sect->section; ext->extent != NULL; ++ext) {
+                       if (sect->id == SECT_CONTEXT)
+                               count += 2 + ext->reg_count;
+                       else
+                               return 0;
+               }
        }
-       /* set up the cs buffer */
-       dst_ptr = rdev->rlc.cs_ptr;
-       reg_list_hdr_blk_index = 0;
-       reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
-       data = upper_32_bits(reg_list_mc_addr);
-       dst_ptr[reg_list_hdr_blk_index] = data;
-       reg_list_hdr_blk_index++;
-       for (i = 0; cs_data[i].section != NULL; i++) {
-               for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-                       reg_num = cs_data[i].section[j].reg_count;
-                       data = reg_list_mc_addr & 0xffffffff;
-                       dst_ptr[reg_list_hdr_blk_index] = data;
-                       reg_list_hdr_blk_index++;
-                       data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
-                       dst_ptr[reg_list_hdr_blk_index] = data;
-                       reg_list_hdr_blk_index++;
-                       data = 0x08000000 | (reg_num * 4);
-                       dst_ptr[reg_list_hdr_blk_index] = data;
-                       reg_list_hdr_blk_index++;
-                       for (k = 0; k < reg_num; k++) {
-                               data = cs_data[i].section[j].extent[k];
-                               dst_ptr[reg_list_blk_index + k] = data;
+       /* pa_sc_raster_config */
+       count += 3;
+       /* end clear state */
+       count += 2;
+       /* clear state */
+       count += 2;
+       return count;
+ }
+ void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
+ {
+       u32 count = 0, i;
+       const struct cs_section_def *sect = NULL;
+       const struct cs_extent_def *ext = NULL;
+       if (rdev->rlc.cs_data == NULL)
+               return;
+       if (buffer == NULL)
+               return;
+       buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+       buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
+       buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
+       buffer[count++] = 0x80000000;
+       buffer[count++] = 0x80000000;
+       for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+               for (ext = sect->section; ext->extent != NULL; ++ext) {
+                       if (sect->id == SECT_CONTEXT) {
+                               buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
+                               buffer[count++] = ext->reg_index - 0xa000;
+                               for (i = 0; i < ext->reg_count; i++)
+                                       buffer[count++] = ext->extent[i];
+                       } else {
+                               return;
                        }
-                       reg_list_mc_addr += reg_num * 4;
-                       reg_list_blk_index += reg_num;
                }
        }
-       dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
  
-       radeon_bo_kunmap(rdev->rlc.clear_state_obj);
-       radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+       buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
+       buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
+       switch (rdev->family) {
+       case CHIP_TAHITI:
+       case CHIP_PITCAIRN:
+               buffer[count++] = 0x2a00126a;
+               break;
+       case CHIP_VERDE:
+               buffer[count++] = 0x0000124a;
+               break;
+       case CHIP_OLAND:
+               buffer[count++] = 0x00000082;
+               break;
+       case CHIP_HAINAN:
+               buffer[count++] = 0x00000000;
+               break;
+       default:
+               buffer[count++] = 0x00000000;
+               break;
+       }
+       buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+       buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
  
-       return 0;
+       buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
+       buffer[count++] = 0;
+ }
+ static void si_init_pg(struct radeon_device *rdev)
+ {
+       if (rdev->pg_flags) {
+               if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
+                       si_init_dma_pg(rdev);
+               }
+               si_init_ao_cu_mask(rdev);
+               if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+                       si_init_gfx_cgpg(rdev);
+               }
+               si_enable_dma_pg(rdev, true);
+               si_enable_gfx_cgpg(rdev, true);
+       } else {
+               WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+               WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+       }
+ }
+ static void si_fini_pg(struct radeon_device *rdev)
+ {
+       if (rdev->pg_flags) {
+               si_enable_dma_pg(rdev, false);
+               si_enable_gfx_cgpg(rdev, false);
+       }
  }
  
- static void si_rlc_reset(struct radeon_device *rdev)
+ /*
+  * RLC
+  */
+ void si_rlc_reset(struct radeon_device *rdev)
  {
        u32 tmp = RREG32(GRBM_SOFT_RESET);
  
@@@ -5651,7 -5646,7 +5646,7 @@@ static int si_irq_init(struct radeon_de
        WREG32(INTERRUPT_CNTL, interrupt_cntl);
  
        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
 -      rb_bufsz = drm_order(rdev->ih.ring_size / 4);
 +      rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
  
        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
@@@ -6335,80 -6330,6 +6330,6 @@@ restart_ih
        return IRQ_HANDLED;
  }
  
- /**
-  * si_copy_dma - copy pages using the DMA engine
-  *
-  * @rdev: radeon_device pointer
-  * @src_offset: src GPU address
-  * @dst_offset: dst GPU address
-  * @num_gpu_pages: number of GPU pages to xfer
-  * @fence: radeon fence object
-  *
-  * Copy GPU paging using the DMA engine (SI).
-  * Used by the radeon ttm implementation to move pages if
-  * registered as the asic copy callback.
-  */
- int si_copy_dma(struct radeon_device *rdev,
-               uint64_t src_offset, uint64_t dst_offset,
-               unsigned num_gpu_pages,
-               struct radeon_fence **fence)
- {
-       struct radeon_semaphore *sem = NULL;
-       int ring_index = rdev->asic->copy.dma_ring_index;
-       struct radeon_ring *ring = &rdev->ring[ring_index];
-       u32 size_in_bytes, cur_size_in_bytes;
-       int i, num_loops;
-       int r = 0;
-       r = radeon_semaphore_create(rdev, &sem);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
-       }
-       size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
-       num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
-       r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
-       if (r) {
-               DRM_ERROR("radeon: moving bo (%d).\n", r);
-               radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
-       }
-       if (radeon_fence_need_sync(*fence, ring->idx)) {
-               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-                                           ring->idx);
-               radeon_fence_note_sync(*fence, ring->idx);
-       } else {
-               radeon_semaphore_free(rdev, &sem, NULL);
-       }
-       for (i = 0; i < num_loops; i++) {
-               cur_size_in_bytes = size_in_bytes;
-               if (cur_size_in_bytes > 0xFFFFF)
-                       cur_size_in_bytes = 0xFFFFF;
-               size_in_bytes -= cur_size_in_bytes;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
-               radeon_ring_write(ring, dst_offset & 0xffffffff);
-               radeon_ring_write(ring, src_offset & 0xffffffff);
-               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
-               src_offset += cur_size_in_bytes;
-               dst_offset += cur_size_in_bytes;
-       }
-       r = radeon_fence_emit(rdev, fence, ring->idx);
-       if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
-               return r;
-       }
-       radeon_ring_unlock_commit(rdev, ring);
-       radeon_semaphore_free(rdev, &sem, *fence);
-       return r;
- }
  /*
   * startup/shutdown callbacks
   */
@@@ -6422,6 -6343,13 +6343,13 @@@ static int si_startup(struct radeon_dev
        /* enable aspm */
        si_program_aspm(rdev);
  
+       /* scratch needs to be initialized before MC */
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+       si_mc_program(rdev);
        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
            !rdev->rlc_fw || !rdev->mc_fw) {
                r = si_init_microcode(rdev);
                return r;
        }
  
-       r = r600_vram_scratch_init(rdev);
-       if (r)
-               return r;
-       si_mc_program(rdev);
        r = si_pcie_gart_enable(rdev);
        if (r)
                return r;
        si_gpu_init(rdev);
  
        /* allocate rlc buffers */
-       r = si_rlc_init(rdev);
+       if (rdev->family == CHIP_VERDE) {
+               rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
+               rdev->rlc.reg_list_size =
+                       (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
+       }
+       rdev->rlc.cs_data = si_cs_data;
+       r = sumo_rlc_init(rdev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }
  
        if (rdev->has_uvd) {
-               r = rv770_uvd_resume(rdev);
+               r = uvd_v2_2_resume(rdev);
                if (!r) {
                        r = radeon_fence_driver_start_ring(rdev,
                                                           R600_RING_TYPE_UVD_INDEX);
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             CP_RB0_RPTR, CP_RB0_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
                             CP_RB1_RPTR, CP_RB1_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
                             CP_RB2_RPTR, CP_RB2_WPTR,
-                            0, 0xfffff, RADEON_CP_PACKET2);
+                            RADEON_CP_PACKET2);
        if (r)
                return r;
  
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+                            DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;
  
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+                            DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;
  
        if (rdev->has_uvd) {
                ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
                if (ring->ring_size) {
-                       r = radeon_ring_init(rdev, ring, ring->ring_size,
-                                            R600_WB_UVD_RPTR_OFFSET,
+                       r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
                                             UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-                                            0, 0xfffff, RADEON_CP_PACKET2);
+                                            RADEON_CP_PACKET2);
                        if (!r)
-                               r = r600_uvd_init(rdev);
+                               r = uvd_v1_0_init(rdev);
                        if (r)
                                DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
                }
                return r;
        }
  
+       r = dce6_audio_init(rdev);
+       if (r)
+               return r;
        return 0;
  }
  
@@@ -6621,13 -6553,16 +6553,16 @@@ int si_resume(struct radeon_device *rde
  
  int si_suspend(struct radeon_device *rdev)
  {
+       dce6_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        si_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
        if (rdev->has_uvd) {
-               r600_uvd_rbc_stop(rdev);
+               uvd_v1_0_fini(rdev);
                radeon_uvd_suspend(rdev);
        }
+       si_fini_pg(rdev);
+       si_fini_cg(rdev);
        si_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        si_pcie_gart_disable(rdev);
@@@ -6734,7 -6669,7 +6669,7 @@@ int si_init(struct radeon_device *rdev
                si_cp_fini(rdev);
                cayman_dma_fini(rdev);
                si_irq_fini(rdev);
-               si_rlc_fini(rdev);
+               sumo_rlc_fini(rdev);
                radeon_wb_fini(rdev);
                radeon_ib_pool_fini(rdev);
                radeon_vm_manager_fini(rdev);
@@@ -6759,16 -6694,18 +6694,18 @@@ void si_fini(struct radeon_device *rdev
  {
        si_cp_fini(rdev);
        cayman_dma_fini(rdev);
-       si_irq_fini(rdev);
-       si_rlc_fini(rdev);
-       si_fini_cg(rdev);
        si_fini_pg(rdev);
+       si_fini_cg(rdev);
+       si_irq_fini(rdev);
+       sumo_rlc_fini(rdev);
        radeon_wb_fini(rdev);
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
-       if (rdev->has_uvd)
+       if (rdev->has_uvd) {
+               uvd_v1_0_fini(rdev);
                radeon_uvd_fini(rdev);
+       }
        si_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
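
The clock gating and power gating rework in si.c above keeps repeating one idiom: read-modify-write a control register, set the enable bit only when the caller asked for it and the ASIC advertises support in rdev->cg_flags or rdev->pg_flags, and skip the write when the value did not change. Below is a minimal standalone sketch of that idiom; the register, the flag and bit values, and the rreg32()/wreg32() stand-ins are illustrative only and are not the driver's actual MMIO accessors.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins only; the real driver goes through RREG32()/WREG32()
 * on MMIO space.  The flag and bit values below are made up for the demo. */
#define RADEON_CG_SUPPORT_HDP_LS  (1 << 0)
#define HDP_LS_ENABLE             (1 << 15)

static uint32_t fake_hdp_mem_power_ls;        /* simulated HDP_MEM_POWER_LS */

static uint32_t rreg32(void)        { return fake_hdp_mem_power_ls; }
static void     wreg32(uint32_t v)  { fake_hdp_mem_power_ls = v; }

struct fake_rdev { uint32_t cg_flags; };

/* Same shape as si_enable_hdp_ls() in the hunk above: honor the support flag,
 * and only write the register when the value actually changed. */
static void enable_hdp_ls(struct fake_rdev *rdev, bool enable)
{
        uint32_t orig, data;

        orig = data = rreg32();
        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
                data |= HDP_LS_ENABLE;
        else
                data &= ~HDP_LS_ENABLE;
        if (orig != data)
                wreg32(data);
}

int main(void)
{
        struct fake_rdev supported   = { .cg_flags = RADEON_CG_SUPPORT_HDP_LS };
        struct fake_rdev unsupported = { .cg_flags = 0 };

        enable_hdp_ls(&supported, true);
        printf("supported ASIC:   0x%08x\n", (unsigned)fake_hdp_mem_power_ls);

        fake_hdp_mem_power_ls = 0;
        enable_hdp_ls(&unsupported, true);
        printf("unsupported ASIC: 0x%08x\n", (unsigned)fake_hdp_mem_power_ls);
        return 0;
}

The same pattern appears in si_enable_mc_ls(), si_enable_dma_pg(), si_enable_gfx_cgpg() and the other helpers changed above; si_update_cg() then fans the per-block enables out according to the RADEON_CG_BLOCK_* mask.
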
diff --combined drivers/gpu/drm/radeon/uvd_v1_0.c
index 0000000000000000000000000000000000000000,3426be9aa38af1e90107f43ddf312608a0295cd7..7266805d9786c6fe9bfd38501f7ab674f7d05a73
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,436 +1,436 @@@
 -      rb_bufsz = drm_order(ring->ring_size);
+ /*
+  * Copyright 2013 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Christian König <christian.koenig@amd.com>
+  */
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "r600d.h"
+ /**
+  * uvd_v1_0_get_rptr - get read pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Returns the current hardware read pointer
+  */
+ uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return RREG32(UVD_RBC_RB_RPTR);
+ }
+ /**
+  * uvd_v1_0_get_wptr - get write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Returns the current hardware write pointer
+  */
+ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return RREG32(UVD_RBC_RB_WPTR);
+ }
+ /**
+  * uvd_v1_0_set_wptr - set write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Commits the write pointer to the hardware
+  */
+ void uvd_v1_0_set_wptr(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+ }
+ /**
+  * uvd_v1_0_init - start and test UVD block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Initialize the hardware, boot up the VCPU and do some testing
+  */
+ int uvd_v1_0_init(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uint32_t tmp;
+       int r;
+       /* raise clocks while booting up the VCPU */
+       radeon_set_uvd_clocks(rdev, 53300, 40000);
+       r = uvd_v1_0_start(rdev);
+       if (r)
+               goto done;
+       ring->ready = true;
+       r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+       if (r) {
+               ring->ready = false;
+               goto done;
+       }
+       r = radeon_ring_lock(rdev, ring, 10);
+       if (r) {
+               DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
+               goto done;
+       }
+       tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+       tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+       tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+       /* Clear timeout status bits */
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+       radeon_ring_write(ring, 0x8);
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+       radeon_ring_write(ring, 3);
+       radeon_ring_unlock_commit(rdev, ring);
+ done:
+       /* lower clocks again */
+       radeon_set_uvd_clocks(rdev, 0, 0);
+       if (!r)
+               DRM_INFO("UVD initialized successfully.\n");
+       return r;
+ }
+ /**
+  * uvd_v1_0_fini - stop the hardware block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the UVD block, mark ring as not ready any more
+  */
+ void uvd_v1_0_fini(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uvd_v1_0_stop(rdev);
+       ring->ready = false;
+ }
+ /**
+  * uvd_v1_0_start - start UVD block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Setup and start the UVD block
+  */
+ int uvd_v1_0_start(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uint32_t rb_bufsz;
+       int i, j, r;
+       /* disable byte swapping */
+       u32 lmi_swap_cntl = 0;
+       u32 mp_swap_cntl = 0;
+       /* disable clock gating */
+       WREG32(UVD_CGC_GATE, 0);
+       /* disable interrupt */
+       WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+       /* Stall UMC and register bus before resetting VCPU */
+       WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+       mdelay(1);
+       /* put LMI, VCPU, RBC etc... into reset */
+       WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+              LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+              CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+       mdelay(5);
+       /* take UVD block out of reset */
+       WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+       mdelay(5);
+       /* initialize UVD memory controller */
+       WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+                            (1 << 21) | (1 << 9) | (1 << 20));
+ #ifdef __BIG_ENDIAN
+       /* swap (8 in 32) RB and IB */
+       lmi_swap_cntl = 0xa;
+       mp_swap_cntl = 0;
+ #endif
+       WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+       WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl);
+       WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+       WREG32(UVD_MPC_SET_MUXA1, 0x0);
+       WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+       WREG32(UVD_MPC_SET_MUXB1, 0x0);
+       WREG32(UVD_MPC_SET_ALU, 0);
+       WREG32(UVD_MPC_SET_MUX, 0x88);
+       /* take all subblocks out of reset, except VCPU */
+       WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+       mdelay(5);
+       /* enable VCPU clock */
+       WREG32(UVD_VCPU_CNTL,  1 << 9);
+       /* enable UMC */
+       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+       /* boot up the VCPU */
+       WREG32(UVD_SOFT_RESET, 0);
+       mdelay(10);
+       WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+       for (i = 0; i < 10; ++i) {
+               uint32_t status;
+               for (j = 0; j < 100; ++j) {
+                       status = RREG32(UVD_STATUS);
+                       if (status & 2)
+                               break;
+                       mdelay(10);
+               }
+               r = 0;
+               if (status & 2)
+                       break;
+               DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+               WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+               mdelay(10);
+               WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+               mdelay(10);
+               r = -1;
+       }
+       if (r) {
+               DRM_ERROR("UVD not responding, giving up!!!\n");
+               return r;
+       }
+       /* enable interrupt */
+       WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
+       /* force RBC into idle state */
+       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+       /* Set the write pointer delay */
+       WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+       /* program the 4GB memory segment for rptr and ring buffer */
+       WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
+                                  (0x7 << 16) | (0x1 << 31));
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(UVD_RBC_RB_RPTR, 0x0);
+       ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+       WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+       /* set the ring address */
+       WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+       /* Set ring buffer size */
++      rb_bufsz = order_base_2(ring->ring_size);
+       rb_bufsz = (0x1 << 8) | rb_bufsz;
+       WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
+       return 0;
+ }
+ /**
+  * uvd_v1_0_stop - stop UVD block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * stop the UVD block
+  */
+ void uvd_v1_0_stop(struct radeon_device *rdev)
+ {
+       /* force RBC into idle state */
+       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+       /* Stall UMC and register bus before resetting VCPU */
+       WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+       mdelay(1);
+       /* put VCPU into reset */
+       WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+       mdelay(5);
+       /* disable VCPU clock */
+       WREG32(UVD_VCPU_CNTL, 0x0);
+       /* Unstall UMC and register bus */
+       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+ }
+ /**
+  * uvd_v1_0_ring_test - register write test
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Test if we can successfully write to the context register
+  */
+ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+       WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+       r = radeon_ring_lock(rdev, ring, 3);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
+                         ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(UVD_CONTEXT_ID);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n",
+                        ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+ }
+ /**
+  * uvd_v1_0_semaphore_emit - emit semaphore command
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  * @semaphore: semaphore to emit commands for
+  * @emit_wait: true if we should emit a wait command
+  *
+  * Emit a semaphore command (either wait or signal) to the UVD ring.
+  */
+ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait)
+ {
+       uint64_t addr = semaphore->gpu_addr;
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+       radeon_ring_write(ring, emit_wait ? 1 : 0);
+ }
+ /**
+  * uvd_v1_0_ib_execute - execute indirect buffer
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer to execute
+  *
+  * Write ring commands to execute the indirect buffer
+  */
+ void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+       radeon_ring_write(ring, ib->gpu_addr);
+       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+       radeon_ring_write(ring, ib->length_dw);
+ }
+ /**
+  * uvd_v1_0_ib_test - test ib execution
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Test if we can successfully execute an IB
+  */
+ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       struct radeon_fence *fence = NULL;
+       int r;
+       r = radeon_set_uvd_clocks(rdev, 53300, 40000);
+       if (r) {
+               DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
+               return r;
+       }
+       r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+       if (r) {
+               DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+               goto error;
+       }
+       r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+       if (r) {
+               DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+               goto error;
+       }
+       r = radeon_fence_wait(fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               goto error;
+       }
+       DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+ error:
+       radeon_fence_unref(&fence);
+       radeon_set_uvd_clocks(rdev, 0, 0);
+       return r;
+ }
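
Both the si.c interrupt-ring setup and uvd_v1_0_start() above switch from the old drm_order() helper to the generic order_base_2() when computing rb_bufsz. The two agree for any size >= 1, since both return ceil(log2(size)); order_base_2() itself comes from include/linux/log2.h. A small userspace sketch of that equivalence, with an open-coded loop standing in for the removed drm_order() and a ceil_log2() helper standing in for order_base_2():

#include <stdio.h>

/* Equivalent to the removed drm_order(): the smallest order such that
 * (1UL << order) >= size. */
static int old_drm_order(unsigned long size)
{
        int order;

        for (order = 0; (1UL << order) < size; order++)
                ;
        return order;
}

/* ceil(log2(n)) for n >= 1, which is the value order_base_2() returns. */
static int ceil_log2(unsigned long n)
{
        int bits = 0;
        unsigned long v = n - 1;

        while (v) {
                v >>= 1;
                bits++;
        }
        return bits;
}

int main(void)
{
        unsigned long sizes[] = { 1, 2, 4096, 64 * 1024, 256 * 1024 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("size %8lu: drm_order=%d  ceil_log2=%d\n",
                       sizes[i], old_drm_order(sizes[i]), ceil_log2(sizes[i]));
        return 0;
}
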
diff --combined include/drm/drm_edid.h
index 7b75621fda4c905b1bc5e666689bacb5bb97c02b,c76a129b99536902bca2a8d690a5516c8db87023..a1441c5ac63d535faca20d9775c6e09bea6feb75
@@@ -256,10 -256,10 +256,11 @@@ struct drm_encoder
  struct drm_connector;
  struct drm_display_mode;
  struct hdmi_avi_infoframe;
 +struct hdmi_vendor_infoframe;
  
  void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid);
  int drm_edid_to_sad(struct edid *edid, struct cea_sad **sads);
+ int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb);
  int drm_av_sync_delay(struct drm_connector *connector,
                      struct drm_display_mode *mode);
  struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
@@@ -269,8 -269,5 +270,8 @@@ int drm_load_edid_firmware(struct drm_c
  int
  drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
                                         const struct drm_display_mode *mode);
 +int
 +drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
 +                                          const struct drm_display_mode *mode);
  
  #endif /* __DRM_EDID_H__ */
diff --combined include/drm/drm_pciids.h
index 0a85e5c5d61b5e2aca938166b7c233ff53bdefca,78bc8041a8a1ab27aad4249b493369c5390cd001..fd54a14a7c2a23972f4074217265c3e767063feb
@@@ -1,4 -1,22 +1,22 @@@
  #define radeon_PCI_IDS \
+       {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1306, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1307, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1309, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x130A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x130B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x130C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x130D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x130E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
        {0x1002, 0x3150, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
        {0x1002, 0x3151, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
        {0x1002, 0x3152, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
        {0x102b, 0x2527, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G550}, \
        {0, 0, 0}
  
 -#define mach64_PCI_IDS \
 -      {0x1002, 0x4749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4742, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4744, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c49, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c51, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c42, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x474c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x474f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4752, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4753, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x474d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x474e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c52, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c53, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0x1002, 0x4c4e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0, 0, 0}
 -
  #define sisdrv_PCI_IDS \
        {0x1039, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
        {0x1039, 0x5300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
        {0x8086, 0x1132, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
        {0, 0, 0}
  
 -#define gamma_PCI_IDS \
 -      {0x3d3d, 0x0008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 -      {0, 0, 0}
 -
  #define savage_PCI_IDS \
        {0x5333, 0x8a20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \
        {0x5333, 0x8a21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \
        {0x5333, 0x8d03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \
        {0x5333, 0x8d04, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \
        {0, 0, 0}
 -
 -#define ffb_PCI_IDS \
 -      {0, 0, 0}
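
For reference, the *_PCI_IDS initializers above (including the new KAVERI radeon entries) fill struct pci_device_id style rows of {vendor, device, subvendor, subdevice, class, class_mask, driver_data}, terminated by an all-zero sentinel that the probe code scans up to. A minimal sketch of that sentinel-terminated lookup follows; the two-entry table and the match() helper are hypothetical stand-ins, not the kernel's pci_match_id().

#include <stdint.h>
#include <stdio.h>

#define PCI_ANY_ID 0xffff   /* userspace stand-in for the kernel's wildcard */

/* Same field order as the initializers in the *_PCI_IDS macros above. */
struct id_entry {
        uint32_t vendor, device, subvendor, subdevice;
        uint32_t class, class_mask;
        unsigned long driver_data;
};

/* Hypothetical two-entry table plus the all-zero sentinel. */
static const struct id_entry table[] = {
        { 0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0x1 },
        { 0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0x2 },
        { 0, 0, 0 },
};

static const struct id_entry *match(uint32_t vendor, uint32_t device)
{
        const struct id_entry *id;

        /* Walk the table until the zero sentinel is reached. */
        for (id = table; id->vendor || id->device || id->driver_data; id++)
                if ((id->vendor == PCI_ANY_ID || id->vendor == vendor) &&
                    (id->device == PCI_ANY_ID || id->device == device))
                        return id;
        return NULL;
}

int main(void)
{
        const struct id_entry *id = match(0x1002, 0x1304);

        if (id)
                printf("matched, driver_data=0x%lx\n", id->driver_data);
        else
                printf("no match\n");
        return 0;
}
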