/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/iopoll.h>
#include <linux/slab.h>

#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"
extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13
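/*
 * Load the zap shader into the memory region reserved for it in the device
 * tree and ask the secure world (via SCM) to authenticate and bring it up.
 * Having the zap shader loaded is what later lets the CP switch the GPU out
 * of secure mode instead of poking RBBM_SECVID_TRUST_CNTL directly.
 */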
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	np = of_parse_phandle(np, "memory-region", 0);
	if (!np)
		return -EINVAL;

	ret = of_address_to_resource(np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not. But since we've already gotten through adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
	} else {
		char *newname;

		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
		kfree(newname);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);
	release_firmware(fw);

	return ret;
}
static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet.  But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit, ctx);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};
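/*
 * Apply (or clear) the static clock gating table above and toggle the top
 * level RBBM clock controls. This is also used to temporarily disable
 * hardware clock gating, e.g. while capturing GPU state.
 */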
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}
static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}
#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}
static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
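/*
 * Bring the GPU up from reset: program the VBIF, UCHE, CP and protection
 * registers, load the PM4/PFP microcode, start the micro engine and, if the
 * zap shader is available, drop out of secure mode via the CP.
 */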
static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);
	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}
static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}
static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}
static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}
bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}
static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;

	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}
static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */
		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}
static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}
static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}
static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
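/*
 * Top level interrupt handler: ack everything except RBBM_AHB_ERROR (which
 * must be cleared after its source) and fan the status bits out to the
 * per-block error handlers above.
 */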
static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};
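/*
 * Ranges of registers (inclusive start/end pairs) that the adreno core dumps
 * for debugfs and crash state.
 */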
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};
static void a5xx_dump(struct msm_gpu *gpu)
{
	dev_info(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}
static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}
struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};
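/*
 * Poll a GPU register until the condition is met or the timeout (in usecs)
 * expires. The register offset is in dwords, so it is shifted into a byte
 * offset into the MMIO region for readl_poll_timeout().
 */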
#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
	readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
		interval, timeout)
static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	return PTR_ERR_OR_ZERO(dumper->ptr);
}
static void a5xx_crashdumper_free(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	msm_gem_put_iova(dumper->bo, gpu->aspace);
	msm_gem_put_vaddr(dumper->bo);

	drm_gem_object_put(dumper->bo);
}
static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}
/*
 * Registers that need to be read through the HLSQ aperture via the
 * crashdumper; these are not normally accessible from the CPU on a secure
 * platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
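/*
 * Build and run a small crashdumper script that selects each HLSQ debug bank
 * listed above, copies its registers into a scratch buffer through the AHB
 * read aperture, and then stashes the results in the a5xx GPU state.
 */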
static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		a5xx_crashdumper_free(gpu, &dumper);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	a5xx_crashdumper_free(gpu, &dumper);
}
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
			GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}
static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}

int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif
static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}
static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
	},
	.get_timestamp = a5xx_get_timestamp,
};
static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 bin, val;

	cell = nvmem_cell_get(dev, "speed_bin");

	/* If a nvmem cell isn't defined, nothing to do */
	if (IS_ERR(cell))
		return;

	bin = *((u32 *) nvmem_cell_read(cell, NULL));
	nvmem_cell_put(cell);

	val = (1 << bin);

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}
struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}