drivers/gpu/drm/msm/adreno/a5xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
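/*
 * SCM Peripheral Authentication Service (PAS) id for the GPU, used when
 * loading or resuming the zap shader in the secure world.
 */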
19 #define GPU_PAS_ID 13
20
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25         uint32_t wptr;
26         unsigned long flags;
27
28         spin_lock_irqsave(&ring->lock, flags);
29
30         /* Copy the shadow to the actual register */
31         ring->cur = ring->next;
32
33         /* Make sure to wrap wptr if we need to */
34         wptr = get_wptr(ring);
35
36         spin_unlock_irqrestore(&ring->lock, flags);
37
38         /* Make sure everything is posted before making a decision */
39         mb();
40
41         /* Update HW if this is the current ring and we are not in preempt */
42         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45
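/*
 * Copy the commands for a submit directly into the ringbuffer instead of
 * issuing IBs - only used for CONFIG_DRM_MSM_GPU_SUDO debug submits.
 */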
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47         struct msm_file_private *ctx)
48 {
49         struct msm_drm_private *priv = gpu->dev->dev_private;
50         struct msm_ringbuffer *ring = submit->ring;
51         struct msm_gem_object *obj;
52         uint32_t *ptr, dwords;
53         unsigned int i, j;
54
55         for (i = 0; i < submit->nr_cmds; i++) {
56                 switch (submit->cmd[i].type) {
57                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58                         break;
59                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60                         if (priv->lastctx == ctx)
61                                 break;
62                         /* fall-thru */
63                 case MSM_SUBMIT_CMD_BUF:
64                         /* copy commands into RB: */
65                         obj = submit->bos[submit->cmd[i].idx].obj;
66                         dwords = submit->cmd[i].size;
67
68                         ptr = msm_gem_get_vaddr(&obj->base);
69
70                         /* _get_vaddr() shouldn't fail at this point,
71                          * since we've already mapped it once in
72                          * submit_reloc()
73                          */
74                         if (WARN_ON(!ptr))
75                                 return;
76
77                         for (j = 0; j < dwords; j++) {
78                                 /* normally the OUT_PKTn() would wait
79                                  * for space for the packet.  But since
80                                  * we just OUT_RING() the whole thing,
81                                  * need to call adreno_wait_ring()
82                                  * ourselves:
83                                  */
84                                 adreno_wait_ring(ring, 1);
85                                 OUT_RING(ring, ptr[j]);
86                         }
87
88                         msm_gem_put_vaddr(&obj->base);
89
90                         break;
91                 }
92         }
93
94         a5xx_flush(gpu, ring);
95         a5xx_preempt_trigger(gpu);
96
97         /* we might not necessarily have a cmd from userspace to
98          * trigger an event to know that submit has completed, so
99          * do this manually:
100          */
101         a5xx_idle(gpu, ring);
102         ring->memptrs->fence = submit->seqno;
103         msm_gpu_retire(gpu);
104 }
105
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107         struct msm_file_private *ctx)
108 {
109         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111         struct msm_drm_private *priv = gpu->dev->dev_private;
112         struct msm_ringbuffer *ring = submit->ring;
113         unsigned int i, ibs = 0;
114
115         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116                 priv->lastctx = NULL;
117                 a5xx_submit_in_rb(gpu, submit, ctx);
118                 return;
119         }
120
121         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122         OUT_RING(ring, 0x02);
123
124         /* Turn off protected mode to write to special registers */
125         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126         OUT_RING(ring, 0);
127
128         /* Set the save preemption record for the ring/command */
129         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132
133         /* Turn back on protected mode */
134         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135         OUT_RING(ring, 1);
136
137         /* Enable local preemption for finegrain preemption */
138         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
139         OUT_RING(ring, 0x1);
140
141         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143         OUT_RING(ring, 0x02);
144
145         /* Submit the commands */
146         for (i = 0; i < submit->nr_cmds; i++) {
147                 switch (submit->cmd[i].type) {
148                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149                         break;
150                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151                         if (priv->lastctx == ctx)
152                                 break;
153                         /* fall-thru */
154                 case MSM_SUBMIT_CMD_BUF:
155                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158                         OUT_RING(ring, submit->cmd[i].size);
159                         ibs++;
160                         break;
161                 }
162         }
163
164         /*
165          * Write the render mode to NULL (0) to indicate to the CP that the IBs
166          * are done rendering - otherwise a lucky preemption would start
167          * replaying from the last checkpoint
168          */
169         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170         OUT_RING(ring, 0);
171         OUT_RING(ring, 0);
172         OUT_RING(ring, 0);
173         OUT_RING(ring, 0);
174         OUT_RING(ring, 0);
175
176         /* Turn off IB level preemptions */
177         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178         OUT_RING(ring, 0x01);
179
180         /* Write the fence to the scratch register */
181         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182         OUT_RING(ring, submit->seqno);
183
184         /*
185          * Execute a CACHE_FLUSH_TS event. This will ensure that the
186          * timestamp is written to the memory and then triggers the interrupt
187          */
188         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189         OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
190         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
191         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
192         OUT_RING(ring, submit->seqno);
193
194         /* Yield the floor on command completion */
195         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
196         /*
197          * If dword[2:1] are non zero, they specify an address for the CP to
198          * write the value of dword[3] to on preemption complete. Write 0 to
199          * skip the write
200          */
201         OUT_RING(ring, 0x00);
202         OUT_RING(ring, 0x00);
203         /* Data value - not used if the address above is 0 */
204         OUT_RING(ring, 0x01);
205         /* Set bit 0 to trigger an interrupt on preempt complete */
206         OUT_RING(ring, 0x01);
207
208         a5xx_flush(gpu, ring);
209
210         /* Check to see if we need to start preemption */
211         a5xx_preempt_trigger(gpu);
212 }
213
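/*
 * Hardware clock gating register/value pairs, applied (or zeroed) as a
 * group by a5xx_set_hwcg() below.
 */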
214 static const struct {
215         u32 offset;
216         u32 value;
217 } a5xx_hwcg[] = {
218         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
219         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
220         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
221         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
222         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
223         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
224         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
225         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
226         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
227         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
228         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
229         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
230         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
231         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
232         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
233         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
234         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
235         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
236         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
237         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
238         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
239         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
240         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
241         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
242         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
243         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
244         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
245         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
246         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
247         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
248         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
249         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
250         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
251         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
252         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
253         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
254         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
255         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
256         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
257         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
258         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
259         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
260         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
261         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
262         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
263         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
264         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
265         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
266         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
267         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
268         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
269         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
270         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
271         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
272         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
273         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
274         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
275         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
276         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
277         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
278         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
279         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
280         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
281         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
282         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
283         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
284         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
285         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
286         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
287         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
288         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
289         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
290         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
291         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
292         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
293         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
294         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
295         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
296         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
297         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
298         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
299         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
300         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
301         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
302         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
303         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
304         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
305         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
306         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
307         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
308         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
309         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
310 };
311
312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
313 {
314         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315         unsigned int i;
316
317         for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
318                 gpu_write(gpu, a5xx_hwcg[i].offset,
319                         state ? a5xx_hwcg[i].value : 0);
320
321         if (adreno_is_a540(adreno_gpu)) {
322                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
323                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
324         }
325
326         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
327         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
328 }
329
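/* Initialize the CP microengine with CP_ME_INIT and wait for it to idle */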
330 static int a5xx_me_init(struct msm_gpu *gpu)
331 {
332         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
333         struct msm_ringbuffer *ring = gpu->rb[0];
334
335         OUT_PKT7(ring, CP_ME_INIT, 8);
336
337         OUT_RING(ring, 0x0000002F);
338
339         /* Enable multiple hardware contexts */
340         OUT_RING(ring, 0x00000003);
341
342         /* Enable error detection */
343         OUT_RING(ring, 0x20000000);
344
345         /* Don't enable header dump */
346         OUT_RING(ring, 0x00000000);
347         OUT_RING(ring, 0x00000000);
348
349         /* Specify workarounds for various microcode issues */
350         if (adreno_is_a530(adreno_gpu)) {
351                 /* Workaround for token end syncs
352                  * Force a WFI after every direct-render 3D mode draw and every
353                  * 2D mode 3 draw
354                  */
355                 OUT_RING(ring, 0x0000000B);
356         } else if (adreno_is_a510(adreno_gpu)) {
357                 /* Workaround for token end syncs */
358                 OUT_RING(ring, 0x00000001);
359         } else {
360                 /* No workarounds enabled */
361                 OUT_RING(ring, 0x00000000);
362         }
363
364         OUT_RING(ring, 0x00000000);
365         OUT_RING(ring, 0x00000000);
366
367         gpu->funcs->flush(gpu, ring);
368         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
369 }
370
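/*
 * Prime the CP for preemption from ring 0: program the save record address
 * and the yield settings. Skipped when only one ring is in use.
 */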
371 static int a5xx_preempt_start(struct msm_gpu *gpu)
372 {
373         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
375         struct msm_ringbuffer *ring = gpu->rb[0];
376
377         if (gpu->nr_rings == 1)
378                 return 0;
379
380         /* Turn off protected mode to write to special registers */
381         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
382         OUT_RING(ring, 0);
383
384         /* Set the save preemption record for the ring/command */
385         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
386         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
387         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388
389         /* Turn back on protected mode */
390         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
391         OUT_RING(ring, 1);
392
393         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
394         OUT_RING(ring, 0x00);
395
396         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
397         OUT_RING(ring, 0x01);
398
399         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
400         OUT_RING(ring, 0x01);
401
402         /* Yield the floor on command completion */
403         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
404         OUT_RING(ring, 0x00);
405         OUT_RING(ring, 0x00);
406         OUT_RING(ring, 0x01);
407         OUT_RING(ring, 0x01);
408
409         gpu->funcs->flush(gpu, ring);
410
411         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
412 }
413
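/* Allocate (once) and program the PM4 and PFP microcode buffers */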
414 static int a5xx_ucode_init(struct msm_gpu *gpu)
415 {
416         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
417         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
418         int ret;
419
420         if (!a5xx_gpu->pm4_bo) {
421                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
422                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
423
424
425                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
426                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
427                         a5xx_gpu->pm4_bo = NULL;
428                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
429                                 ret);
430                         return ret;
431                 }
432
433                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
434         }
435
436         if (!a5xx_gpu->pfp_bo) {
437                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
438                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
439
440                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
441                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
442                         a5xx_gpu->pfp_bo = NULL;
443                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
444                                 ret);
445                         return ret;
446                 }
447
448                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
449         }
450
451         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
452                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
453
454         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
455                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
456
457         return 0;
458 }
459
460 #define SCM_GPU_ZAP_SHADER_RESUME 0
461
462 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
463 {
464         int ret;
465
466         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
467         if (ret)
468                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
469                         gpu->name, ret);
470
471         return ret;
472 }
473
474 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
475 {
476         static bool loaded;
477         int ret;
478
479         /*
480          * If the zap shader is already loaded into memory we just need to kick
481          * the remote processor to reinitialize it
482          */
483         if (loaded)
484                 return a5xx_zap_shader_resume(gpu);
485
486         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
487
488         loaded = !ret;
489         return ret;
490 }
491
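/* Interrupts unmasked in RBBM_INT_0_MASK during a5xx_hw_init() */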
492 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
493           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
494           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
495           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
496           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
497           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
498           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
499           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
500           A5XX_RBBM_INT_0_MASK_CP_SW | \
501           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
502           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
503           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
504
505 static int a5xx_hw_init(struct msm_gpu *gpu)
506 {
507         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
508         int ret;
509
510         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
511
512         if (adreno_is_a540(adreno_gpu))
513                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
514
515         /* Make all blocks contribute to the GPU BUSY perf counter */
516         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
517
518         /* Enable RBBM error reporting bits */
519         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
520
521         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
522                 /*
523                  * Mask out the activity signals from RB1-3 to avoid false
524                  * positives
525                  */
526
527                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
528                         0xF0000000);
529                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
530                         0xFFFFFFFF);
531                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
532                         0xFFFFFFFF);
533                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
534                         0xFFFFFFFF);
535                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
536                         0xFFFFFFFF);
537                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
538                         0xFFFFFFFF);
539                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
540                         0xFFFFFFFF);
541                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
542                         0xFFFFFFFF);
543         }
544
545         /* Enable fault detection */
546         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
547                 (1 << 30) | 0xFFFF);
548
549         /* Turn on performance counters */
550         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
551
552         /* Select CP0 to always count cycles */
553         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
554
555         /* Select countable 6 for RBBM perf counter 0 to get the busy status for devfreq */
556         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
557
558         /* Increase VFD cache access so LRZ and other data gets evicted less */
559         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
560
561         /* Disable L2 bypass in the UCHE */
562         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
563         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
564         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
565         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
566
567         /* Set the GMEM VA range (0 to gpu->gmem) */
568         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
569         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
570         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
571                 0x00100000 + adreno_gpu->gmem - 1);
572         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
573
574         if (adreno_is_a510(adreno_gpu)) {
575                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
576                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
577                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
578                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
579                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
580                           (0x200 << 11 | 0x200 << 22));
581         } else {
582                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
583                 if (adreno_is_a530(adreno_gpu))
584                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
585                 if (adreno_is_a540(adreno_gpu))
586                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
587                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
588                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
589                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
590                           (0x400 << 11 | 0x300 << 22));
591         }
592
593         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
594                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
595
596         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
597
598         /* Enable USE_RETENTION_FLOPS */
599         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
600
601         /* Enable ME/PFP split notification */
602         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
603
604         /*
605          *  On A5xx, the CCU can send the context_done event of a particular
606          *  context to the UCHE, which ultimately reaches the CP, even while a
607          *  valid transaction for that context is still inside the CCU. The CP
608          *  may then program config registers, causing that pending transaction
609          *  to be interpreted differently and leading to a GPU fault. This bug
610          *  is fixed in the latest A510 revision; to enable the fix, bit[11] of
611          *  RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1, i.e. the
612          *  fix disabled). On older A510 revisions this bit is unused.
613          */
614         if (adreno_is_a510(adreno_gpu))
615                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
616
617         /* Enable HWCG */
618         a5xx_set_hwcg(gpu, true);
619
620         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
621
622         /* Set the highest bank bit */
623         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
624         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
625         if (adreno_is_a540(adreno_gpu))
626                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
627
628         /* Protect registers from the CP */
629         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
630
631         /* RBBM */
632         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
633         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
634         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
635         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
636         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
637         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
638
639         /* Content protect */
640         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
641                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
642                         16));
643         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
644                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
645
646         /* CP */
647         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
648         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
649         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
650         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
651
652         /* RB */
653         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
654         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
655
656         /* VPC */
657         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
658         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
659
660         /* UCHE */
661         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
662
663         if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
664                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
665                         ADRENO_PROTECT_RW(0x10000, 0x8000));
666
667         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
668         /*
669          * Disable the trusted memory range - we don't actually support secure
670          * memory rendering at this point in time and we don't want to block off
671          * part of the virtual memory space.
672          */
673         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
674                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
675         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
676
677         /* Put the GPU into 64 bit by default */
678         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
679         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
680         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
681         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
682         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
683         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
684         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
685         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
686         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
687         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
688         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
689         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
690
691         /*
692          * A VPC corner case with local memory load kill leads to corrupt
693          * internal state. The normal disable does not work for all a5xx chips,
694          * so apply the following setting to disable it.
695          */
696         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
697                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
698                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
699         }
700
701         ret = adreno_hw_init(gpu);
702         if (ret)
703                 return ret;
704
705         a5xx_preempt_hw_init(gpu);
706
707         if (!adreno_is_a510(adreno_gpu))
708                 a5xx_gpmu_ucode_init(gpu);
709
710         ret = a5xx_ucode_init(gpu);
711         if (ret)
712                 return ret;
713
714         /* Enable the interrupts that we expect to handle */
715         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
716
717         /* Clear ME_HALT to start the micro engine */
718         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
719         ret = a5xx_me_init(gpu);
720         if (ret)
721                 return ret;
722
723         ret = a5xx_power_init(gpu);
724         if (ret)
725                 return ret;
726
727         /*
728          * Send a pipeline event stat to get misbehaving counters to start
729          * ticking correctly
730          */
731         if (adreno_is_a530(adreno_gpu)) {
732                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
733                 OUT_RING(gpu->rb[0], 0x0F);
734
735                 gpu->funcs->flush(gpu, gpu->rb[0]);
736                 if (!a5xx_idle(gpu, gpu->rb[0]))
737                         return -EINVAL;
738         }
739
740         /*
741          * If the chip that we are using supports loading a zap shader, then try
742          * to load one into the secure world. If successful we can use the CP to
743          * switch out of secure mode. If not, then we have no recourse but to try
744          * to switch ourselves out manually. If we guessed wrong then access to
745          * the RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions
746          * violation will soon follow.
747          */
748         ret = a5xx_zap_shader_init(gpu);
749         if (!ret) {
750                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
751                 OUT_RING(gpu->rb[0], 0x00000000);
752
753                 gpu->funcs->flush(gpu, gpu->rb[0]);
754                 if (!a5xx_idle(gpu, gpu->rb[0]))
755                         return -EINVAL;
756         } else {
757                 /* Print a warning so if we die, we know why */
758                 dev_warn_once(gpu->dev->dev,
759                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
760                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
761         }
762
763         /* Last step - yield the ringbuffer */
764         a5xx_preempt_start(gpu);
765
766         return 0;
767 }
768
769 static void a5xx_recover(struct msm_gpu *gpu)
770 {
771         int i;
772
773         adreno_dump_info(gpu);
774
775         for (i = 0; i < 8; i++) {
776                 printk("CP_SCRATCH_REG%d: %u\n", i,
777                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
778         }
779
780         if (hang_debug)
781                 a5xx_dump(gpu);
782
783         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
784         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
785         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
786         adreno_recover(gpu);
787 }
788
789 static void a5xx_destroy(struct msm_gpu *gpu)
790 {
791         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
792         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
793
794         DBG("%s", gpu->name);
795
796         a5xx_preempt_fini(gpu);
797
798         if (a5xx_gpu->pm4_bo) {
799                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
800                 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
801         }
802
803         if (a5xx_gpu->pfp_bo) {
804                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
805                 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
806         }
807
808         if (a5xx_gpu->gpmu_bo) {
809                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
810                 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
811         }
812
813         adreno_gpu_cleanup(adreno_gpu);
814         kfree(a5xx_gpu);
815 }
816
817 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
818 {
819         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
820                 return false;
821
822         /*
823          * Nearly every abnormality ends up pausing the GPU and triggering a
824          * fault so we can safely just watch for this one interrupt to fire
825          */
826         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
827                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
828 }
829
830 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
831 {
832         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
833         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
834
835         if (ring != a5xx_gpu->cur_ring) {
836                 WARN(1, "Tried to idle a non-current ringbuffer\n");
837                 return false;
838         }
839
840         /* wait for CP to drain ringbuffer: */
841         if (!adreno_idle(gpu, ring))
842                 return false;
843
844         if (spin_until(_a5xx_check_idle(gpu))) {
845                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
846                         gpu->name, __builtin_return_address(0),
847                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
848                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
849                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
850                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
851                 return false;
852         }
853
854         return true;
855 }
856
857 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
858 {
859         struct msm_gpu *gpu = arg;
860         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
861                         iova, flags,
862                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
863                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
864                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
865                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
866
867         return -EFAULT;
868 }
869
870 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
871 {
872         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
873
874         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
875                 u32 val;
876
877                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
878
879                 /*
880                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
881                  * read it twice
882                  */
883
884                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
885                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
886
887                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
888                         val);
889         }
890
891         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
892                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
893                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
894
895         if (status & A5XX_CP_INT_CP_DMA_ERROR)
896                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
897
898         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
899                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
900
901                 dev_err_ratelimited(gpu->dev->dev,
902                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
903                         val & (1 << 24) ? "WRITE" : "READ",
904                         (val & 0xFFFFF) >> 2, val);
905         }
906
907         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
908                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
909                 const char *access[16] = { "reserved", "reserved",
910                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
911                         "", "", "me read", "me write", "", "", "crashdump read",
912                         "crashdump write" };
913
914                 dev_err_ratelimited(gpu->dev->dev,
915                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
916                         status & 0xFFFFF, access[(status >> 24) & 0xF],
917                         (status & (1 << 31)), status);
918         }
919 }
920
921 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
922 {
923         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
924                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
925
926                 dev_err_ratelimited(gpu->dev->dev,
927                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
928                         val & (1 << 28) ? "WRITE" : "READ",
929                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
930                         (val >> 24) & 0xF);
931
932                 /* Clear the error */
933                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
934
935                 /* Clear the interrupt */
936                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
937                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
938         }
939
940         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
941                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
942
943         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
944                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
945                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
946
947         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
948                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
949                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
950
951         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
952                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
953                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
954
955         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
956                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
957
958         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
959                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
960 }
961
962 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
963 {
964         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
965
966         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
967
968         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
969                 addr);
970 }
971
972 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
973 {
974         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
975 }
976
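/* The hang detect interrupt fired: log some state and schedule recovery */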
977 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
978 {
979         struct drm_device *dev = gpu->dev;
980         struct msm_drm_private *priv = dev->dev_private;
981         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
982
983         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
984                 ring ? ring->id : -1, ring ? ring->seqno : 0,
985                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
986                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
987                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
988                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
989                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
990                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
991                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
992
993         /* Turn off the hangcheck timer to keep it from bothering us */
994         del_timer(&gpu->hangcheck_timer);
995
996         queue_work(priv->wq, &gpu->recover_work);
997 }
998
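/* RBBM error interrupts handled by a5xx_rbbm_err_irq() */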
999 #define RBBM_ERROR_MASK \
1000         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1001         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1002         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1003         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1004         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1005         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1006
1007 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1008 {
1009         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1010
1011         /*
1012          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1013          * before the source is cleared the interrupt will storm.
1014          */
1015         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1016                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1017
1018         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1019         if (status & RBBM_ERROR_MASK)
1020                 a5xx_rbbm_err_irq(gpu, status);
1021
1022         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1023                 a5xx_cp_err_irq(gpu);
1024
1025         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1026                 a5xx_fault_detect_irq(gpu);
1027
1028         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1029                 a5xx_uche_err_irq(gpu);
1030
1031         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1032                 a5xx_gpmu_err_irq(gpu);
1033
1034         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1035                 a5xx_preempt_trigger(gpu);
1036                 msm_gpu_retire(gpu);
1037         }
1038
1039         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1040                 a5xx_preempt_irq(gpu);
1041
1042         return IRQ_HANDLED;
1043 }
1044
1045 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1046         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1047         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1048         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1049         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1050                 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1051         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1052         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1053         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1054 };
1055
1056 static const u32 a5xx_registers[] = {
1057         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1058         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1059         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1060         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1061         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1062         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1063         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1064         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1065         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1066         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1067         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1068         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1069         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1070         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1071         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1072         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1073         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1074         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1075         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1076         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1077         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1078         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1079         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1080         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1081         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1082         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1083         0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1084         0xAC60, 0xAC60, ~0,
1085 };
1086
1087 static void a5xx_dump(struct msm_gpu *gpu)
1088 {
1089         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1090                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1091         adreno_dump(gpu);
1092 }
1093
1094 static int a5xx_pm_resume(struct msm_gpu *gpu)
1095 {
1096         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1097         int ret;
1098
1099         /* Turn on the core power */
1100         ret = msm_gpu_pm_resume(gpu);
1101         if (ret)
1102                 return ret;
1103
1104         if (adreno_is_a510(adreno_gpu)) {
1105                 /* Halt the sp_input_clk at HM level */
1106                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1107                 a5xx_set_hwcg(gpu, true);
1108                 /* Turn on sp_input_clk at HM level */
1109                 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1110                 return 0;
1111         }
1112
1113         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1114         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1115
1116         /* Wait 3 usecs before polling */
1117         udelay(3);
1118
1119         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1120                 (1 << 20), (1 << 20));
1121         if (ret) {
1122                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1123                         gpu->name,
1124                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1125                 return ret;
1126         }
1127
1128         /* Turn on the SP domain */
1129         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1130         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1131                 (1 << 20), (1 << 20));
1132         if (ret)
1133                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1134                         gpu->name);
1135
1136         return ret;
1137 }
1138
1139 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1140 {
1141         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1142         u32 mask = 0xf;
1143
1144         /* A510 has 3 XIN ports in VBIF */
1145         if (adreno_is_a510(adreno_gpu))
1146                 mask = 0x7;
1147
1148         /* Clear the VBIF pipe before shutting down */
1149         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1150         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1151                                 mask) == mask);
1152
1153         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1154
1155         /*
1156          * Reset the VBIF before power collapse to avoid issues with FIFO
1157          * entries
1158          */
1159         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1160         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1161
1162         return msm_gpu_pm_suspend(gpu);
1163 }
1164
1165 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1166 {
1167         *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1168                 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1169
1170         return 0;
1171 }
1172
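/*
 * Scratch buffer used to drive the CP crashdumper: the dump script is
 * written at offset 0 and the captured register data starts at 256K.
 */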
1173 struct a5xx_crashdumper {
1174         void *ptr;
1175         struct drm_gem_object *bo;
1176         u64 iova;
1177 };
1178
1179 struct a5xx_gpu_state {
1180         struct msm_gpu_state base;
1181         u32 *hlsqregs;
1182 };
1183
1184 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1185                 struct a5xx_crashdumper *dumper)
1186 {
1187         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1188                 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1189                 &dumper->bo, &dumper->iova);
1190
1191         if (!IS_ERR(dumper->ptr))
1192                 msm_gem_object_set_name(dumper->bo, "crashdump");
1193
1194         return PTR_ERR_OR_ZERO(dumper->ptr);
1195 }
1196
1197 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1198                 struct a5xx_crashdumper *dumper)
1199 {
1200         u32 val;
1201
1202         if (IS_ERR_OR_NULL(dumper->ptr))
1203                 return -EINVAL;
1204
1205         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1206                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1207
1208         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1209
1210         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1211                 val & 0x04, 100, 10000);
1212 }
1213
1214 /*
1215  * This is the list of registers that need to be read through the HLSQ
1216  * aperture by the crashdumper, since they are not normally accessible from
1217  * the CPU on a secure platform.
1218  */
1219 static const struct {
1220         u32 type;
1221         u32 regoffset;
1222         u32 count;
1223 } a5xx_hlsq_aperture_regs[] = {
1224         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1225         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1226         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1227         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1228         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1229         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1230         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1231         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1232         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1233         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1234         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1235         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1236         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1237         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1238         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1239 };
1240
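/* Use the crashdumper to capture the HLSQ/SP/TP aperture registers */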
1241 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1242                 struct a5xx_gpu_state *a5xx_state)
1243 {
1244         struct a5xx_crashdumper dumper = { 0 };
1245         u32 offset, count = 0;
1246         u64 *ptr;
1247         int i;
1248
1249         if (a5xx_crashdumper_init(gpu, &dumper))
1250                 return;
1251
1252         /* The script will be written at offset 0 */
1253         ptr = dumper.ptr;
1254
1255         /* Start writing the data at offset 256k */
1256         offset = dumper.iova + (256 * SZ_1K);
1257
1258         /* Count how many additional registers to get from the HLSQ aperture */
1259         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1260                 count += a5xx_hlsq_aperture_regs[i].count;
1261
1262         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1263         if (!a5xx_state->hlsqregs)
1264                 return;
1265
1266         /* Build the crashdump script */
1267         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1268                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1269                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1270
1271                 /* Write the register to select the desired bank */
1272                 *ptr++ = ((u64) type << 8);
1273                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1274                         (1 << 21) | 1;
1275
1276                 *ptr++ = offset;
1277                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1278                         | c;
1279
1280                 offset += c * sizeof(u32);
1281         }
1282
1283         /* Write two zeros to close off the script */
1284         *ptr++ = 0;
1285         *ptr++ = 0;
1286
1287         if (a5xx_crashdumper_run(gpu, &dumper)) {
1288                 kfree(a5xx_state->hlsqregs);
1289                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1290                 return;
1291         }
1292
1293         /* Copy the data from the crashdumper to the state */
1294         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1295                 count * sizeof(u32));
1296
1297         msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1298 }
1299
1300 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1301 {
1302         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1303                         GFP_KERNEL);
1304
1305         if (!a5xx_state)
1306                 return ERR_PTR(-ENOMEM);
1307
1308         /* Temporarily disable hardware clock gating before reading the hw */
1309         a5xx_set_hwcg(gpu, false);
1310
1311         /* First get the generic state from the adreno core */
1312         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1313
1314         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1315
1316         /* Get the HLSQ regs with the help of the crashdumper */
1317         a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1318
1319         a5xx_set_hwcg(gpu, true);
1320
1321         return &a5xx_state->base;
1322 }
1323
1324 static void a5xx_gpu_state_destroy(struct kref *kref)
1325 {
1326         struct msm_gpu_state *state = container_of(kref,
1327                 struct msm_gpu_state, ref);
1328         struct a5xx_gpu_state *a5xx_state = container_of(state,
1329                 struct a5xx_gpu_state, base);
1330
1331         kfree(a5xx_state->hlsqregs);
1332
1333         adreno_gpu_state_destroy(state);
1334         kfree(a5xx_state);
1335 }
1336
1337 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1338 {
1339         if (IS_ERR_OR_NULL(state))
1340                 return 1;
1341
1342         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1343 }
1344
1345
1346 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1347 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1348                       struct drm_printer *p)
1349 {
1350         int i, j;
1351         u32 pos = 0;
1352         struct a5xx_gpu_state *a5xx_state = container_of(state,
1353                 struct a5xx_gpu_state, base);
1354
1355         if (IS_ERR_OR_NULL(state))
1356                 return;
1357
1358         adreno_show(gpu, state, p);
1359
1360         /* Dump the additional a5xx HLSQ registers */
1361         if (!a5xx_state->hlsqregs)
1362                 return;
1363
1364         drm_printf(p, "registers-hlsq:\n");
1365
1366         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1367                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1368                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1369
1370                 for (j = 0; j < c; j++, pos++, o++) {
1371                         /*
1372                          * To keep the crashdump simple we pull the entire range
1373                          * for each register type but not all of the registers
1374                          * in the range are valid. Fortunately invalid registers
1375                          * stick out like a sore thumb with a value of
1376                          * 0xdeadbeef
1377                          */
1378                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1379                                 continue;
1380
1381                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1382                                 o << 2, a5xx_state->hlsqregs[pos]);
1383                 }
1384         }
1385 }
1386 #endif
1387
1388 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1389 {
1390         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1391         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1392
1393         return a5xx_gpu->cur_ring;
1394 }
1395
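/*
 * Report the busy time (in usecs) since the last devfreq sample, derived
 * from the RBBM perf counter programmed to count busy cycles in hw_init.
 */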
1396 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1397 {
1398         u64 busy_cycles, busy_time;
1399
1400         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1401                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1402
1403         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1404         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1405
1406         gpu->devfreq.busy_cycles = busy_cycles;
1407
1408         if (WARN_ON(busy_time > ~0LU))
1409                 return ~0LU;
1410
1411         return (unsigned long)busy_time;
1412 }
1413
1414 static const struct adreno_gpu_funcs funcs = {
1415         .base = {
1416                 .get_param = adreno_get_param,
1417                 .hw_init = a5xx_hw_init,
1418                 .pm_suspend = a5xx_pm_suspend,
1419                 .pm_resume = a5xx_pm_resume,
1420                 .recover = a5xx_recover,
1421                 .submit = a5xx_submit,
1422                 .flush = a5xx_flush,
1423                 .active_ring = a5xx_active_ring,
1424                 .irq = a5xx_irq,
1425                 .destroy = a5xx_destroy,
1426 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1427                 .show = a5xx_show,
1428 #endif
1429 #if defined(CONFIG_DEBUG_FS)
1430                 .debugfs_init = a5xx_debugfs_init,
1431 #endif
1432                 .gpu_busy = a5xx_gpu_busy,
1433                 .gpu_state_get = a5xx_gpu_state_get,
1434                 .gpu_state_put = a5xx_gpu_state_put,
1435         },
1436         .get_timestamp = a5xx_get_timestamp,
1437 };
1438
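/*
 * If the platform exposes a "speed_bin" nvmem cell, use it to restrict the
 * OPP table to the frequencies supported by that bin.
 */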
1439 static void check_speed_bin(struct device *dev)
1440 {
1441         struct nvmem_cell *cell;
1442         u32 bin, val;
1443
1444         cell = nvmem_cell_get(dev, "speed_bin");
1445
1446         /* If an nvmem cell isn't defined, nothing to do */
1447         if (IS_ERR(cell))
1448                 return;
1449
1450         bin = *((u32 *) nvmem_cell_read(cell, NULL));
1451         nvmem_cell_put(cell);
1452
1453         val = (1 << bin);
1454
1455         dev_pm_opp_set_supported_hw(dev, &val, 1);
1456 }
1457
1458 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1459 {
1460         struct msm_drm_private *priv = dev->dev_private;
1461         struct platform_device *pdev = priv->gpu_pdev;
1462         struct a5xx_gpu *a5xx_gpu = NULL;
1463         struct adreno_gpu *adreno_gpu;
1464         struct msm_gpu *gpu;
1465         int ret;
1466
1467         if (!pdev) {
1468                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1469                 return ERR_PTR(-ENXIO);
1470         }
1471
1472         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1473         if (!a5xx_gpu)
1474                 return ERR_PTR(-ENOMEM);
1475
1476         adreno_gpu = &a5xx_gpu->base;
1477         gpu = &adreno_gpu->base;
1478
1479         adreno_gpu->registers = a5xx_registers;
1480         adreno_gpu->reg_offsets = a5xx_register_offsets;
1481
1482         a5xx_gpu->lm_leakage = 0x4E001A;
1483
1484         check_speed_bin(&pdev->dev);
1485
1486         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1487         if (ret) {
1488                 a5xx_destroy(&(a5xx_gpu->base.base));
1489                 return ERR_PTR(ret);
1490         }
1491
1492         if (gpu->aspace)
1493                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1494
1495         /* Set up the preemption specific bits and pieces for each ringbuffer */
1496         a5xx_preempt_init(gpu);
1497
1498         return gpu;
1499 }